From f821e80213e38e93f96255b3deacb737a600ed40 Mon Sep 17 00:00:00 2001
From: Karol Herbst
Date: Sun, 5 Aug 2018 18:34:22 +0200
Subject: gm107/ir: use scalar tex instructions where possible

TEXS, TLDS and TLD4S are variants of tex instructions which are more scalar,
which gives RA more freedom and is less likely to insert silly MOVs to satisfy
quad registers.

shader-db changes:
total instructions in shared programs : 7687265 -> 7614782 (-0.94%)
total gprs used in shared programs    : 803620 -> 798045 (-0.69%)
total shared used in shared programs  : 639636 -> 639636 (0.00%)
total local used in shared programs   : 24648 -> 24648 (0.00%)
total bytes used in shared programs   : 82103400 -> 81330696 (-0.94%)

                local     shared        gpr       inst      bytes
    helped          0          0       3648      10647      10647
      hurt          0          0        464        205        205

Reviewed-by: Ilia Mirkin
---
 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 171 +++++++++++++++++++-
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 166 ++++++++++++++++++++
 2 files changed, 334 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index b2e22195ec5..241061ab837 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -192,6 +192,7 @@ private:
 
    void emitTEXs(int);
    void emitTEX();
+   void emitTEXS();
    void emitTLD();
    void emitTLD4();
    void emitTXD();
@@ -2718,6 +2719,113 @@ CodeEmitterGM107::emitTEXs(int pos)
    emitGPR(pos);
 }
 
+// Maps a texture component write mask to the 3-bit mask field that emitTEXS()
+// writes at bit 0x32. Some masks share a field value (0x1 and 0x7 both give
+// 0x0); presumably the second destination register tells them apart - TODO
+// confirm. Masks without a case (0x0, 0x5 and 0x6) are invalid here.
+static uint8_t
+getTEXSMask(uint8_t mask)
+{
+   switch (mask) {
+   case 0x1: return 0x0;
+   case 0x2: return 0x1;
+   case 0x3: return 0x4;
+   case 0x4: return 0x2;
+   case 0x7: return 0x0;
+   case 0x8: return 0x3;
+   case 0x9: return 0x5;
+   case 0xa: return 0x6;
+   case 0xb: return 0x1;
+   case 0xc: return 0x7;
+   case 0xd: return 0x2;
+   case 0xe: return 0x3;
+   case 0xf: return 0x4;
+   default:
+      assert(!"invalid mask");
+      // assert() compiles away under NDEBUG; never fall off the end
+      return 0x0;
+   }
+}
+
+// Returns the 4-bit target field of the 0xd8000000 encoding for OP_TEX and
+// OP_TXL, selected from the texture target, levelZero and the opcode.
+static uint8_t
+getTEXSTarget(const TexInstruction *tex)
+{
+   assert(tex->op == OP_TEX || tex->op == OP_TXL);
+
+   switch (tex->tex.target.getEnum()) {
+   case TEX_TARGET_1D:
+      assert(tex->tex.levelZero);
+      return 0x0;
+   case TEX_TARGET_2D:
+   case TEX_TARGET_RECT:
+      if (tex->tex.levelZero)
+         return 0x2;
+      if (tex->op == OP_TXL)
+         return 0x3;
+      return 0x1;
+   case TEX_TARGET_2D_SHADOW:
+   case TEX_TARGET_RECT_SHADOW:
+      if (tex->tex.levelZero)
+         return 0x6;
+      if (tex->op == OP_TXL)
+         return 0x5;
+      return 0x4;
+   case TEX_TARGET_2D_ARRAY:
+      if (tex->tex.levelZero)
+         return 0x8;
+      return 0x7;
+   case TEX_TARGET_2D_ARRAY_SHADOW:
+      assert(tex->tex.levelZero);
+      return 0x9;
+   case TEX_TARGET_3D:
+      if (tex->tex.levelZero)
+         return 0xb;
+      assert(tex->op != OP_TXL);
+      return 0xa;
+   case TEX_TARGET_CUBE:
+      assert(!tex->tex.levelZero);
+      if (tex->op == OP_TXL)
+         return 0xd;
+      return 0xc;
+   default:
+      assert(false);
+      return 0x0;
+   }
+}
+
+// Returns the 4-bit target field of the 0xda000000 encoding used for OP_TXF,
+// selected from the texture target, levelZero and useOffsets.
+static uint8_t
+getTLDSTarget(const TexInstruction *tex)
+{
+   switch (tex->tex.target.getEnum()) {
+   case TEX_TARGET_1D:
+      if (tex->tex.levelZero)
+         return 0x0;
+      return 0x1;
+   case TEX_TARGET_2D:
+   case TEX_TARGET_RECT:
+      if (tex->tex.levelZero)
+         return tex->tex.useOffsets ? 0x4 : 0x2;
+      return tex->tex.useOffsets ? 0xc : 0x5;
+   case TEX_TARGET_2D_MS:
+      assert(tex->tex.levelZero);
+      return 0x6;
+   case TEX_TARGET_3D:
+      assert(tex->tex.levelZero);
+      return 0x7;
+   case TEX_TARGET_2D_ARRAY:
+      assert(tex->tex.levelZero);
+      return 0x8;
+
+   default:
+      assert(false);
+      return 0x0;
+   }
+}
+
 void
 CodeEmitterGM107::emitTEX()
 {
@@ -2760,6 +2868,52 @@ CodeEmitterGM107::emitTEX()
    emitGPR  (0x00, insn->def(0));
 }
 
+// Emits the scalar form of a texture instruction: opcode 0xd8000000 for
+// OP_TEX/OP_TXL, 0xda000000 for OP_TXF and 0xdf000000 for OP_TXG. Only def(0),
+// def(1), src(0) and src(1) are encoded.
+void
+CodeEmitterGM107::emitTEXS()
+{
+   const TexInstruction *insn = this->insn->asTex();
+
+   switch (insn->op) {
+   case OP_TEX:
+   case OP_TXL:
+      emitInsn (0xd8000000);
+      emitField(0x35, 4, getTEXSTarget(insn));
+      emitField(0x32, 3, getTEXSMask(insn->tex.mask));
+      break;
+   case OP_TXF:
+      emitInsn (0xda000000);
+      emitField(0x35, 4, getTLDSTarget(insn));
+      emitField(0x32, 3, getTEXSMask(insn->tex.mask));
+      break;
+   case OP_TXG:
+      assert(insn->tex.useOffsets != 4);
+      emitInsn (0xdf000000);
+      emitField(0x34, 2, insn->tex.gatherComp);
+      emitField(0x33, 1, insn->tex.useOffsets == 1);
+      emitField(0x32, 1, insn->tex.target.isShadow());
+      break;
+   default:
+      unreachable("unknown op in emitTEXS()");
+      break;
+   }
+
+   emitField(0x31, 1, insn->tex.liveOnly);
+   emitField(0x24, 13, insn->tex.r);
+   if (insn->defExists(1))
+      emitGPR(0x1c, insn->def(1));
+   else
+      emitGPR(0x1c);
+   if (insn->srcExists(1))
+      emitGPR(0x14, insn->getSrc(1));
+   else
+      emitGPR(0x14);
+   emitGPR  (0x08, insn->src(0));
+   emitGPR  (0x00, insn->def(0));
+}
+
 void
 CodeEmitterGM107::emitTLD()
 {
@@ -3474,15 +3628,26 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
       emitPIXLD();
       break;
    case OP_TEX:
-   case OP_TXB:
    case OP_TXL:
+      if (insn->asTex()->tex.scalar)
+         emitTEXS();
+      else
+         emitTEX();
+      break;
+   case OP_TXB:
       emitTEX();
       break;
    case OP_TXF:
-      emitTLD();
+      if (insn->asTex()->tex.scalar)
+         emitTEXS();
+      else
+         emitTLD();
       break;
    case OP_TXG:
-      emitTLD4();
+      if (insn->asTex()->tex.scalar)
+         emitTEXS();
+      else
+         emitTLD4();
       break;
    case OP_TXD:
       emitTXD();
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 0059ecaeb6c..87a39de99d6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -275,6 +275,9 @@ private:
    void texConstraintNVE0(TexInstruction *);
    void texConstraintGM107(TexInstruction *);
 
+   bool isScalarTexGM107(TexInstruction *);
+   void handleScalarTexGM107(TexInstruction *);
+
    std::list<Instruction *> constrList;
 
    const Target *targ;
@@ -2119,6 +2122,162 @@ RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
    constrList.push_back(merge);
 }
 
+// Returns true if tex qualifies for a scalar variant: sIndirectSrc and
+// rIndirectSrc are negative and op/target/mask/levelZero/useOffsets fit.
+bool
+RegAlloc::InsertConstraintsPass::isScalarTexGM107(TexInstruction *tex)
+{
+   if (tex->tex.sIndirectSrc >= 0 ||
+       tex->tex.rIndirectSrc >= 0)
+      return false;
+
+   if (tex->tex.mask == 5 || tex->tex.mask == 6)
+      return false;
+
+   switch (tex->op) {
+   case OP_TEX:
+   case OP_TXF:
+   case OP_TXG:
+   case OP_TXL:
+      break;
+   default:
+      return false;
+   }
+
+   // legal variants:
+   // TEXS.1D.LZ
+   // TEXS.2D
+   // TEXS.2D.LZ
+   // TEXS.2D.LL
+   // TEXS.2D.DC
+   // TEXS.2D.LL.DC
+   // TEXS.2D.LZ.DC
+   // TEXS.A2D
+   // TEXS.A2D.LZ
+   // TEXS.A2D.LZ.DC
+   // TEXS.3D
+   // TEXS.3D.LZ
+   // TEXS.CUBE
+   // TEXS.CUBE.LL
+
+   // TLDS.1D.LZ
+   // TLDS.1D.LL
+   // TLDS.2D.LZ
+   // TLDS.2D.LZ.AOFFI
+   // TLDS.2D.LZ.MS
+   // TLDS.2D.LL
+   // TLDS.2D.LL.AOFFI
+   // TLDS.A2D.LZ
+   // TLDS.3D.LZ
+
+   // TLD4S: all 2D/RECT variants and only offset
+
+   switch (tex->op) {
+   case OP_TEX:
+      if (tex->tex.useOffsets)
+         return false;
+
+      switch (tex->tex.target.getEnum()) {
+      case TEX_TARGET_1D:
+      case TEX_TARGET_2D_ARRAY_SHADOW:
+         return tex->tex.levelZero;
+      case TEX_TARGET_CUBE:
+         return !tex->tex.levelZero;
+      case TEX_TARGET_2D:
+      case TEX_TARGET_2D_ARRAY:
+      case TEX_TARGET_2D_SHADOW:
+      case TEX_TARGET_3D:
+      case TEX_TARGET_RECT:
+      case TEX_TARGET_RECT_SHADOW:
+         return true;
+      default:
+         return false;
+      }
+
+   case OP_TXL:
+      if (tex->tex.useOffsets)
+         return false;
+
+      switch (tex->tex.target.getEnum()) {
+      case TEX_TARGET_2D:
+      case TEX_TARGET_2D_SHADOW:
+      case TEX_TARGET_RECT:
+      case TEX_TARGET_RECT_SHADOW:
+      case TEX_TARGET_CUBE:
+         return true;
+      default:
+         return false;
+      }
+
+   case OP_TXF:
+      switch (tex->tex.target.getEnum()) {
+      case TEX_TARGET_1D:
+         return !tex->tex.useOffsets;
+      case TEX_TARGET_2D:
+      case TEX_TARGET_RECT:
+         return true;
+      case TEX_TARGET_2D_ARRAY:
+      case TEX_TARGET_2D_MS:
+      case TEX_TARGET_3D:
+         return !tex->tex.useOffsets && tex->tex.levelZero;
+      default:
+         return false;
+      }
+
+   case OP_TXG:
+      if (tex->tex.useOffsets > 1)
+         return false;
+      if (tex->tex.mask != 0x3 && tex->tex.mask != 0xf)
+         return false;
+
+      switch (tex->tex.target.getEnum()) {
+      case TEX_TARGET_2D:
+      case TEX_TARGET_2D_MS:
+      case TEX_TARGET_2D_SHADOW:
+      case TEX_TARGET_RECT:
+      case TEX_TARGET_RECT_SHADOW:
+         return true;
+      default:
+         return false;
+      }
+
+   default:
+      return false;
+   }
+}
+
+// Flags tex as scalar and condenses its defs and srcs so that neither def(2)
+// nor src(2) exists afterwards (checked by the closing assert).
+void
+RegAlloc::InsertConstraintsPass::handleScalarTexGM107(TexInstruction *tex)
+{
+   int defCount = tex->defCount(0xff);
+   int srcCount = tex->srcCount(0xff);
+
+   tex->tex.scalar = true;
+
+   // 1. handle defs
+   if (defCount > 3)
+      condenseDefs(tex, 2, 3);
+   if (defCount > 1)
+      condenseDefs(tex, 0, 1);
+
+   // 2. handle srcs
+   // special case for TXF.A2D
+   if (tex->op == OP_TXF && tex->tex.target == TEX_TARGET_2D_ARRAY) {
+      assert(srcCount >= 3);
+      condenseSrcs(tex, 1, 2);
+   } else {
+      if (srcCount > 3)
+         condenseSrcs(tex, 2, 3);
+      // only if we have more than 2 sources
+      if (srcCount > 2)
+         condenseSrcs(tex, 0, 1);
+   }
+
+   assert(!tex->defExists(2) && !tex->srcExists(2));
+}
+
 void
 RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
 {
@@ -2126,6 +2285,13 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
 
    if (isTextureOp(tex->op))
       textureMask(tex);
+
+   if (isScalarTexGM107(tex)) {
+      handleScalarTexGM107(tex);
+      return;
+   }
+
+   assert(!tex->tex.scalar);
    condenseDefs(tex);
 
    if (isSurfaceOp(tex->op)) {
-- 
cgit v1.2.3