From f3aa999383074d666d6e3f3506e66b0c937904ca Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 26 Apr 2014 02:08:23 -0400 Subject: nv50/ir: change texture offsets to ValueRefs, allow nonconst This allows us to have non-constant offsets for textureGatherOffset and textureGatherOffsets. Signed-off-by: Ilia Mirkin --- src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 7 ++++ src/gallium/drivers/nouveau/codegen/nv50_ir.h | 3 +- .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 4 ++- .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 6 ++-- .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 4 ++- .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 12 +++---- .../nouveau/codegen/nv50_ir_lowering_nv50.cpp | 8 +++++ .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 37 +++++++++++++++++----- 8 files changed, 61 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index a24a66c77a3..13f8cf20c76 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -911,6 +911,9 @@ TexInstruction::~TexInstruction() dPdx[c].set(NULL); dPdy[c].set(NULL); } + for (int n = 0; n < 4; ++n) + for (int c = 0; c < 3; ++c) + offset[n][c].set(NULL); } TexInstruction * @@ -930,6 +933,10 @@ TexInstruction::clone(ClonePolicy& pol, Instruction *i) const } } + for (int n = 0; n < tex->tex.useOffsets; ++n) + for (int c = 0; c < 3; ++c) + tex->offset[n][c].set(offset[n][c]); + return tex; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 919d3a4c7bd..8872cfb7025 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -913,13 +913,14 @@ public: bool derivAll; int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets - int8_t offset[4][3]; + int8_t offset[3]; // only used on nv50 enum TexQuery query; } tex; ValueRef dPdx[3]; ValueRef dPdy[3]; + ValueRef offset[4][3]; }; class CmpInstruction : public Instruction diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 63d55252b54..5992c543d69 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1104,12 +1104,14 @@ CodeEmitterGK110::emitTEX(const TexInstruction *i) // ? } - if (i->tex.useOffsets) { + if (i->tex.useOffsets == 1) { switch (i->op) { case OP_TXF: code[1] |= 0x200; break; default: code[1] |= 0x800; break; } } + if (i->tex.useOffsets == 4) + code[1] |= 0x1000; } void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index 9eccd9f0ccd..077eba8e3d4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -1475,9 +1475,9 @@ CodeEmitterNV50::emitTEX(const TexInstruction *i) code[0] |= 0x08000000; } else if (i->tex.useOffsets) { - code[1] |= (i->tex.offset[0][0] & 0xf) << 24; - code[1] |= (i->tex.offset[0][1] & 0xf) << 20; - code[1] |= (i->tex.offset[0][2] & 0xf) << 16; + code[1] |= (i->tex.offset[0] & 0xf) << 24; + code[1] |= (i->tex.offset[1] & 0xf) << 20; + code[1] |= (i->tex.offset[2] & 0xf) << 16; } code[0] |= (i->tex.mask & 0x3) << 25; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 11a7c2b2bc7..b1f76cf8043 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1208,8 +1208,10 @@ CodeEmitterNVC0::emitTEX(const TexInstruction *i) i->tex.target == TEX_TARGET_2D_MS_ARRAY) code[1] |= 1 << 23; - if (i->tex.useOffsets) // in vecSrc0.w + if (i->tex.useOffsets == 1) code[1] |= 1 << 22; + if (i->tex.useOffsets == 4) + code[1] |= 1 << 23; srcId(i, src1, 26); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 402cecfb5f8..3e44bf9fdba 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1763,11 +1763,11 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow()) texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info); + texi->tex.useOffsets = tgsi.getNumTexOffsets(); for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { for (c = 0; c < 3; ++c) { - texi->tex.offset[s][c] = tgsi.getTexOffset(s).getValueU32(c, info); - if (texi->tex.offset[s][c]) - texi->tex.useOffsets = s + 1; + texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL)); + texi->offset[s][c].setInsn(texi); } } @@ -1800,11 +1800,11 @@ Converter::handleTXF(Value *dst[4], int R, int L_M) setTexRS(texi, c, R, -1); + texi->tex.useOffsets = tgsi.getNumTexOffsets(); for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { for (c = 0; c < 3; ++c) { - texi->tex.offset[s][c] = tgsi.getTexOffset(s).getValueU32(c, info); - if (texi->tex.offset[s][c]) - texi->tex.useOffsets = s + 1; + texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL)); + texi->offset[s][c].setInsn(texi); } } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp index 69e88e6902d..eafc0a73bc6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -695,6 +695,14 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i) // texel offsets are 3 immediate fields in the instruction, // nv50 cannot do textureGatherOffsets assert(i->tex.useOffsets <= 1); + if (i->tex.useOffsets) { + for (int c = 0; c < 3; ++c) { + ImmediateValue val; + assert(i->offset[0][c].getImmediate(val)); + i->tex.offset[c] = val.reg.data.u32; + i->offset[0][c].set(NULL); + } + } return true; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index c7e9063fe65..dc1eb5e80a5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -739,21 +739,42 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) // offset is last source (lod 1st, dc 2nd) if (i->tex.useOffsets) { - uint32_t value = 0; int n, c; int s = i->srcCount(0xff, true); if (i->srcExists(s)) // move potential predicate out of the way i->moveSources(s, 1); + if (i->tex.useOffsets == 4 && i->srcExists(s + 1)) + i->moveSources(s + 1, 1); if (i->op == OP_TXG) { - assert(i->tex.useOffsets == 1); - for (c = 0; c < 3; ++c) - value |= (i->tex.offset[0][c] & 0xff) << (c * 8); + // Either there is 1 offset, which goes into the 2 low bytes of the + // first source, or there are 4 offsets, which go into 2 sources (8 + // values, 1 byte each). + Value *offs[2] = {NULL, NULL}; + for (n = 0; n < i->tex.useOffsets; n++) { + for (c = 0; c < 2; ++c) { + if ((n % 2) == 0 && c == 0) + offs[n / 2] = i->offset[n][c].get(); + else + bld.mkOp3(OP_INSBF, TYPE_U32, + offs[n / 2], + i->offset[n][c].get(), + bld.mkImm(0x800 | ((n * 16 + c * 8) % 32)), + offs[n / 2]); + } + } + i->setSrc(s, offs[0]); + if (offs[1]) + i->setSrc(s + 1, offs[1]); } else { - for (n = 0; n < i->tex.useOffsets; ++n) - for (c = 0; c < 3; ++c) - value |= (i->tex.offset[n][c] & 0xf) << (n * 12 + c * 4); + unsigned imm = 0; + assert(i->tex.useOffsets == 1); + for (c = 0; c < 3; ++c) { + ImmediateValue val; + assert(i->offset[0][c].getImmediate(val)); + imm |= (val.reg.data.u32 & 0xf) << (c * 4); + } + i->setSrc(s, bld.loadImm(NULL, imm)); } - i->setSrc(s, bld.loadImm(NULL, value)); } if (chipset >= NVISA_GK104_CHIPSET) { -- cgit v1.2.3