summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nv50/codegen
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nv50/codegen')
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir.cpp12
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir.h4
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h13
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_driver.h2
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp896
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp1118
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp25
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp10
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_target.h8
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp531
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.h72
11 files changed, 2472 insertions, 219 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
index 19a90806c70..048759060ad 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
@@ -559,8 +559,11 @@ void Instruction::init()
subOp = 0;
saturate = 0;
- join = terminator = 0;
- ftz = dnz = 0;
+ join = 0;
+ exit = 0;
+ terminator = 0;
+ ftz = 0;
+ dnz = 0;
atomic = 0;
perPatch = 0;
fixed = 0;
@@ -982,6 +985,9 @@ Program::Program(Type type, Target *arch)
calls.insert(&main->call);
dbgFlags = 0;
+ optLevel = 0;
+
+ targetPriv = NULL;
}
Program::~Program()
@@ -1085,6 +1091,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
if (!prog)
return -1;
prog->dbgFlags = info->dbgFlags;
+ prog->optLevel = info->optLevel;
switch (info->bin.sourceRep) {
#if 0
@@ -1105,6 +1112,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
prog->print();
+ targ->parseDriverInfo(info);
prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
prog->convertToSSA();
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index c0a867d9552..6ec4fc95441 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -140,6 +140,7 @@ enum operation
#define NV50_IR_SUBOP_LDC_IS 2
#define NV50_IR_SUBOP_LDC_ISL 3
#define NV50_IR_SUBOP_SHIFT_WRAP 1
+#define NV50_IR_SUBOP_EMU_PRERET 1
enum DataType
{
@@ -1060,6 +1061,9 @@ public:
MemoryPool mem_ImmediateValue;
uint32_t dbgFlags;
+ uint8_t optLevel;
+
+ void *targetPriv; // e.g. to carry information between passes
void releaseInstruction(Instruction *);
void releaseValue(Value *);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
index 022a27f1748..9ee04dbcd12 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
@@ -46,7 +46,8 @@ public:
inline void remove(Instruction *i) { assert(i->bb == bb); bb->remove(i); }
inline LValue *getScratch(int size = 4, DataFile = FILE_GPR);
- inline LValue *getSSA(int size = 4); // scratch value for a single assignment
+ // scratch value for a single assignment:
+ inline LValue *getSSA(int size = 4, DataFile = FILE_GPR);
inline Instruction *mkOp(operation, DataType, Value *);
Instruction *mkOp1(operation, DataType, Value *, Value *);
@@ -215,18 +216,16 @@ LValue *
BuildUtil::getScratch(int size, DataFile f)
{
LValue *lval = new_LValue(func, f);
- if (size != 4)
- lval->reg.size = size;
+ lval->reg.size = size;
return lval;
}
LValue *
-BuildUtil::getSSA(int size)
+BuildUtil::getSSA(int size, DataFile f)
{
- LValue *lval = new_LValue(func, FILE_GPR);
+ LValue *lval = new_LValue(func, f);
lval->ssa = 1;
- if (size != 4)
- lval->reg.size = size;
+ lval->reg.size = size;
return lval;
}
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
index ae733a1a924..dc42b8295e9 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
@@ -175,6 +175,8 @@ struct nv50_ir_prog_info
/* driver callback to assign input/output locations */
int (*assignSlots)(struct nv50_ir_prog_info *);
+
+ void *driverPriv;
};
#ifdef __cplusplus
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
index 1c09494f46d..c534d4a0c5e 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
@@ -21,14 +21,19 @@
*/
#include "nv50_ir.h"
-#include "nv50_ir_target.h"
+#include "nv50_ir_target_nv50.h"
namespace nv50_ir {
+#define NV50_OP_ENC_LONG 0
+#define NV50_OP_ENC_SHORT 1
+#define NV50_OP_ENC_IMM 2
+#define NV50_OP_ENC_LONG_ALT 3
+
class CodeEmitterNV50 : public CodeEmitter
{
public:
- CodeEmitterNV50(const Target *);
+ CodeEmitterNV50(const TargetNV50 *);
virtual bool emitInstruction(Instruction *);
@@ -36,23 +41,25 @@ public:
inline void setProgramType(Program::Type pType) { progType = pType; }
-private:
- const Target *targ;
+ virtual void prepareEmission(Function *);
+private:
Program::Type progType;
+ const TargetNV50 *targ;
+
private:
inline void defId(const ValueDef&, const int pos);
inline void srcId(const ValueRef&, const int pos);
inline void srcId(const ValueRef *, const int pos);
- inline void srcAddr16(const ValueRef&, const int pos);
+ inline void srcAddr16(const ValueRef&, bool adj, const int pos);
inline void srcAddr8(const ValueRef&, const int pos);
void emitFlagsRd(const Instruction *);
void emitFlagsWr(const Instruction *);
- void emitCondCode(CondCode cc, int pos);
+ void emitCondCode(CondCode cc, DataType ty, int pos);
inline void setARegBits(unsigned int);
@@ -61,16 +68,16 @@ private:
void setDst(const Value *);
void setDst(const Instruction *, int d);
- void emitSrc0(const ValueRef&);
- void emitSrc1(const ValueRef&);
- void emitSrc2(const ValueRef&);
+ void setSrcFileBits(const Instruction *, int enc);
+ void setSrc(const Instruction *, unsigned int s, int slot);
void emitForm_MAD(const Instruction *);
void emitForm_ADD(const Instruction *);
void emitForm_MUL(const Instruction *);
void emitForm_IMM(const Instruction *);
- void emitLoadStoreSize(DataType ty, int pos);
+ void emitLoadStoreSizeLG(DataType ty, int pos);
+ void emitLoadStoreSizeCS(DataType ty);
void roundMode_MAD(const Instruction *);
void roundMode_CVT(RoundMode);
@@ -88,9 +95,10 @@ private:
void emitUADD(const Instruction *);
void emitAADD(const Instruction *);
void emitFADD(const Instruction *);
- void emitUMUL(const Instruction *);
+ void emitIMUL(const Instruction *);
void emitFMUL(const Instruction *);
void emitFMAD(const Instruction *);
+ void emitIMAD(const Instruction *);
void emitMINMAX(const Instruction *);
@@ -98,17 +106,20 @@ private:
void emitSFnOp(const Instruction *, uint8_t subOp);
void emitShift(const Instruction *);
- void emitARL(const Instruction *);
+ void emitARL(const Instruction *, unsigned int shl);
void emitLogicOp(const Instruction *);
+ void emitNOT(const Instruction *);
void emitCVT(const Instruction *);
void emitSET(const Instruction *);
void emitTEX(const TexInstruction *);
+ void emitTXQ(const TexInstruction *);
void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
void emitFlow(const Instruction *, uint8_t flowOp);
+ void emitPRERETEmu(const FlowInstruction *);
};
#define SDATA(a) ((a).rep()->reg.data)
@@ -126,13 +137,20 @@ void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
code[pos / 32] |= SDATA(*src).id << (pos % 32);
}
-void CodeEmitterNV50::srcAddr16(const ValueRef& src, const int pos)
+void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
{
assert(src.get());
- uint32_t offset = SDATA(src).offset;
+ int32_t offset = SDATA(src).offset;
+
+ assert(!adj || src.get()->reg.size <= 4);
+ if (adj)
+ offset /= src.get()->reg.size;
- assert(offset <= 0xffff && (pos % 32) <= 16);
+ assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
+
+ if (offset < 0)
+ offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
code[pos / 32] |= offset << (pos % 32);
}
@@ -143,14 +161,15 @@ void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
uint32_t offset = SDATA(src).offset;
- assert(offset <= 0x1fc && !(offset & 0x3));
+ assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
code[pos / 32] |= (offset >> 2) << (pos % 32);
}
void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
{
- assert(def.get());
+ assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
+
code[pos / 32] |= DDATA(def).id << (pos % 32);
}
@@ -170,11 +189,11 @@ CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
void
CodeEmitterNV50::emitMNeg12(const Instruction *i)
{
- code[1] |= i->src[0].mod.neg() << 26;
- code[1] |= i->src[1].mod.neg() << 27;
+ code[1] |= i->src(0).mod.neg() << 26;
+ code[1] |= i->src(1).mod.neg() << 27;
}
-void CodeEmitterNV50::emitCondCode(CondCode cc, int pos)
+void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
{
uint8_t enc;
@@ -210,6 +229,9 @@ void CodeEmitterNV50::emitCondCode(CondCode cc, int pos)
assert(!"invalid condition code");
break;
}
+ if (ty != TYPE_NONE && !isFloatType(ty))
+ enc &= ~0x8; // unordered only exists for float types
+
code[pos / 32] |= enc << (pos % 32);
}
@@ -222,8 +244,8 @@ CodeEmitterNV50::emitFlagsRd(const Instruction *i)
if (s >= 0) {
assert(i->getSrc(s)->reg.file == FILE_FLAGS);
- emitCondCode(i->cc, 32 + 7);
- srcId(i->src[s], 32 + 12);
+ emitCondCode(i->cc, TYPE_NONE, 32 + 7);
+ srcId(i->src(s), 32 + 12);
} else {
code[1] |= 0x0780;
}
@@ -234,8 +256,22 @@ CodeEmitterNV50::emitFlagsWr(const Instruction *i)
{
assert(!(code[1] & 0x70));
- if (i->flagsDef >= 0)
- code[1] |= (DDATA(i->def[i->flagsDef]).id << 4) | 0x40;
+ int flagsDef = i->flagsDef;
+
+ // find flags definition and check that it is the last def
+ if (flagsDef < 0) {
+ for (int d = 0; i->defExists(d); ++d)
+ if (i->def(d).getFile() == FILE_FLAGS)
+ flagsDef = d;
+ if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
+ WARN("Instruction::flagsDef was not set properly\n");
+ }
+ if (flagsDef == 0 && i->defExists(1))
+ WARN("flags def should not be the primary definition\n");
+
+ if (flagsDef >= 0)
+ code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
+
}
void
@@ -248,20 +284,27 @@ CodeEmitterNV50::setARegBits(unsigned int u)
void
CodeEmitterNV50::setAReg16(const Instruction *i, int s)
{
- s = i->src[s].indirect[0];
- if (s >= 0)
- setARegBits(SDATA(i->src[s]).id + 1);
+ if (i->srcExists(s)) {
+ s = i->src(s).indirect[0];
+ if (s >= 0)
+ setARegBits(SDATA(i->src(s)).id + 1);
+ }
}
void
CodeEmitterNV50::setImmediate(const Instruction *i, int s)
{
- const ImmediateValue *imm = i->src[s].get()->asImm();
+ const ImmediateValue *imm = i->src(s).get()->asImm();
assert(imm);
+ uint32_t u = imm->reg.data.u32;
+
+ if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
+ u = ~u;
+
code[1] |= 3;
- code[0] |= (imm->reg.data.u32 & 0x3f) << 16;
- code[1] |= (imm->reg.data.u32 >> 6) << 2;
+ code[0] |= (u & 0x3f) << 16;
+ code[1] |= (u >> 6) << 2;
}
void
@@ -271,13 +314,18 @@ CodeEmitterNV50::setDst(const Value *dst)
assert(reg->file != FILE_ADDRESS);
- if (reg->data.id < 0) {
+ if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
code[0] |= (127 << 2) | 1;
code[1] |= 8;
} else {
- if (reg->file == FILE_SHADER_OUTPUT)
+ int id;
+ if (reg->file == FILE_SHADER_OUTPUT) {
code[1] |= 8;
- code[0] |= reg->data.id << 2;
+ id = reg->data.offset / 4;
+ } else {
+ id = reg->data.id;
+ }
+ code[0] |= id << 2;
}
}
@@ -293,60 +341,135 @@ CodeEmitterNV50::setDst(const Instruction *i, int d)
}
}
+// 3 * 2 bits:
+// 0: r
+// 1: a/s
+// 2: c
+// 3: i
void
-CodeEmitterNV50::emitSrc0(const ValueRef& ref)
+CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
{
- const Storage *reg = &ref.rep()->reg;
-
- if (reg->file == FILE_SHADER_INPUT)
- code[1] |= 0x00200000;
- else
- if (reg->file != FILE_GPR)
- ERROR("invalid src0 register file: %d\n", reg->file);
+ uint8_t mode = 0;
- assert(reg->data.id < 128);
- code[0] |= reg->data.id << 9;
-}
+ for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
+ switch (i->src(s).getFile()) {
+ case FILE_GPR:
+ break;
+ case FILE_MEMORY_SHARED:
+ case FILE_SHADER_INPUT:
+ mode |= 1 << (s * 2);
+ break;
+ case FILE_MEMORY_CONST:
+ mode |= 2 << (s * 2);
+ break;
+ case FILE_IMMEDIATE:
+ mode |= 3 << (s * 2);
+ break;
+ default:
+ ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
+ assert(0);
+ break;
+ }
+ }
+ switch (mode) {
+ case 0x00: // rrr
+ break;
+ case 0x01: // arr/grr
+ if (progType == Program::TYPE_GEOMETRY) {
+ code[0] |= 0x01800000;
+ if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
+ code[1] |= 0x00200000;
+ } else {
+ if (enc == NV50_OP_ENC_SHORT)
+ code[0] |= 0x01000000;
+ else
+ code[1] |= 0x00200000;
+ }
+ break;
+ case 0x03: // irr
+ assert(i->op == OP_MOV);
+ return;
+ case 0x0c: // rir
+ break;
+ case 0x0d: // gir
+ code[0] |= 0x01000000;
+ assert(progType == Program::TYPE_GEOMETRY ||
+ progType == Program::TYPE_COMPUTE);
+ break;
+ case 0x08: // rcr
+ code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
+ code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
+ break;
+ case 0x09: // acr/gcr
+ if (progType == Program::TYPE_GEOMETRY) {
+ code[0] |= 0x01800000;
+ } else {
+ code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
+ code[1] |= 0x00200000;
+ }
+ code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
+ break;
+ case 0x20: // rrc
+ code[0] |= 0x01000000;
+ code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
+ break;
+ case 0x21: // arc
+ code[0] |= 0x01000000;
+ code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
+ assert(progType != Program::TYPE_GEOMETRY);
+ break;
+ default:
+ ERROR("not encodable: %x\n", mode);
+ assert(0);
+ break;
+ }
+ if (progType != Program::TYPE_COMPUTE)
+ return;
-void
-CodeEmitterNV50::emitSrc1(const ValueRef& ref)
-{
- const Storage *reg = &ref.rep()->reg;
+ if ((mode & 3) == 1) {
+ const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
- if (reg->file == FILE_MEMORY_CONST) {
- assert(!(code[1] & 0x01800000));
- code[0] |= 1 << 23;
- code[1] |= reg->fileIndex << 22;
- } else
- if (reg->file != FILE_GPR) {
- ERROR("invalid src1 register file: %d\n", reg->file);
+ switch (i->getSrc(0)->reg.type) {
+ case TYPE_U8:
+ break;
+ case TYPE_U16:
+ code[0] |= 1 << pos;
+ break;
+ case TYPE_S16:
+ code[0] |= 2 << pos;
+ break;
+ default:
+ code[0] |= 3 << pos;
+ assert(i->getSrc(0)->reg.size == 4);
+ break;
+ }
}
-
- assert(reg->data.id < 128);
- code[0] |= reg->data.id << 16;
}
void
-CodeEmitterNV50::emitSrc2(const ValueRef& ref)
+CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
{
- const Storage *reg = &ref.rep()->reg;
-
- if (reg->file == FILE_MEMORY_CONST) {
- assert(!(code[1] & 0x01800000));
- code[0] |= 1 << 24;
- code[1] |= reg->fileIndex << 22;
- } else
- if (reg->file != FILE_GPR) {
- ERROR("invalid src1 register file: %d\n", reg->file);
+ if (Target::operationSrcNr[i->op] <= s)
+ return;
+ const Storage *reg = &i->src(s).rep()->reg;
+
+ unsigned int id = (reg->file == FILE_GPR) ?
+ reg->data.id :
+ reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
+
+ switch (slot) {
+ case 0: code[0] |= id << 9; break;
+ case 1: code[0] |= id << 16; break;
+ case 2: code[1] |= id << 14; break;
+ default:
+ assert(0);
+ break;
}
-
- assert(reg->data.id < 128);
- code[1] |= reg->data.id << 14;
}
// the default form:
// - long instruction
-// - 1 to 3 sources in slots 0, 1, 2
+// - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
// - address & flags
void
CodeEmitterNV50::emitForm_MAD(const Instruction *i)
@@ -359,14 +482,10 @@ CodeEmitterNV50::emitForm_MAD(const Instruction *i)
setDst(i, 0);
- if (i->srcExists(0))
- emitSrc0(i->src[0]);
-
- if (i->srcExists(1))
- emitSrc1(i->src[1]);
-
- if (i->srcExists(2))
- emitSrc2(i->src[2]);
+ setSrcFileBits(i, NV50_OP_ENC_LONG);
+ setSrc(i, 0, 0);
+ setSrc(i, 1, 1);
+ setSrc(i, 2, 2);
setAReg16(i, 1);
}
@@ -383,16 +502,14 @@ CodeEmitterNV50::emitForm_ADD(const Instruction *i)
setDst(i, 0);
- if (i->srcExists(0))
- emitSrc0(i->src[0]);
-
- if (i->srcExists(1))
- emitSrc2(i->src[1]);
+ setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
+ setSrc(i, 0, 0);
+ setSrc(i, 1, 2);
setAReg16(i, 1);
}
-// default short form
+// default short form (rr, ar, rc, gr)
void
CodeEmitterNV50::emitForm_MUL(const Instruction *i)
{
@@ -402,15 +519,13 @@ CodeEmitterNV50::emitForm_MUL(const Instruction *i)
setDst(i, 0);
- if (i->srcExists(0))
- emitSrc0(i->src[0]);
-
- if (i->srcExists(1))
- emitSrc1(i->src[1]);
+ setSrcFileBits(i, NV50_OP_ENC_SHORT);
+ setSrc(i, 0, 0);
+ setSrc(i, 1, 1);
}
// usual immediate form
-// - 1 to 3 sources where last is immediate
+// - 1 to 3 sources where last is immediate (rir, gir)
// - no address or predicate possible
void
CodeEmitterNV50::emitForm_IMM(const Instruction *i)
@@ -422,21 +537,18 @@ CodeEmitterNV50::emitForm_IMM(const Instruction *i)
setDst(i, 0);
- if (i->srcExists(2)) {
- emitSrc0(i->src[0]);
- emitSrc1(i->src[1]);
- setImmediate(i, 2);
- } else
- if (i->srcExists(1)) {
- emitSrc0(i->src[0]);
+ setSrcFileBits(i, NV50_OP_ENC_IMM);
+ if (Target::operationSrcNr[i->op] > 1) {
+ setSrc(i, 0, 0);
setImmediate(i, 1);
+ setSrc(i, 2, 1);
} else {
setImmediate(i, 0);
}
}
void
-CodeEmitterNV50::emitLoadStoreSize(DataType ty, int pos)
+CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
{
uint8_t enc;
@@ -445,7 +557,9 @@ CodeEmitterNV50::emitLoadStoreSize(DataType ty, int pos)
case TYPE_S32: // fall through
case TYPE_U32: enc = 0x6; break;
case TYPE_B128: enc = 0x5; break;
- case TYPE_F64: enc = 0x4; break;
+ case TYPE_F64: // fall through
+ case TYPE_S64: // fall through
+ case TYPE_U64: enc = 0x4; break;
case TYPE_S16: enc = 0x3; break;
case TYPE_U16: enc = 0x2; break;
case TYPE_S8: enc = 0x1; break;
@@ -459,18 +573,58 @@ CodeEmitterNV50::emitLoadStoreSize(DataType ty, int pos)
}
void
+CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
+{
+ switch (ty) {
+ case TYPE_U8: break;
+ case TYPE_U16: code[1] |= 0x4000; break;
+ case TYPE_S16: code[1] |= 0x8000; break;
+ case TYPE_F32:
+ case TYPE_S32:
+ case TYPE_U32: code[1] |= 0xc000; break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+void
CodeEmitterNV50::emitLOAD(const Instruction *i)
{
- DataFile sf = i->src[0].getFile();
+ DataFile sf = i->src(0).getFile();
+ int32_t offset = i->getSrc(0)->reg.data.offset;
switch (sf) {
case FILE_SHADER_INPUT:
- code[0] = 0x10000001;
- code[1] = 0x04200000 | (i->lanes << 14);
+ // use 'mov' where we can
+ code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
+ code[1] = 0x00200000 | (i->lanes << 14);
+ if (typeSizeof(i->dType) == 4)
+ code[1] |= 0x04000000;
+ break;
+ case FILE_MEMORY_SHARED:
+ if (targ->getChipset() >= 0x84) {
+ assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
+ code[0] = 0x10000001;
+ code[1] = 0x40000000;
+
+ if (typeSizeof(i->dType) == 4)
+ code[1] |= 0x04000000;
+
+ emitLoadStoreSizeCS(i->sType);
+ } else {
+ assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
+ code[0] = 0x10000001;
+ code[1] = 0x00200000 | (i->lanes << 14);
+ emitLoadStoreSizeCS(i->sType);
+ }
break;
case FILE_MEMORY_CONST:
code[0] = 0x10000001;
- code[1] = 0x24000000 | (i->getSrc(0)->reg.fileIndex << 22);
+ code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
+ if (typeSizeof(i->dType) == 4)
+ code[1] |= 0x04000000;
+ emitLoadStoreSizeCS(i->sType);
break;
case FILE_MEMORY_LOCAL:
code[0] = 0xd0000001;
@@ -486,18 +640,18 @@ CodeEmitterNV50::emitLOAD(const Instruction *i)
}
if (sf == FILE_MEMORY_LOCAL ||
sf == FILE_MEMORY_GLOBAL)
- emitLoadStoreSize(i->sType, 21 + 32);
+ emitLoadStoreSizeLG(i->sType, 21 + 32);
setDst(i, 0);
emitFlagsRd(i);
emitFlagsWr(i);
- if (i->src[0].getFile() == FILE_MEMORY_GLOBAL) {
- srcId(*i->src[0].getIndirect(0), 9);
+ if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
+ srcId(*i->src(0).getIndirect(0), 9);
} else {
setAReg16(i, 0);
- srcAddr16(i->src[0], 9);
+ srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
}
}
@@ -509,19 +663,21 @@ CodeEmitterNV50::emitSTORE(const Instruction *i)
switch (f) {
case FILE_SHADER_OUTPUT:
- code[0] = 0x00000001 | ((offset >> 2) << 2);
+ code[0] = 0x00000001 | ((offset >> 2) << 9);
code[1] = 0x80c00000;
- srcId(i->src[1], 32 + 15);
+ srcId(i->src(1), 32 + 14);
break;
case FILE_MEMORY_GLOBAL:
- code[0] = 0xd0000000;
+ code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
code[1] = 0xa0000000;
- emitLoadStoreSize(i->dType, 21 + 32);
+ emitLoadStoreSizeLG(i->dType, 21 + 32);
+ srcId(i->src(1), 2);
break;
case FILE_MEMORY_LOCAL:
code[0] = 0xd0000001;
code[1] = 0x60000000;
- emitLoadStoreSize(i->dType, 21 + 32);
+ emitLoadStoreSizeLG(i->dType, 21 + 32);
+ srcId(i->src(1), 2);
break;
case FILE_MEMORY_SHARED:
code[0] = 0x00000001;
@@ -536,28 +692,27 @@ CodeEmitterNV50::emitSTORE(const Instruction *i)
break;
case 4:
code[0] |= (offset >> 2) << 9;
- code[1] |= 0x04000000;
+ code[1] |= 0x04200000;
break;
default:
assert(0);
break;
}
+ srcId(i->src(1), 32 + 14);
break;
default:
assert(!"invalid store destination file");
break;
}
- if (f != FILE_SHADER_OUTPUT) {
- srcId(i->src[1], 2);
- if (f == FILE_MEMORY_GLOBAL)
- srcId(*i->src[0].getIndirect(0), 9);
- if (f == FILE_MEMORY_LOCAL)
- srcAddr16(i->src[0], 9);
- }
- if (f != FILE_MEMORY_GLOBAL)
+ if (f == FILE_MEMORY_GLOBAL)
+ srcId(*i->src(0).getIndirect(0), 9);
+ else
setAReg16(i, 0);
+ if (f == FILE_MEMORY_LOCAL)
+ srcAddr16(i->src(0), false, 9);
+
emitFlagsRd(i);
}
@@ -572,21 +727,22 @@ CodeEmitterNV50::emitMOV(const Instruction *i)
if (sf == FILE_FLAGS) {
code[0] = 0x00000001;
code[1] = 0x20000000;
- defId(i->def[0], 2);
- srcId(i->src[0], 12);
+ defId(i->def(0), 2);
+ srcId(i->src(0), 12);
emitFlagsRd(i);
} else
if (sf == FILE_ADDRESS) {
code[0] = 0x00000001;
code[1] = 0x40000000;
- defId(i->def[0], 2);
- setARegBits(SDATA(i->src[0]).id + 1);
+ defId(i->def(0), 2);
+ setARegBits(SDATA(i->src(0)).id + 1);
+ emitFlagsRd(i);
} else
if (df == FILE_FLAGS) {
code[0] = 0x00000001;
code[1] = 0xa0000000;
- defId(i->def[0], 4);
- srcId(i->src[0], 9);
+ defId(i->def(0), 4);
+ srcId(i->src(0), 9);
emitFlagsRd(i);
} else
if (sf == FILE_IMMEDIATE) {
@@ -598,10 +754,12 @@ CodeEmitterNV50::emitMOV(const Instruction *i)
code[0] = 0x10008000;
} else {
code[0] = 0x10000001;
- code[1] = 0x04000000 | (i->lanes << 14);
+ code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
+ code[1] |= (i->lanes << 14);
+ emitFlagsRd(i);
}
- defId(i->def[0], 2);
- srcId(i->src[0], 9);
+ defId(i->def(0), 2);
+ srcId(i->src(0), 9);
}
if (df == FILE_SHADER_OUTPUT) {
assert(i->encSize == 8);
@@ -628,7 +786,7 @@ CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
emitForm_ADD(i);
if (!i->srcExists(1))
- srcId(i->src[0], 32 + 14);
+ srcId(i->src(0), 32 + 14);
}
void
@@ -637,8 +795,8 @@ CodeEmitterNV50::emitPFETCH(const Instruction *i)
code[0] = 0x11800001;
code[1] = 0x04200000 | (0xf << 14);
- defId(i->def[0], 2);
- srcAddr8(i->src[0], 9);
+ defId(i->def(0), 2);
+ srcAddr8(i->src(0), 9);
setAReg16(i, 0);
}
@@ -647,27 +805,27 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
{
code[0] = 0x80000000;
- defId(i->def[0], 2);
- srcAddr8(i->src[0], 16);
+ defId(i->def(0), 2);
+ srcAddr8(i->src(0), 16);
if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
code[0] |= 1 << 8;
} else {
if (i->op == OP_PINTERP) {
code[0] |= 1 << 25;
- srcId(i->src[1], 9);
+ srcId(i->src(1), 9);
}
if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
code[0] |= 1 << 24;
}
if (i->encSize == 8) {
- emitFlagsRd(i);
- code[1] |=
+ code[1] =
(code[0] & (3 << 24)) >> (24 - 16) |
- (code[0] & (1 << 8)) >> (18 - 8);
+ (code[0] & (1 << 8)) << (18 - 8);
code[0] &= ~0x03000100;
code[0] |= 1;
+ emitFlagsRd(i);
}
}
@@ -693,8 +851,8 @@ CodeEmitterNV50::emitMINMAX(const Instruction *i)
assert(0);
break;
}
- code[1] |= i->src[0].mod.abs() << 20;
- code[1] |= i->src[1].mod.abs() << 19;
+ code[1] |= i->src(0).mod.abs() << 20;
+ code[1] |= i->src(1).mod.abs() << 19;
}
emitForm_MAD(i);
}
@@ -702,8 +860,8 @@ CodeEmitterNV50::emitMINMAX(const Instruction *i)
void
CodeEmitterNV50::emitFMAD(const Instruction *i)
{
- const int neg_mul = i->src[0].mod.neg() ^ i->src[1].mod.neg();
- const int neg_add = i->src[2].mod.neg();
+ const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
+ const int neg_add = i->src(2).mod.neg();
code[0] = 0xe0000000;
@@ -711,30 +869,32 @@ CodeEmitterNV50::emitFMAD(const Instruction *i)
emitForm_MUL(i);
assert(!neg_mul && !neg_add);
} else {
- emitForm_MAD(i);
- code[1] |= neg_mul << 26;
+ code[1] = neg_mul << 26;
code[1] |= neg_add << 27;
if (i->saturate)
code[1] |= 1 << 29;
+ emitForm_MAD(i);
}
}
void
CodeEmitterNV50::emitFADD(const Instruction *i)
{
- const int neg0 = i->src[0].mod.neg();
- const int neg1 = i->src[1].mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
+ const int neg0 = i->src(0).mod.neg();
+ const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
code[0] = 0xb0000000;
- assert(!(i->src[0].mod | i->src[1].mod).abs());
+ assert(!(i->src(0).mod | i->src(1).mod).abs());
- if (i->src[1].getFile() == FILE_IMMEDIATE) {
+ if (i->src(1).getFile() == FILE_IMMEDIATE) {
+ code[1] = 0;
emitForm_IMM(i);
code[0] |= neg0 << 15;
code[0] |= neg1 << 22;
} else
if (i->encSize == 8) {
+ code[1] = 0;
emitForm_ADD(i);
code[1] |= neg0 << 26;
code[1] |= neg1 << 27;
@@ -744,27 +904,40 @@ CodeEmitterNV50::emitFADD(const Instruction *i)
emitForm_MUL(i);
code[0] |= neg0 << 15;
code[0] |= neg1 << 22;
+ if (i->saturate)
+ code[0] |= 1 << 8;
}
}
void
CodeEmitterNV50::emitUADD(const Instruction *i)
{
+ const int neg0 = i->src(0).mod.neg();
+ const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
+
code[0] = 0x20008000;
- if (i->src[0].getFile() == FILE_IMMEDIATE) {
+ if (i->src(1).getFile() == FILE_IMMEDIATE) {
+ code[1] = 0;
emitForm_IMM(i);
} else
if (i->encSize == 8) {
code[0] = 0x20000000;
- code[1] = 0x04000000;
+ code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
emitForm_ADD(i);
} else {
emitForm_MUL(i);
}
- assert(!(i->src[0].mod.neg() && i->src[1].mod.neg()));
- code[0] |= i->src[0].mod.neg() << 28;
- code[0] |= i->src[1].mod.neg() << 22;
+ assert(!(neg0 && neg1));
+ code[0] |= neg0 << 28;
+ code[0] |= neg1 << 22;
+
+ if (i->flagsSrc >= 0) {
+ // addc == sub | subr
+ assert(!(code[0] & 0x10400000) && !i->getPredicate());
+ code[0] |= 0x10400000;
+ srcId(i->src(i->flagsSrc), 32 + 12);
+ }
}
void
@@ -775,30 +948,47 @@ CodeEmitterNV50::emitAADD(const Instruction *i)
code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
code[1] = 0x20000000;
- code[0] |= (DDATA(i->def[0]).id + 1) << 2;
+ code[0] |= (DDATA(i->def(0)).id + 1) << 2;
emitFlagsRd(i);
if (s && i->srcExists(0))
- setARegBits(SDATA(i->src[0]).id + 1);
+ setARegBits(SDATA(i->src(0)).id + 1);
+}
+
+void
+CodeEmitterNV50::emitIMUL(const Instruction *i)
+{
+ code[0] = 0x40000000;
+
+ if (i->encSize == 8) {
+ code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
+ emitForm_MAD(i);
+ } else {
+ if (i->sType == TYPE_S16)
+ code[0] |= 0x8100;
+ emitForm_MUL(i);
+ }
}
void
CodeEmitterNV50::emitFMUL(const Instruction *i)
{
- const int neg = (i->src[0].mod ^ i->src[1].mod).neg();
+ const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
code[0] = 0xc0000000;
- if (i->src[0].getFile() == FILE_IMMEDIATE) {
+ if (i->src(1).getFile() == FILE_IMMEDIATE) {
+ code[1] = 0;
emitForm_IMM(i);
if (neg)
code[0] |= 0x8000;
} else
if (i->encSize == 8) {
- emitForm_MAD(i);
+ code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
if (neg)
code[1] |= 0x08000000;
+ emitForm_MAD(i);
} else {
emitForm_MUL(i);
if (neg)
@@ -807,12 +997,38 @@ CodeEmitterNV50::emitFMUL(const Instruction *i)
}
void
+CodeEmitterNV50::emitIMAD(const Instruction *i)
+{
+ code[0] = 0x60000000;
+ if (isSignedType(i->sType))
+ code[1] = i->saturate ? 0x40000000 : 0x20000000;
+ else
+ code[1] = 0x00000000;
+
+ int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
+ int neg2 = i->src(2).mod.neg();
+
+ assert(!(neg1 & neg2));
+ code[1] |= neg1 << 27;
+ code[1] |= neg2 << 26;
+
+ emitForm_MAD(i);
+
+ if (i->flagsSrc >= 0) {
+ // add with carry from $cX
+ assert(!(code[1] & 0x0c000000) && !i->getPredicate());
+ code[1] |= 0xc << 24;
+ srcId(i->src(i->flagsSrc), 32 + 12);
+ }
+}
+
+void
CodeEmitterNV50::emitSET(const Instruction *i)
{
code[0] = 0x30000000;
code[1] = 0x60000000;
- emitCondCode(i->asCmp()->setCond, 32 + 14);
+ emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
switch (i->sType) {
case TYPE_F32: code[0] |= 0x80000000; break;
@@ -824,6 +1040,11 @@ CodeEmitterNV50::emitSET(const Instruction *i)
assert(0);
break;
}
+ if (i->src(0).mod.neg()) code[1] |= 0x04000000;
+ if (i->src(1).mod.neg()) code[1] |= 0x08000000;
+ if (i->src(0).mod.abs()) code[1] |= 0x00100000;
+ if (i->src(1).mod.abs()) code[1] |= 0x00080000;
+
emitForm_MAD(i);
}
@@ -938,6 +1159,7 @@ CodeEmitterNV50::emitCVT(const Instruction *i)
assert(0);
break;
}
+ break;
case TYPE_S16:
case TYPE_U16:
case TYPE_S8:
@@ -958,12 +1180,12 @@ CodeEmitterNV50::emitCVT(const Instruction *i)
default:
break;
}
- code[1] ^= i->src[0].mod.neg() << 29;
- code[1] |= i->src[0].mod.abs() << 20;
+ code[1] ^= i->src(0).mod.neg() << 29;
+ code[1] |= i->src(0).mod.abs() << 20;
if (i->saturate)
code[1] |= 1 << 19;
- assert(i->op != OP_ABS || !i->src[0].mod.neg());
+ assert(i->op != OP_ABS || !i->src(0).mod.neg());
emitForm_MAD(i);
}
@@ -974,8 +1196,8 @@ CodeEmitterNV50::emitPreOp(const Instruction *i)
code[0] = 0xb0000000;
code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
- code[1] |= i->src[0].mod.abs() << 20;
- code[1] |= i->src[0].mod.neg() << 26;
+ code[1] |= i->src(0).mod.abs() << 20;
+ code[1] |= i->src(0).mod.neg() << 26;
emitForm_MAD(i);
}
@@ -990,18 +1212,37 @@ CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
emitForm_MUL(i);
} else {
code[1] = subOp << 29;
- code[1] |= i->src[0].mod.abs() << 20;
- code[1] |= i->src[0].mod.neg() << 26;
+ code[1] |= i->src(0).mod.abs() << 20;
+ code[1] |= i->src(0).mod.neg() << 26;
emitForm_MAD(i);
}
}
void
+CodeEmitterNV50::emitNOT(const Instruction *i)
+{
+ code[0] = 0xd0000000;
+ code[1] = 0x0002c000;
+
+ switch (i->sType) {
+ case TYPE_U32:
+ case TYPE_S32:
+ code[1] |= 0x04000000;
+ break;
+ default:
+ break;
+ }
+ emitForm_MAD(i);
+ setSrc(i, 0, 1);
+}
+
+void
CodeEmitterNV50::emitLogicOp(const Instruction *i)
{
code[0] = 0xd0000000;
+ code[1] = 0;
- if (i->src[1].getFile() == FILE_IMMEDIATE) {
+ if (i->src(1).getFile() == FILE_IMMEDIATE) {
switch (i->op) {
case OP_OR: code[0] |= 0x0100; break;
case OP_XOR: code[0] |= 0x8000; break;
@@ -1019,37 +1260,45 @@ CodeEmitterNV50::emitLogicOp(const Instruction *i)
assert(0);
break;
}
+ if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
+ code[1] |= 1 << 16;
+ if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
+ code[1] |= 1 << 17;
+
emitForm_MAD(i);
}
}
void
-CodeEmitterNV50::emitARL(const Instruction *i)
+CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
{
- assert(i->src[1].getFile() == FILE_IMMEDIATE);
-
- code[0] = 0x00000001 | (i->getSrc(1)->reg.data.u32 & 0x3f) << 16;
+ code[0] = 0x00000001 | (shl << 16);
code[1] = 0xc0000000;
- code[0] |= (DDATA(i->def[0]).id + 1) << 2;
- emitSrc0(i->src[0]);
+ code[0] |= (DDATA(i->def(0)).id + 1) << 2;
+
+ setSrcFileBits(i, NV50_OP_ENC_IMM);
+ setSrc(i, 0, 0);
emitFlagsRd(i);
}
void
CodeEmitterNV50::emitShift(const Instruction *i)
{
- if (i->def[0].getFile() == FILE_ADDRESS) {
- emitARL(i);
+ if (i->def(0).getFile() == FILE_ADDRESS) {
+ assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
+ emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
} else {
code[0] = 0x30000001;
code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
- if (isSignedType(i->sType))
+ if (i->op == OP_SHR && isSignedType(i->sType))
code[1] |= 1 << 27;
- if (i->src[1].getFile() == FILE_IMMEDIATE) {
+ if (i->src(1).getFile() == FILE_IMMEDIATE) {
code[1] |= 1 << 20;
code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
+ defId(i->def(0), 2);
+ srcId(i->src(0), 9);
emitFlagsRd(i);
} else {
emitForm_MAD(i);
@@ -1080,7 +1329,7 @@ CodeEmitterNV50::emitTEX(const TexInstruction *i)
code[1] = 0x40000000;
break;
case OP_TXF:
- code[0] = 0x01000000;
+ code[0] |= 0x01000000;
break;
case OP_TXG:
code[0] = 0x01000000;
@@ -1096,7 +1345,7 @@ CodeEmitterNV50::emitTEX(const TexInstruction *i)
int argc = i->tex.target.getArgCount();
- if (i->op == OP_TXB || i->op == OP_TXL)
+ if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
argc += 1;
if (i->tex.target.isShadow())
argc += 1;
@@ -1108,9 +1357,9 @@ CodeEmitterNV50::emitTEX(const TexInstruction *i)
code[0] |= 0x08000000;
} else
if (i->tex.useOffsets) {
- code[1] |= (i->tex.offset[0][0] & 0xf) << 16;
+ code[1] |= (i->tex.offset[0][0] & 0xf) << 24;
code[1] |= (i->tex.offset[0][1] & 0xf) << 20;
- code[1] |= (i->tex.offset[0][2] & 0xf) << 24;
+ code[1] |= (i->tex.offset[0][2] & 0xf) << 16;
}
code[0] |= (i->tex.mask & 0x3) << 25;
@@ -1119,27 +1368,100 @@ CodeEmitterNV50::emitTEX(const TexInstruction *i)
if (i->tex.liveOnly)
code[1] |= 4;
- defId(i->def[0], 2);
+ defId(i->def(0), 2);
+
+ emitFlagsRd(i);
+}
+
+// Emit the encoding of a texture query (TXQ) instruction.
+// Only the TXQ_DIMS query is handled; any other query trips the assert.
+void
+CodeEmitterNV50::emitTXQ(const TexInstruction *i)
+{
+ assert(i->tex.query == TXQ_DIMS);
+
+ code[0] = 0xf0000001;
+ code[1] = 0x60000000;
+
+ // tex.r is placed at bit 9 and tex.s at bit 17 of the first word
+ code[0] |= i->tex.r << 9;
+ code[0] |= i->tex.s << 17;
+
+ // the component mask is split over both words:
+ // mask bits 0-1 -> code[0] bits 25-26, mask bits 2-3 -> code[1] bits 14-15
+ code[0] |= (i->tex.mask & 0x3) << 25;
+ code[1] |= (i->tex.mask & 0xc) << 12;
+
+ defId(i->def(0), 2);
emitFlagsRd(i);
}
+// Emit one of the three flow instructions used to emulate PRERET.
+// The variant is selected by i->subOp relative to NV50_IR_SUBOP_EMU_PRERET:
+// +0: bra to the call, +1: bra that skips the call, +2: the call itself.
+// The target address is not written into code[] here; it is only recorded
+// through the two relocation entries at the end.
void
+CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
+{
+ uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op
+
+ code[0] = 0x10000003; // bra
+ code[1] = 0x00000780; // always
+
+ switch (i->subOp) {
+ case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
+ break;
+ case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
+ pos += 8;
+ break;
+ default:
+ assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
+ code[0] = 0x20000003; // call
+ code[1] = 0x00000000; // no predicate
+ break;
+ }
+ // one relocation per code word, same masks/shifts as used by emitFlow
+ addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
+ addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
+}
+
+void
CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
{
const FlowInstruction *f = i->asFlow();
+ bool hasPred = false;
+ bool hasTarg = false;
code[0] = 0x00000003 | (flowOp << 28);
code[1] = 0x00000000;
- emitFlagsRd(i);
+ switch (i->op) {
+ case OP_BRA:
+ hasPred = true;
+ hasTarg = true;
+ break;
+ case OP_BREAK:
+ case OP_BRKPT:
+ case OP_DISCARD:
+ case OP_RET:
+ hasPred = true;
+ break;
+ case OP_CALL:
+ case OP_PREBREAK:
+ case OP_JOINAT:
+ hasTarg = true;
+ break;
+ case OP_PRERET:
+ hasTarg = true;
+ if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
+ emitPRERETEmu(f);
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (hasPred)
+ emitFlagsRd(i);
- if (f && f->target.bb) {
+ if (hasTarg && f) {
uint32_t pos;
if (f->op == OP_CALL) {
if (f->builtin) {
- pos = 0; // XXX: TODO
+ pos = targ->getBuiltinOffset(f->target.builtin);
} else {
pos = f->target.fn->binPos;
}
@@ -1149,6 +1471,13 @@ CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
code[0] |= ((pos >> 2) & 0xffff) << 11;
code[1] |= ((pos >> 18) & 0x003f) << 14;
+
+ RelocEntry::Type relocTy;
+
+ relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
+
+ addReloc(relocTy, 0, pos, 0x07fff800, 9);
+ addReloc(relocTy, 1, pos, 0x000fc000, -4);
}
}
@@ -1164,10 +1493,15 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
return false;
}
+ if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
+ INFO("EMIT: "); insn->print();
+ }
+
switch (insn->op) {
case OP_MOV:
emitMOV(insn);
break;
+ case OP_EXIT:
case OP_NOP:
case OP_JOIN:
emitNOP();
@@ -1191,6 +1525,8 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
case OP_SUB:
if (isFloatType(insn->dType))
emitFADD(insn);
+ else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
+ emitAADD(insn);
else
emitUADD(insn);
break;
@@ -1198,18 +1534,30 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
if (isFloatType(insn->dType))
emitFMUL(insn);
else
- emitUMUL(insn);
+ emitIMUL(insn);
break;
case OP_MAD:
case OP_FMA:
- emitFMAD(insn);
+ if (isFloatType(insn->dType))
+ emitFMAD(insn);
+ else
+ emitIMAD(insn);
break;
+ case OP_NOT:
+ emitNOT(insn);
break;
case OP_AND:
case OP_OR:
case OP_XOR:
emitLogicOp(insn);
break;
+ case OP_SHL:
+ case OP_SHR:
+ emitShift(insn);
+ break;
+ case OP_SET:
+ emitSET(insn);
+ break;
case OP_MIN:
case OP_MAX:
emitMINMAX(insn);
@@ -1217,9 +1565,22 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
- case OP_CVT:
+ case OP_ABS:
+ case OP_NEG:
+ case OP_SAT:
emitCVT(insn);
break;
+ case OP_CVT:
+ if (insn->def(0).getFile() == FILE_ADDRESS)
+ emitARL(insn, 0);
+ else
+ if (insn->def(0).getFile() == FILE_FLAGS ||
+ insn->src(0).getFile() == FILE_FLAGS ||
+ insn->src(0).getFile() == FILE_ADDRESS)
+ emitMOV(insn);
+ else
+ emitCVT(insn);
+ break;
case OP_RCP:
emitSFnOp(insn, 0);
break;
@@ -1245,8 +1606,12 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
case OP_TEX:
case OP_TXB:
case OP_TXL:
+ case OP_TXF:
emitTEX(insn->asTex());
break;
+ case OP_TXQ:
+ emitTXQ(insn->asTex());
+ break;
case OP_EMIT:
case OP_RESTART:
emitOUT(insn);
@@ -1285,15 +1650,15 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
emitQUADOP(insn, insn->lanes, insn->subOp);
break;
case OP_DFDX:
- emitQUADOP(insn, 4, insn->src[0].mod.neg() ? 0x66 : 0x99);
+ emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
break;
case OP_DFDY:
- emitQUADOP(insn, 5, insn->src[0].mod.neg() ? 0x5a : 0xa5);
+ emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
break;
case OP_PHI:
case OP_UNION:
case OP_CONSTRAINT:
- ERROR("operation should have been eliminated");
+ ERROR("operation should have been eliminated\n");
return false;
case OP_EXP:
case OP_LOG:
@@ -1310,16 +1675,16 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
ERROR("operation should have been lowered\n");
return false;
default:
- ERROR("unknow op\n");
+ ERROR("unknown op: %u\n", insn->op);
return false;
}
- if (insn->join)
+ if (insn->join || insn->op == OP_JOIN)
code[1] |= 0x2;
else
- if (insn->exit)
+ if (insn->exit || insn->op == OP_EXIT)
code[1] |= 0x1;
- assert((insn->encSize == 8) == (code[1] & 1));
+ assert((insn->encSize == 8) == (code[0] & 1));
code += insn->encSize / 4;
codeSize += insn->encSize;
@@ -1331,20 +1696,147 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
{
const Target::OpInfo &info = targ->getOpInfo(i);
- if (info.minEncSize == 8)
+ if (info.minEncSize > 4)
+ return 8;
+
+ // check constraints on dst and src operands
+ for (int d = 0; i->defExists(d); ++d) {
+ if (i->def(d).rep()->reg.data.id > 63 ||
+ i->def(d).rep()->reg.file != FILE_GPR)
+ return 8;
+ }
+
+ for (int s = 0; i->srcExists(s); ++s) {
+ DataFile sf = i->src(s).getFile();
+ if (sf != FILE_GPR)
+ if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
+ return 8;
+ if (i->src(s).rep()->reg.data.id > 63)
+ return 8;
+ }
+
+ // check modifiers & rounding
+ if (i->join || i->lanes != 0xf || i->exit)
+ return 8;
+ if (i->op == OP_MUL && i->rnd != ROUND_N)
return 8;
- return 4;
+ if (i->asTex())
+ return 8; // TODO: short tex encoding
+
+ // check constraints on short MAD
+ if (info.srcNr >= 2 && i->srcExists(2)) {
+ if (i->saturate || i->src(2).mod)
+ return 8;
+ if ((i->src(0).mod ^ i->src(1).mod) ||
+ (i->src(0).mod | i->src(1).mod).abs())
+ return 8;
+ if (!i->defExists(0) ||
+ i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
+ return 8;
+ }
+
+ return info.minEncSize;
+}
+
+// Change the encoding size of an instruction after BBs have been scheduled.
+//
+// Promotes @insn from the short (4 byte) to the long (8 byte) encoding.
+// If @insn is followed by an odd number of short instructions, the next one
+// is promoted as well; otherwise, if the previous instruction is short, that
+// one is promoted instead (presumably because short instructions have to
+// come in pairs - TODO confirm).
+// Afterwards the positions of the blocks that follow insn's block in
+// fn->bbArray, the block size and the function size are adjusted by the
+// number of bytes that were added.
+static void
+makeInstructionLong(Instruction *insn)
+{
+   if (insn->encSize == 8)
+      return; // already long, nothing to do
+   Function *fn = insn->bb->getFunction();
+   int n = 0;   // number of consecutive short instructions following insn
+   int adj = 4; // bytes added: 4 for insn alone, 8 if a neighbour grows too
+
+   for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
+
+   if (n & 1) {
+      adj = 8;
+      insn->next->encSize = 8;
+   } else
+   if (insn->prev && insn->prev->encSize == 4) {
+      adj = 8;
+      insn->prev->encSize = 8;
+   }
+   insn->encSize = 8;
+
+   // Move the following blocks by the same amount the block grew. Shifting
+   // by a constant 4 would leave binPos inconsistent with binSize whenever
+   // a neighbouring instruction was promoted as well (adj == 8).
+   for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i)
+      fn->bbArray[i]->binPos += adj;
+
+   fn->binSize += adj;
+   insn->bb->binSize += adj;
+}
+
+// Try to turn @insn into the last instruction of the program by setting its
+// exit modifier. Returns false (leaving @insn untouched) if the instruction
+// is rejected by one of the checks below, true after the modifier was set
+// and the instruction was made long.
+static bool
+trySetExitModifier(Instruction *insn)
+{
+   switch (insn->op) {
+   case OP_DISCARD:
+   case OP_QUADON:
+   case OP_QUADPOP:
+      return false;
+   default:
+      break;
+   }
+
+   // instructions with an immediate source are rejected
+   for (int idx = 0; insn->srcExists(idx); ++idx) {
+      const bool isImm = insn->src(idx).getFile() == FILE_IMMEDIATE;
+      if (isImm)
+         return false;
+   }
+
+   if (insn->asFlow()) {
+      // calls have side effects, and a conditional exit is not attempted
+      if (insn->op == OP_CALL || insn->getPredicate())
+         return false;
+      // any other flow instruction is simply replaced by the exit
+      insn->op = OP_EXIT;
+   }
+
+   insn->exit = 1;
+   makeInstructionLong(insn);
+   return true;
+}
+
+// Remove the trailing OP_EXIT of @func by moving the exit into a modifier on
+// the instruction(s) executed right before it. Does nothing if the epilogue
+// does not end in OP_EXIT or if any candidate instruction cannot carry the
+// modifier.
+static void
+replaceExitWithModifier(Function *func)
+{
+   BasicBlock *const epilogue = BasicBlock::get(func->cfgExit);
+   Instruction *const exitInsn = epilogue->getExit();
+
+   // only main will use OP_EXIT
+   if (!exitInsn || exitInsn->op != OP_EXIT)
+      return;
+
+   if (epilogue->getEntry()->op == OP_EXIT) {
+      // the epilogue starts with the EXIT: put the modifier on the last
+      // instruction of every block with an edge into the epilogue
+      for (Graph::EdgeIterator ei = func->cfgExit->incident();
+           !ei.end(); ei.next()) {
+         Instruction *last = BasicBlock::get(ei.getNode())->getExit();
+
+         if (!last || !trySetExitModifier(last))
+            return;
+      }
+   } else {
+      // the epilogue has other instructions: use the one before the EXIT
+      Instruction *before = exitInsn->prev;
+      if (!before || !trySetExitModifier(before))
+         return;
+      before->exit = 1;
+   }
+
+   // the EXIT instruction (8 bytes) is no longer needed
+   epilogue->binSize -= 8;
+   func->binSize -= 8;
+   delete_Instruction(func->getProgram(), epilogue->getExit());
+}
+
+// Run the generic emission preparation for @func, then try to fold the
+// trailing OP_EXIT into an exit modifier (see replaceExitWithModifier).
+void
+CodeEmitterNV50::prepareEmission(Function *func)
+{
+ CodeEmitter::prepareEmission(func);
+
+ replaceExitWithModifier(func);
}
-CodeEmitterNV50::CodeEmitterNV50(const Target *target) : targ(target)
+CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) : CodeEmitter(target)
{
+ targ = target; // specialized
code = NULL;
codeSize = codeSizeLimit = 0;
+ relocInfo = NULL;
}
CodeEmitter *
-Target::getCodeEmitter(Program::Type type)
+TargetNV50::getCodeEmitter(Program::Type type)
{
CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
emit->setProgramType(type);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
new file mode 100644
index 00000000000..30d8acee3bc
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
@@ -0,0 +1,1118 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50/codegen/nv50_ir.h"
+#include "nv50/codegen/nv50_ir_build_util.h"
+
+#include "nv50_ir_target_nv50.h"
+
+namespace nv50_ir {
+
+// nv50 doesn't support 32 bit integer multiplication
+//
+// ah al * bh bl = LO32: (al * bh + ah * bl) << 16 + (al * bl)
+// -------------------
+// al*bh 00 HI32: (al * bh + ah * bl) >> 16 + (ah * bh) +
+// ah*bh 00 00 ( carry1) << 16 + ( carry2)
+// al*bl
+// ah*bl 00
+//
+// fffe0001 + fffe0001
+// Expand an integer multiplication into operations on half-width operands.
+//
+// @bld: builder used to create the replacement instructions
+// @mul: the multiplication to expand; it is deleted on success
+//
+// Returns false (leaving @mul untouched) if the source type is not a 32 or
+// 64 bit integer type, true otherwise. If mul->subOp is
+// NV50_IR_SUBOP_MUL_HIGH the upper half of the product is written to the
+// original destination, otherwise the lower half.
+static bool
+expandIntegerMUL(BuildUtil *bld, Instruction *mul)
+{
+ const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH;
+
+ DataType fTy = mul->sType; // full type
+ DataType hTy; // type of one half of an operand
+ switch (fTy) {
+ case TYPE_S32: hTy = TYPE_S16; break;
+ case TYPE_U32: hTy = TYPE_U16; break;
+ case TYPE_U64: hTy = TYPE_U32; break;
+ case TYPE_S64: hTy = TYPE_S32; break;
+ default:
+ return false;
+ }
+ unsigned int fullSize = typeSizeof(fTy);
+ unsigned int halfSize = typeSizeof(hTy);
+
+ // i[0..1]: splits, i[2..5]: multiplies/MADs on halves, i[6]: carry add,
+ // i[7..8]: shifts; i[5], i[6] and i[8] are only set for the high result
+ Instruction *i[9];
+
+ Value *a[2] = { bld->getSSA(halfSize), bld->getSSA(halfSize) };
+ Value *b[2] = { bld->getSSA(halfSize), bld->getSSA(halfSize) };
+ Value *c[2];
+ Value *t[4];
+ for (int j = 0; j < 4; ++j)
+ t[j] = bld->getSSA(fullSize);
+
+ // split both operands into two halves: a[0]/b[0] and a[1]/b[1]
+ (i[0] = bld->mkOp1(OP_SPLIT, fTy, a[0], mul->getSrc(0)))->setDef(1, a[1]);
+ (i[1] = bld->mkOp1(OP_SPLIT, fTy, b[0], mul->getSrc(1)))->setDef(1, b[1]);
+
+ // t[1] = a0 * b1 + a1 * b0 (cross terms)
+ // t[3] = a0 * b0 + (t[1] << half width)
+ i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]);
+ i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
+ i[7] = bld->mkOp2(OP_SHL, fTy, t[2], t[1], bld->mkImm(halfSize * 8));
+ i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
+
+ if (highResult) {
+ Value *r[3];
+ // NOTE(review): for the 64 bit types halfSize is presumably 4, which
+ // makes this an int shift by 32 - confirm this case is never reached.
+ Value *imm = bld->loadImm(NULL, 1 << (halfSize * 8));
+ c[0] = bld->getSSA(1, FILE_FLAGS);
+ c[1] = bld->getSSA(1, FILE_FLAGS);
+ for (int j = 0; j < 3; ++j)
+ r[j] = bld->getSSA(fullSize);
+
+ // r[0] = upper part of the cross terms; r[1] = r[0] + imm, executed
+ // only if the cross term addition produced a carry (predicate below)
+ i[8] = bld->mkOp2(OP_SHR, fTy, r[0], t[1], bld->mkImm(halfSize * 8));
+ i[6] = bld->mkOp2(OP_ADD, fTy, r[1], r[0], imm);
+ bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[0]);
+ i[5] = bld->mkOp3(OP_MAD, fTy, mul->getDef(0), a[1], b[1], r[2]);
+
+ // set carry defs / sources
+ i[3]->setFlagsDef(1, c[0]);
+ i[4]->setFlagsDef(0, c[1]); // actual result not required, just the carry
+ i[6]->setPredicate(CC_C, c[0]);
+ i[5]->setFlagsSrc(3, c[1]);
+ } else {
+ bld->mkMov(mul->getDef(0), t[3]);
+ }
+ delete_Instruction(bld->getProgram(), mul);
+
+ // the multiplies / MADs created above operate on half-width sources
+ for (int j = 2; j <= (highResult ? 5 : 4); ++j)
+ i[j]->sType = hTy;
+
+ return true;
+}
+
+#define QOP_ADD 0
+#define QOP_SUBR 1
+#define QOP_SUB 2
+#define QOP_MOV2 3
+
+#define QUADOP(q, r, s, t) \
+ ((QOP_##q << 0) | (QOP_##r << 2) | \
+ (QOP_##s << 4) | (QOP_##t << 6))
+
+// Legalization pass whose visit(BasicBlock) removes no-ops, emulates PRERET
+// on chipsets below 0xa0, replaces zero immediates by r63 and handles 64 bit
+// operations. Named "PostRA": presumably run after register allocation.
+class NV50LegalizePostRA : public Pass
+{
+private:
+ virtual bool visit(Function *);
+ virtual bool visit(BasicBlock *);
+
+ void handlePRERET(FlowInstruction *);
+ void replaceZero(Instruction *);
+ void split64BitOp(Instruction *);
+
+ // GPR value with register id 63, substituted for zero immediates
+ LValue *r63;
+};
+
+// Per-function setup: creates the r63 value and, if the program carries a
+// list of deferred output writes in targetPriv, redirects the instruction
+// defining each stored value to write to the export's destination directly.
+// Always returns true.
+bool
+NV50LegalizePostRA::visit(Function *fn)
+{
+ Program *prog = fn->getProgram();
+
+ r63 = new_LValue(fn, FILE_GPR);
+ r63->reg.data.id = 63;
+
+ // this is actually per-program, but we can do it all on visiting main()
+ std::list<Instruction *> *outWrites =
+ reinterpret_cast<std::list<Instruction *> *>(prog->targetPriv);
+
+ if (outWrites) {
+ // for every saved export: make the instruction that defines the
+ // exported value (source 1) write to the export's source 0 instead
+ for (std::list<Instruction *>::iterator it = outWrites->begin();
+ it != outWrites->end(); ++it)
+ (*it)->getSrc(1)->defs.front()->getInsn()->setDef(0, (*it)->getSrc(0));
+ // instructions will be deleted on exit
+ outWrites->clear();
+ }
+
+ return true;
+}
+
+// Replace every immediate source of @i whose value is 0 by the r63 value.
+void
+NV50LegalizePostRA::replaceZero(Instruction *i)
+{
+   int s = 0;
+   while (i->srcExists(s)) {
+      const ImmediateValue *const imm = i->getSrc(s)->asImm();
+      const bool isZero = imm && imm->reg.data.u64 == 0;
+      if (isZero)
+         i->setSrc(s, r63);
+      ++s;
+   }
+}
+
+// Handle an instruction with a 64 bit destination type. Only TYPE_F64 is
+// treated: MAD becomes FMA, the operations listed below are left alone, and
+// everything else is retyped to U32 with a clone inserted right after it.
+void
+NV50LegalizePostRA::split64BitOp(Instruction *i)
+{
+   if (i->dType != TYPE_F64)
+      return;
+
+   if (i->op == OP_MAD)
+      i->op = OP_FMA;
+
+   switch (i->op) {
+   case OP_ADD:
+   case OP_MUL:
+   case OP_FMA:
+   case OP_CVT:
+   case OP_MIN:
+   case OP_MAX:
+   case OP_SET:
+      // these keep their F64 type
+      return;
+   default:
+      break;
+   }
+
+   i->dType = i->sType = TYPE_U32;
+
+   i->bb->insertAfter(i, cloneForward(func, i));
+}
+
+// Emulate PRERET: jump to the target and call to the origin from there
+//
+// WARNING: atm only works if BBs are affected by at most a single PRERET
+//
+// BB:0
+// preret BB:3
+// (...)
+// BB:3
+// (...)
+// --->
+// BB:0
+// bra BB:3 + n0 (directly to the call; move to beginning of BB and fixate)
+// (...)
+// BB:3
+// bra BB:3 + n1 (skip the call)
+// call BB:0 + n2 (skip bra at beginning of BB:0)
+// (...)
+// Rewrite a PRERET into the three emulation instructions described above.
+// @pre is moved to the head of its own block and tagged as variant +0; two
+// new PRERET flow instructions are put at the head of the target block, the
+// "skip" branch (variant +1) first, followed by the "call" (variant +2).
+void
+NV50LegalizePostRA::handlePRERET(FlowInstruction *pre)
+{
+ BasicBlock *bbE = pre->bb;
+ BasicBlock *bbT = pre->target.bb;
+
+ pre->subOp = NV50_IR_SUBOP_EMU_PRERET + 0;
+ bbE->remove(pre);
+ bbE->insertHead(pre);
+
+ Instruction *skip = new_FlowInstruction(func, OP_PRERET, bbT);
+ Instruction *call = new_FlowInstruction(func, OP_PRERET, bbE);
+
+ // insert call first so that skip ends up in front of it
+ bbT->insertHead(call);
+ bbT->insertHead(skip);
+
+ // NOTE: maybe split blocks to prevent the instructions from moving ?
+
+ skip->subOp = NV50_IR_SUBOP_EMU_PRERET + 1;
+ call->subOp = NV50_IR_SUBOP_EMU_PRERET + 2;
+}
+
+// Legalize all instructions of @bb. Always returns true.
+bool
+NV50LegalizePostRA::visit(BasicBlock *bb)
+{
+   Instruction *i, *next;
+
+   // remove pseudo operations and non-fixed no-ops, split 64 bit operations
+   for (i = bb->getFirst(); i; i = next) {
+      next = i->next; // fetched first: i may be removed or moved below
+      if (i->isNop()) {
+         bb->remove(i);
+      } else
+      if (i->op == OP_PRERET && prog->getTarget()->getChipset() < 0xa0) {
+         handlePRERET(i->asFlow());
+      } else {
+         // zero immediates are kept for MOV, PFETCH and instructions that
+         // define an address register
+         if (i->op != OP_MOV && i->op != OP_PFETCH &&
+             (!i->defExists(0) || i->def(0).getFile() != FILE_ADDRESS))
+            replaceZero(i);
+         if (typeSizeof(i->dType) == 8)
+            split64BitOp(i);
+      }
+   }
+
+   // (the former "if (!bb->getEntry()) return true;" in front of this
+   // return had no effect and was dropped)
+   return true;
+}
+
+// Legalization pass whose visit(BasicBlock) expands integer MUL/MAD/DIV/MOD,
+// fixes up instructions that define address registers and collects exports
+// that can later be folded into the defining instruction. Named "SSA":
+// presumably run while the program is in SSA form.
+class NV50LegalizeSSA : public Pass
+{
+public:
+ NV50LegalizeSSA(Program *);
+
+ virtual bool visit(BasicBlock *bb);
+
+private:
+ void propagateWriteToOutput(Instruction *);
+ void handleDIV(Instruction *);
+ void handleMOD(Instruction *);
+ void handleMUL(Instruction *);
+ void handleAddrDef(Instruction *);
+
+ inline bool isARL(const Instruction *) const;
+
+ BuildUtil bld;
+
+ // list of removed exports (taken from prog->targetPriv), or NULL if the
+ // export propagation is disabled for this program
+ std::list<Instruction *> *outWrites;
+};
+
+// Bind the builder to @prog and enable export propagation (outWrites) only
+// for vertex and geometry programs compiled with optLevel >= 2.
+NV50LegalizeSSA::NV50LegalizeSSA(Program *prog)
+{
+   bld.setProgram(prog);
+
+   outWrites = NULL;
+   if (prog->optLevel >= 2) {
+      const Program::Type ty = prog->getType();
+      if (ty == Program::TYPE_GEOMETRY || ty == Program::TYPE_VERTEX)
+         outWrites =
+            reinterpret_cast<std::list<Instruction *> *>(prog->targetPriv);
+   }
+}
+
+// If the value stored by export @st can be written to the output directly by
+// the instruction that defines it, remove @st from its block and remember it
+// in outWrites. Callers must make sure outWrites is not NULL.
+void
+NV50LegalizeSSA::propagateWriteToOutput(Instruction *st)
+{
+ // skip indirect exports and values that have other users
+ if (st->src(0).isIndirect(0) || st->getSrc(1)->refCount() != 1)
+ return;
+
+ // check def instruction can store
+ Instruction *di = st->getSrc(1)->defs.front()->getInsn();
+
+ // TODO: move exports (if beneficial) in common opt pass
+ if (di->isPseudo() || isTextureOp(di->op) || di->defCount(0xff, true) > 1)
+ return;
+ // instructions with an immediate source are not used either
+ for (int s = 0; di->srcExists(s); ++s)
+ if (di->src(s).getFile() == FILE_IMMEDIATE)
+ return;
+
+ // We cannot set defs to non-lvalues before register allocation, so
+ // save & remove (to save registers) the exports and replace later.
+ outWrites->push_back(st);
+ st->bb->remove(st);
+}
+
+// Returns true if @i is a SHL of a GPR by an immediate for which
+// isInteger(0) holds, i.e. a plain move into an address register.
+bool
+NV50LegalizeSSA::isARL(const Instruction *i) const
+{
+   ImmediateValue imm;
+
+   const bool shlOfGpr =
+      i->op == OP_SHL && i->src(0).getFile() == FILE_GPR;
+
+   return shlOfGpr && i->src(1).getImmediate(imm) && imm.isInteger(0);
+}
+
+// Legalize an instruction @i whose first def is an address register.
+// Forms that are directly supported are left alone; otherwise address
+// sources are replaced by GPRs and the result is computed into a GPR that is
+// moved to the address register by an extra SHL by 0.
+void
+NV50LegalizeSSA::handleAddrDef(Instruction *i)
+{
+ Instruction *arl;
+
+ i->getDef(0)->reg.size = 2; // $aX are only 16 bit
+
+ // only ADDR <- SHL(GPR, IMM) and ADDR <- ADD(ADDR, IMM) are valid
+ if (i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE) {
+ if (i->op == OP_SHL && i->src(0).getFile() == FILE_GPR)
+ return;
+ if (i->op == OP_ADD && i->src(0).getFile() == FILE_ADDRESS)
+ return;
+ }
+
+ // turn $a sources into $r sources (can't operate on $a)
+ for (int s = 0; i->srcExists(s); ++s) {
+ Value *a = i->getSrc(s);
+ Value *r;
+ if (a->reg.file == FILE_ADDRESS) {
+ if (a->getInsn() && isARL(a->getInsn())) {
+ // $a was loaded from a GPR without shift: use that GPR directly
+ i->setSrc(s, a->getInsn()->getSrc(0));
+ } else {
+ // otherwise copy $a into a new GPR in front of i
+ bld.setPosition(i, false);
+ r = bld.getSSA();
+ bld.mkMov(r, a);
+ i->setSrc(s, r);
+ }
+ }
+ }
+ // after the source replacement a SHL by immediate is a valid form
+ if (i->op == OP_SHL && i->src(1).getFile() == FILE_IMMEDIATE)
+ return;
+
+ // turn result back into $a
+ bld.setPosition(i, true);
+ arl = bld.mkOp2(OP_SHL, TYPE_U32, i->getDef(0), bld.getSSA(), bld.mkImm(0));
+ i->setDef(0, arl->getSrc(0));
+}
+
+// Expand an integer MUL or MAD whose source type is wider than 2 bytes via
+// expandIntegerMUL. A MAD is first split into a MUL followed by an ADD.
+// A predicate on the original instruction is removed before the expansion
+// and re-applied to the instruction defining the original result afterwards.
+void
+NV50LegalizeSSA::handleMUL(Instruction *mul)
+{
+ if (isFloatType(mul->sType) || typeSizeof(mul->sType) <= 2)
+ return;
+ Value *def = mul->getDef(0);
+ Value *pred = mul->getPredicate();
+ CondCode cc = mul->cc;
+ if (pred)
+ mul->setPredicate(CC_ALWAYS, NULL);
+
+ if (mul->op == OP_MAD) {
+ // MAD(a, b, c) -> res = MUL(a, b); ADD(res, c)
+ Instruction *add = mul;
+ bld.setPosition(add, false);
+ Value *res = cloneShallow(func, mul->getDef(0));
+ mul = bld.mkOp2(OP_MUL, add->sType, res, add->getSrc(0), add->getSrc(1));
+ add->op = OP_ADD;
+ add->setSrc(0, mul->getDef(0));
+ add->setSrc(1, add->getSrc(2));
+ for (int s = 2; add->srcExists(s); ++s)
+ add->setSrc(s, NULL);
+ // the subOp (e.g. high result selection) belongs to the multiply
+ mul->subOp = add->subOp;
+ add->subOp = 0;
+ }
+ expandIntegerMUL(&bld, mul);
+ if (pred)
+ def->getInsn()->setPredicate(cc, pred);
+}
+
+// Use f32 division: first compute an approximate result, use it to reduce
+// the dividend, which should then be representable as f32, divide the reduced
+// dividend, and add the quotients.
+// Expand a 32 bit integer division @div (U32 or S32) into the f32 based
+// sequence described above; other types are left untouched. The original
+// instruction is reused as the final operation of the sequence.
+void
+NV50LegalizeSSA::handleDIV(Instruction *div)
+{
+ const DataType ty = div->sType;
+
+ if (ty != TYPE_U32 && ty != TYPE_S32)
+ return;
+
+ Value *q, *q0, *qf, *aR, *aRf, *qRf, *qR, *t, *s, *m, *cond;
+
+ bld.setPosition(div, false);
+
+ // a/b: integer operands (absolute values if signed), af/bf: as f32
+ Value *a, *af = bld.getSSA();
+ Value *b, *bf = bld.getSSA();
+
+ bld.mkCvt(OP_CVT, TYPE_F32, af, ty, div->getSrc(0));
+ bld.mkCvt(OP_CVT, TYPE_F32, bf, ty, div->getSrc(1));
+
+ if (isSignedType(ty)) {
+ // work on absolute values, the sign is fixed up at the end
+ af->getInsn()->src(0).mod = Modifier(NV50_IR_MOD_ABS);
+ bf->getInsn()->src(0).mod = Modifier(NV50_IR_MOD_ABS);
+ a = bld.getSSA();
+ b = bld.getSSA();
+ bld.mkOp1(OP_ABS, ty, a, div->getSrc(0));
+ bld.mkOp1(OP_ABS, ty, b, div->getSrc(1));
+ } else {
+ a = div->getSrc(0);
+ b = div->getSrc(1);
+ }
+
+ // bf = 1 / b, then an integer add of -2 on the float's bit pattern
+ // (presumably to make sure the reciprocal is not too large - TODO confirm)
+ bf = bld.mkOp1v(OP_RCP, TYPE_F32, bld.getSSA(), bf);
+ bf = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), bf, bld.mkImm(-2));
+
+ // first approximation: q0 = (int)(af * bf), both steps rounding to zero
+ bld.mkOp2(OP_MUL, TYPE_F32, (qf = bld.getSSA()), af, bf)->rnd = ROUND_Z;
+ bld.mkCvt(OP_CVT, ty, (q0 = bld.getSSA()), TYPE_F32, qf)->rnd = ROUND_Z;
+
+ // get error of 1st result
+ expandIntegerMUL(&bld,
+ bld.mkOp2(OP_MUL, TYPE_U32, (t = bld.getSSA()), q0, b));
+ bld.mkOp2(OP_SUB, TYPE_U32, (aRf = bld.getSSA()), a, t);
+
+ // note: despite the names, aRf holds the integer remainder and aR its
+ // f32 conversion
+ bld.mkCvt(OP_CVT, TYPE_F32, (aR = bld.getSSA()), TYPE_U32, aRf);
+
+ // divide the remainder the same way
+ bld.mkOp2(OP_MUL, TYPE_F32, (qRf = bld.getSSA()), aR, bf)->rnd = ROUND_Z;
+ bld.mkCvt(OP_CVT, TYPE_U32, (qR = bld.getSSA()), TYPE_F32, qRf)
+ ->rnd = ROUND_Z;
+ bld.mkOp2(OP_ADD, ty, (q = bld.getSSA()), q0, qR); // add quotients
+
+ // correction: if modulus >= divisor, add 1
+ expandIntegerMUL(&bld,
+ bld.mkOp2(OP_MUL, TYPE_U32, (t = bld.getSSA()), q, b));
+ bld.mkOp2(OP_SUB, TYPE_U32, (m = bld.getSSA()), a, t);
+ bld.mkCmp(OP_SET, CC_GE, TYPE_U32, (s = bld.getSSA()), m, b);
+ if (!isSignedType(ty)) {
+ // the result is q - s; presumably SET yields -1 when true, so this
+ // adds 1 - TODO confirm
+ div->op = OP_SUB;
+ div->setSrc(0, q);
+ div->setSrc(1, s);
+ } else {
+ t = q;
+ bld.mkOp2(OP_SUB, TYPE_U32, (q = bld.getSSA()), t, s);
+ s = bld.getSSA();
+ t = bld.getSSA();
+ // fix the sign
+ // the XOR of the operands is only computed for its flags: s = -q is
+ // selected if the sign flag is set, t = q otherwise
+ bld.mkOp2(OP_XOR, TYPE_U32, NULL, div->getSrc(0), div->getSrc(1))
+ ->setFlagsDef(0, (cond = bld.getSSA(1, FILE_FLAGS)));
+ bld.mkOp1(OP_NEG, ty, s, q)->setPredicate(CC_S, cond);
+ bld.mkOp1(OP_MOV, ty, t, q)->setPredicate(CC_NS, cond);
+
+ // the original instruction merges the two predicated results
+ div->op = OP_UNION;
+ div->setSrc(0, s);
+ div->setSrc(1, t);
+ }
+}
+
+// Expand a 32 bit integer modulo (U32 or S32 destination type) as
+// a - (a / b) * b: a DIV is created and expanded by handleDIV, the product
+// is expanded by expandIntegerMUL and @mod itself becomes the subtraction.
+void
+NV50LegalizeSSA::handleMOD(Instruction *mod)
+{
+ if (mod->dType != TYPE_U32 && mod->dType != TYPE_S32)
+ return;
+ bld.setPosition(mod, false);
+
+ Value *q = bld.getSSA();
+ Value *m = bld.getSSA();
+
+ bld.mkOp2(OP_DIV, mod->dType, q, mod->getSrc(0), mod->getSrc(1));
+ handleDIV(q->getInsn());
+
+ // handleDIV moved the builder position, go back to right before mod
+ bld.setPosition(mod, false);
+ expandIntegerMUL(&bld, bld.mkOp2(OP_MUL, TYPE_U32, m, q, mod->getSrc(1)));
+
+ // source 0 (the dividend) is kept, only the subtrahend is replaced
+ mod->op = OP_SUB;
+ mod->setSrc(1, m);
+}
+
+// Dispatch every instruction of @bb, starting at the block's entry, to the
+// matching handler, then fix up instructions that define an address
+// register. Always returns true.
+bool
+NV50LegalizeSSA::visit(BasicBlock *bb)
+{
+ Instruction *insn, *next;
+ // skipping PHIs (don't pass them to handleAddrDef) !
+ for (insn = bb->getEntry(); insn; insn = next) {
+ // remember the successor first: handlers may remove or replace insn
+ next = insn->next;
+
+ switch (insn->op) {
+ case OP_EXPORT:
+ if (outWrites)
+ propagateWriteToOutput(insn);
+ break;
+ case OP_DIV:
+ handleDIV(insn);
+ break;
+ case OP_MOD:
+ handleMOD(insn);
+ break;
+ case OP_MAD:
+ case OP_MUL:
+ handleMUL(insn);
+ break;
+ default:
+ break;
+ }
+
+ if (insn->defExists(0) && insn->getDef(0)->reg.file == FILE_ADDRESS)
+ handleAddrDef(insn);
+ }
+ return true;
+}
+
+// Lowering pass for nv50. Named "PreSSA": presumably run before conversion
+// to SSA form. Each handleXXX() method lowers the operation it is named
+// after and returns a bool.
+class NV50LoweringPreSSA : public Pass
+{
+public:
+ NV50LoweringPreSSA(Program *);
+
+private:
+ virtual bool visit(Instruction *);
+ virtual bool visit(Function *);
+
+ // system value reads / writes
+ bool handleRDSV(Instruction *);
+ bool handleWRSV(Instruction *);
+
+ bool handleEXPORT(Instruction *);
+
+ // arithmetic
+ bool handleMUL(Instruction *);
+ bool handleDIV(Instruction *);
+ bool handleSQRT(Instruction *);
+ bool handlePOW(Instruction *);
+
+ // comparison / selection
+ bool handleSET(Instruction *);
+ bool handleSLCT(CmpInstruction *);
+ bool handleSELP(Instruction *);
+
+ // texturing
+ bool handleTEX(TexInstruction *);
+ bool handleTXB(TexInstruction *); // I really
+ bool handleTXL(TexInstruction *); // hate
+ bool handleTXD(TexInstruction *); // these 3
+
+ // control flow
+ bool handleCALL(Instruction *);
+ bool handlePRECONT(Instruction *);
+ bool handleCONT(Instruction *);
+
+ void checkPredicate(Instruction *);
+
+private:
+ // target of the program being lowered
+ const Target *const targ;
+
+ BuildUtil bld;
+
+ // copy of the implicit thread id argument (compute programs only),
+ // NULL otherwise
+ Value *tid;
+};
+
+// Remember the program's target, start without a thread id value and bind
+// the builder to @prog.
+NV50LoweringPreSSA::NV50LoweringPreSSA(Program *prog)
+   : targ(prog->getTarget()),
+     tid(NULL)
+{
+   bld.setProgram(prog);
+}
+
+// Per-function setup. For compute programs an implicit argument in $r0 is
+// appended to the function's inputs and copied into a scratch value (tid) at
+// the start of the root block. Always returns true.
+bool
+NV50LoweringPreSSA::visit(Function *f)
+{
+   BasicBlock *root = BasicBlock::get(func->cfg.getRoot());
+
+   if (prog->getType() != Program::TYPE_COMPUTE)
+      return true;
+
+   // Add implicit "thread id" argument in $r0 to the function
+   Value *threadIdArg = new_LValue(func, FILE_GPR);
+   threadIdArg->reg.data.id = 0;
+   f->ins.push_back(threadIdArg);
+
+   bld.setPosition(root, false);
+   Instruction *copy = bld.mkMov(bld.getScratch(), threadIdArg, TYPE_U32);
+   tid = copy->getDef(0);
+
+   return true;
+}
+
+// Prepare the sources of a texture instruction: swap the depth reference in
+// front of bias/lod, convert the array index to u16 and turn cube arrays
+// into 2D arrays. Always returns true.
+bool
+NV50LoweringPreSSA::handleTEX(TexInstruction *i)
+{
+ const int arg = i->tex.target.getArgCount();
+ const int dref = arg;
+ const int lod = i->tex.target.isShadow() ? (arg + 1) : arg;
+
+ // dref comes before bias/lod
+ if (i->tex.target.isShadow())
+ if (i->op == OP_TXB || i->op == OP_TXL)
+ i->swapSources(dref, lod);
+
+ // array index must be converted to u16
+ if (i->tex.target.isArray()) {
+ Value *layer = i->getSrc(arg - 1);
+ LValue *src = new_LValue(func, FILE_GPR);
+ bld.mkCvt(OP_CVT, TYPE_U16, src, TYPE_F32, layer);
+ i->setSrc(arg - 1, src);
+
+ if (i->tex.target.isCube()) {
+ // Value *face = layer;
+ Value *x, *y;
+ x = new_LValue(func, FILE_GPR);
+ y = new_LValue(func, FILE_GPR);
+ layer = new_LValue(func, FILE_GPR);
+
+ i->tex.target = TEX_TARGET_2D_ARRAY;
+
+ // TODO: use TEXPREP to convert x,y,z,face -> x,y,layer
+ bld.mkMov(x, i->getSrc(0));
+ bld.mkMov(y, i->getSrc(1));
+ bld.mkMov(layer, i->getSrc(3));
+
+ // sources become x, y, layer; source 4 moves down to slot 3
+ i->setSrc(0, x);
+ i->setSrc(1, y);
+ i->setSrc(2, layer);
+ i->setSrc(3, i->getSrc(4));
+ i->setSrc(4, NULL);
+ }
+ }
+
+ // texel offsets are 3 immediate fields in the instruction,
+ // nv50 cannot do textureGatherOffsets
+ assert(i->tex.useOffsets <= 1);
+
+ return true;
+}
+
+// Bias must be equal for all threads of a quad or lod calculation will fail.
+//
+// The lanes of a quad are grouped by the bit in the condition register they
+// have set, which is selected by differing bias values.
+// Move the input values for TEX into a new register set for each group and
+// execute TEX only for a specific group.
+// We always need to use 4 new registers for the inputs/outputs because the
+// implicitly calculated derivatives must be correct.
+//
+// TODO: move to SSA phase so we can easily determine whether bias is constant
+// Lower a TXB (texture with bias) instruction. Nothing beyond handleTEX is
+// done if the bias is uniform; otherwise the instruction is replaced by four
+// predicated clones whose results are merged with OP_UNION.
+// Always returns true.
+bool
+NV50LoweringPreSSA::handleTXB(TexInstruction *i)
+{
+ // condition codes selecting one of the 4 groups of lanes
+ const CondCode cc[4] = { CC_EQU, CC_S, CC_C, CC_O };
+ int l, d;
+
+ handleTEX(i);
+ Value *bias = i->getSrc(i->tex.target.getArgCount());
+ if (bias->isUniform())
+ return true;
+
+ // cond collects one bit per group: source 0 is the constant 1, sources
+ // 1..3 are set in the loop below
+ Instruction *cond = bld.mkOp1(OP_UNION, TYPE_U32, bld.getScratch(),
+ bld.loadImm(NULL, 1));
+ bld.setPosition(cond, false);
+
+ for (l = 1; l < 4; ++l) {
+ const uint8_t qop = QUADOP(SUBR, SUBR, SUBR, SUBR);
+ Value *bit = bld.getSSA();
+ Value *pred = bld.getScratch(1, FILE_FLAGS);
+ Value *imm = bld.loadImm(NULL, (1 << l));
+ // compare the bias with that of lane l; bit (1 << l) is written
+ // only where the predicate reports equality
+ bld.mkQuadop(qop, pred, l, bias, bias)->flagsDef = 0;
+ bld.mkMov(bit, imm)->setPredicate(CC_EQ, pred);
+ cond->setSrc(l, bit);
+ }
+ // move the collected bits into a flags value used as predicate
+ Value *flags = bld.getScratch(1, FILE_FLAGS);
+ bld.setPosition(cond, true);
+ bld.mkCvt(OP_CVT, TYPE_U8, flags, TYPE_U32, cond->getDef(0));
+
+ // one predicated clone of the texture instruction per group
+ Instruction *tex[4];
+ for (l = 0; l < 4; ++l) {
+ (tex[l] = cloneForward(func, i))->setPredicate(cc[l], flags);
+ bld.insert(tex[l]);
+ }
+
+ // res[l][d]: result d of group l; group 0 uses the clone's defs
+ // directly, the others are copied into fresh values
+ Value *res[4][4];
+ for (d = 0; i->defExists(d); ++d)
+ res[0][d] = tex[0]->getDef(d);
+ for (l = 1; l < 4; ++l) {
+ for (d = 0; tex[l]->defExists(d); ++d) {
+ res[l][d] = cloneShallow(func, res[0][d]);
+ bld.mkMov(res[l][d], tex[l]->getDef(d))->setPredicate(cc[l], flags);
+ }
+ }
+
+ // merge the per-group results into the original destinations
+ for (d = 0; i->defExists(d); ++d) {
+ Instruction *dst = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(d));
+ for (l = 0; l < 4; ++l)
+ dst->setSrc(l, res[l][d]);
+ }
+ delete_Instruction(prog, i);
+ return true;
+}
+
+// LOD must be equal for all threads of a quad.
+// Unlike with TXB, here we can just diverge since there's no LOD calculation
+// that would require all 4 threads' sources to be set up properly.
+// Lower a TXL (texture with explicit lod) instruction. Nothing beyond
+// handleTEX is done if the lod is uniform; otherwise the instruction is
+// isolated in its own block, reached through a chain of per-lane conditional
+// branches, with a JOINAT before and a JOIN after it. Always returns true.
+bool
+NV50LoweringPreSSA::handleTXL(TexInstruction *i)
+{
+ handleTEX(i);
+ Value *lod = i->getSrc(i->tex.target.getArgCount());
+ if (lod->isUniform())
+ return true;
+
+ // currBB: code before the texture, texiBB: the texture instruction,
+ // joinBB: everything after it
+ BasicBlock *currBB = i->bb;
+ BasicBlock *texiBB = i->bb->splitBefore(i, false);
+ BasicBlock *joinBB = i->bb->splitAfter(i);
+
+ currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
+
+ for (int l = 0; l <= 3; ++l) {
+ const uint8_t qop = QUADOP(SUBR, SUBR, SUBR, SUBR);
+ Value *pred = bld.getScratch(1, FILE_FLAGS);
+ bld.setPosition(currBB, true);
+ // compare the lod with that of lane l and branch to the texture
+ // block on equality
+ bld.mkQuadop(qop, pred, l, lod, lod)->flagsDef = 0;
+ bld.mkFlow(OP_BRA, texiBB, CC_EQ, pred)->fixed = 1;
+ currBB->cfg.attach(&texiBB->cfg, Graph::Edge::FORWARD);
+ // the checks for lanes 1..3 each live in a new block
+ if (l <= 2) {
+ BasicBlock *laneBB = new BasicBlock(func);
+ currBB->cfg.attach(&laneBB->cfg, Graph::Edge::TREE);
+ currBB = laneBB;
+ }
+ }
+ bld.setPosition(joinBB, false);
+ bld.mkOp(OP_JOIN, TYPE_NONE, NULL);
+ return true;
+}
+
+// Lower a TXD (texture with explicit derivatives) instruction into four
+// plain TEX instructions, one per lane of a quad, executed between QUADON
+// and QUADPOP. For each lane the coordinates of all lanes are set up from
+// that lane's coordinates and derivatives via quad operations, and only that
+// lane's result is kept. Always returns true.
+bool
+NV50LoweringPreSSA::handleTXD(TexInstruction *i)
+{
+ // quad operation codes per lane: [l][0] applies dPdx, [l][1] applies dPdy
+ static const uint8_t qOps[4][2] =
+ {
+ { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0
+ { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1
+ { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
+ { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
+ };
+ Value *def[4][4]; // def[c][l]: result component c as computed for lane l
+ Value *crd[3]; // temporary coordinates
+ Instruction *tex;
+ Value *zero = bld.loadImm(bld.getSSA(), 0);
+ int l, c;
+ const int dim = i->tex.target.getDim();
+
+ handleTEX(i);
+ i->op = OP_TEX; // no need to clone dPdx/dPdy later
+
+ for (c = 0; c < dim; ++c)
+ crd[c] = bld.getScratch();
+
+ bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
+ for (l = 0; l < 4; ++l) {
+ // mov coordinates from lane l to all lanes
+ for (c = 0; c < dim; ++c)
+ bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
+ // add dPdx from lane l to lanes dx
+ for (c = 0; c < dim; ++c)
+ bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
+ // add dPdy from lane l to lanes dy
+ for (c = 0; c < dim; ++c)
+ bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
+ // texture
+ bld.insert(tex = cloneForward(func, i));
+ for (c = 0; c < dim; ++c)
+ tex->setSrc(c, crd[c]);
+ // save results
+ for (c = 0; i->defExists(c); ++c) {
+ Instruction *mov;
+ def[c][l] = bld.getSSA();
+ mov = bld.mkMov(def[c][l], tex->getDef(c));
+ mov->fixed = 1;
+ // only lane l takes the result of this iteration
+ mov->lanes = 1 << l;
+ }
+ }
+ bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
+
+ // merge the per-lane results into the original destinations
+ for (c = 0; i->defExists(c); ++c) {
+ Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
+ for (l = 0; l < 4; ++l)
+ u->setSrc(l, def[c][l]);
+ }
+
+ i->bb->remove(i);
+ return true;
+}
+
+// Lower a SET with an F32 result: the comparison itself is retyped to U32
+// and followed by an integer ABS and a conversion to F32, both operating in
+// place on the original destination. Other result types are left alone.
+// Always returns true.
+bool
+NV50LoweringPreSSA::handleSET(Instruction *i)
+{
+   if (i->dType != TYPE_F32)
+      return true;
+
+   // the fix-up instructions go right after the SET
+   bld.setPosition(i, true);
+   i->dType = TYPE_U32;
+
+   Value *const dst = i->getDef(0);
+   bld.mkOp1(OP_ABS, TYPE_S32, dst, dst);
+   bld.mkCvt(OP_CVT, TYPE_F32, dst, TYPE_S32, dst);
+
+   return true;
+}
+
+bool // Lowers SLCT: i is turned into a flag-writing SET of its third source against 0, and two MOVs predicated on those flags are merged by a UNION into the original destination. Always returns true.
+NV50LoweringPreSSA::handleSLCT(CmpInstruction *i)
+{
+ Value *src0 = bld.getSSA();
+ Value *src1 = bld.getSSA();
+ Value *pred = bld.getScratch(1, FILE_FLAGS); // flags value written by the rewritten SET and read by both MOVs
+
+ Value *v0 = i->getSrc(0);
+ Value *v1 = i->getSrc(1);
+ // XXX: these probably shouldn't be immediates in the first place ...
+ if (v0->asImm())
+ v0 = bld.mkMov(bld.getSSA(), v0)->getDef(0);
+ if (v1->asImm())
+ v1 = bld.mkMov(bld.getSSA(), v1)->getDef(0);
+
+ bld.setPosition(i, true); // the predicated MOVs and the UNION are emitted relative to i here
+ bld.mkMov(src0, v0)->setPredicate(CC_NE, pred);
+ bld.mkMov(src1, v1)->setPredicate(CC_EQ, pred);
+ bld.mkOp2(OP_UNION, i->dType, i->getDef(0), src0, src1);
+
+ bld.setPosition(i, false); // reposition for the immediate load made below
+ i->op = OP_SET;
+ i->setFlagsDef(0, pred);
+ i->dType = TYPE_U8;
+ i->setSrc(0, i->getSrc(2)); // compare the former third source ...
+ i->setSrc(2, NULL);
+ i->setSrc(1, bld.loadImm(NULL, 0)); // ... against zero
+
+ return true;
+}
+
+// Lower SELP: both candidate values are copied by MOVs predicated on the
+// third source (CC_NE for source 0, CC_EQ for source 1) and the two
+// copies are merged into the original destination with a UNION.
+// Immediate candidates are first moved into fresh values.
+// The SELP itself is deleted. Always returns true.
+bool
+NV50LoweringPreSSA::handleSELP(Instruction *i)
+{
+   Value *const onNE = bld.getSSA();
+   Value *const onEQ = bld.getSSA();
+
+   Value *a = i->getSrc(0);
+   Value *b = i->getSrc(1);
+   if (a->asImm())
+      a = bld.mkMov(bld.getSSA(), a)->getDef(0);
+   if (b->asImm())
+      b = bld.mkMov(bld.getSSA(), b)->getDef(0);
+
+   Instruction *copy = bld.mkMov(onNE, a);
+   copy->setPredicate(CC_NE, i->getSrc(2));
+
+   copy = bld.mkMov(onEQ, b);
+   copy->setPredicate(CC_EQ, i->getSrc(2));
+
+   bld.mkOp2(OP_UNION, i->dType, i->getDef(0), onNE, onEQ);
+   delete_Instruction(prog, i);
+   return true;
+}
+
+// Lower WRSV to an EXPORT store to the shader output slot that the target
+// reports for the system value. Returns false, leaving the instruction in
+// place, when the reported address is 0x400 or above; otherwise the WRSV
+// is removed from its block and true is returned.
+bool
+NV50LoweringPreSSA::handleWRSV(Instruction *i)
+{
+   // these are all shader outputs, $sreg are not writeable
+   const uint32_t addr =
+      targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym());
+
+   if (addr >= 0x400)
+      return false;
+
+   Symbol *out = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr);
+   bld.mkStore(OP_EXPORT, i->dType, out, i->getIndirect(0, 0), i->getSrc(1));
+
+   bld.getBB()->remove(i);
+   return true;
+}
+
+// For compute programs, append the thread id value as one more source of
+// every CALL; calls in other program types are left as they are.
+// Always returns true.
+bool
+NV50LoweringPreSSA::handleCALL(Instruction *i)
+{
+   if (prog->getType() != Program::TYPE_COMPUTE)
+      return true;
+
+   // Add implicit "thread id" argument in $r0 to the function
+   i->setSrc(i->srcCount(), tid);
+   return true;
+}
+
+bool // PRECONT is not kept by this lowering: the instruction is simply deleted. Always returns true.
+NV50LoweringPreSSA::handlePRECONT(Instruction *i)
+{
+ delete_Instruction(prog, i);
+ return true;
+}
+
+bool // Rewrites CONT in place into a plain branch. Always returns true.
+NV50LoweringPreSSA::handleCONT(Instruction *i)
+{
+ i->op = OP_BRA; // only the opcode changes; everything else on the instruction is kept
+ return true;
+}
+
+bool // Lowers RDSV: depending on the semantic, the read becomes an interpolation, a shared-memory load, bit extraction from tid, a constant, or a shader input fetch. Always returns true.
+NV50LoweringPreSSA::handleRDSV(Instruction *i)
+{
+ Symbol *sym = i->getSrc(0)->asSym();
+ uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym);
+ Value *def = i->getDef(0);
+ SVSemantic sv = sym->reg.data.sv.sv;
+ int idx = sym->reg.data.sv.index;
+
+ if (addr >= 0x400) // mov $sreg
+ return true;
+
+ switch (sv) {
+ case SV_POSITION:
+ assert(prog->getType() == Program::TYPE_FRAGMENT);
+ bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL);
+ break;
+ case SV_FACE:
+ bld.mkInterp(NV50_IR_INTERP_FLAT, def, addr, NULL);
+ if (i->dType == TYPE_F32) {
+ bld.mkOp2(OP_AND, TYPE_U32, def, def, bld.mkImm(0x80000000)); // keep only the sign bit
+ bld.mkOp2(OP_XOR, TYPE_U32, def, def, bld.mkImm(0xbf800000)); // XOR with the bits of -1.0f: result is the bit pattern of +1.0f or -1.0f
+ }
+ break;
+ case SV_NCTAID:
+ case SV_CTAID:
+ case SV_NTID:
+ if ((sv == SV_NCTAID && idx >= 2) ||
+ (sv == SV_NTID && idx >= 3)) {
+ bld.mkMov(def, bld.mkImm(1)); // components past the stored ones read as constant 1
+ } else if (sv == SV_CTAID && idx >= 2) {
+ bld.mkMov(def, bld.mkImm(0)); // ... or as constant 0 for CTAID
+ } else {
+ Value *x = bld.getSSA(2);
+ bld.mkOp1(OP_LOAD, TYPE_U16, x, // 16-bit load from shared memory at addr
+ bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U16, addr));
+ bld.mkCvt(OP_CVT, TYPE_U32, def, TYPE_U16, x); // widened to 32 bits
+ }
+ break;
+ case SV_TID: // tid is unpacked as: component 0 = bits 0..15, component 1 = bits 16..25, component 2 = bits 26 and up
+ if (idx == 0) {
+ bld.mkOp2(OP_AND, TYPE_U32, def, tid, bld.mkImm(0x0000ffff));
+ } else if (idx == 1) {
+ bld.mkOp2(OP_AND, TYPE_U32, def, tid, bld.mkImm(0x03ff0000));
+ bld.mkOp2(OP_SHR, TYPE_U32, def, def, bld.mkImm(16));
+ } else if (idx == 2) {
+ bld.mkOp2(OP_SHR, TYPE_U32, def, tid, bld.mkImm(26));
+ } else {
+ bld.mkMov(def, bld.mkImm(0));
+ }
+ break;
+ default: // every other semantic is fetched from the shader inputs at addr
+ bld.mkFetch(i->getDef(0), i->dType,
+ FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), NULL);
+ break;
+ }
+ bld.getBB()->remove(i); // the RDSV is replaced by the instructions built above
+ return true;
+}
+
+// Integer multiplications whose source type is wider than 2 bytes are
+// handed to expandIntegerMUL() and its result is returned; every other
+// MUL is left alone and true is returned.
+bool
+NV50LoweringPreSSA::handleMUL(Instruction *i)
+{
+   const bool needsExpansion =
+      !isFloatType(i->dType) && typeSizeof(i->sType) > 2;
+
+   if (needsExpansion)
+      return expandIntegerMUL(&bld, i);
+
+   return true;
+}
+
+// Float DIV is rewritten as a multiplication by the reciprocal of the
+// second source; the RCP is built at the position set just in front of
+// the instruction. Non-float divisions are not touched here.
+// Always returns true.
+bool
+NV50LoweringPreSSA::handleDIV(Instruction *i)
+{
+   if (isFloatType(i->dType)) {
+      bld.setPosition(i, false);
+
+      Value *recip =
+         bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1))->getDef(0);
+
+      i->op = OP_MUL;
+      i->setSrc(1, recip);
+   }
+   return true;
+}
+
+// Lower SQRT(x) to RCP(RSQ(x)).
+//
+// The previous lowering computed x * RSQ(x). For x == 0 that evaluates
+// 0 * inf, which is NaN under IEEE 754 rules, so sqrt(0) came out as NaN
+// instead of 0 (and likewise inf * 0 for x == +inf). Taking the
+// reciprocal of the reciprocal square root avoids the 0 * inf product.
+// NOTE(review): assumes the hardware RSQ/RCP return inf/0 for 0/inf
+// inputs - confirm against the ISA behaviour.
+//
+// The instruction itself becomes the RSQ and an RCP that overwrites the
+// same destination is appended right after it (this pass runs before SSA
+// construction, as handleSET does for its ABS/CVT pair).
+// Always returns true.
+bool
+NV50LoweringPreSSA::handleSQRT(Instruction *i)
+{
+   bld.setPosition(i, true);
+   i->op = OP_RSQ;
+   bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0));
+
+   return true;
+}
+
+bool // Lowers POW(x, y) to EX2(PREEX2(y * LG2(x))); i itself becomes the final EX2. Always returns true.
+NV50LoweringPreSSA::handlePOW(Instruction *i)
+{
+ LValue *val = bld.getScratch(); // one scratch value is reused for every intermediate step
+
+ bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0));
+ bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1; // the dnz flag is set on this multiplication
+ bld.mkOp1(OP_PREEX2, TYPE_F32, val, val);
+
+ i->op = OP_EX2;
+ i->setSrc(0, val);
+ i->setSrc(1, NULL); // EX2 takes a single source
+
+ return true;
+}
+
+// In fragment programs an EXPORT is turned into a MOV to the GPR whose id
+// is the export offset divided by 4, and prog->maxGPR is raised to cover
+// that register. Indirectly addressed exports are not supported yet and
+// make the function return false. EXPORTs in other program types are
+// left unchanged (returns true).
+bool
+NV50LoweringPreSSA::handleEXPORT(Instruction *i)
+{
+   if (prog->getType() != Program::TYPE_FRAGMENT)
+      return true;
+
+   if (i->getIndirect(0, 0)) {
+      // TODO: redirect to l[] here, load to GPRs at exit
+      return false;
+   }
+
+   const int reg = i->getSrc(0)->reg.data.offset / 4; // in 32 bit reg units
+
+   i->op = OP_MOV;
+   i->src(0).set(i->src(1));
+   i->setSrc(1, NULL);
+   i->setDef(0, new_LValue(func, FILE_GPR));
+   i->getDef(0)->reg.data.id = reg;
+
+   prog->maxGPR = MAX2(prog->maxGPR, reg);
+   return true;
+}
+
+// Make sure the predicate of insn lives in the flags file.
+// If insn has a predicate that is not already a FILE_FLAGS value, a
+// SET (CC_NEU against immediate 0) writing a fresh flags value is built
+// and insn is re-predicated on it with its original condition code.
+void
+NV50LoweringPreSSA::checkPredicate(Instruction *insn)
+{
+   Value *pred = insn->getPredicate();
+
+   if (pred && pred->reg.file != FILE_FLAGS) {
+      Value *flags = bld.getSSA(1, FILE_FLAGS);
+
+      bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, flags, bld.loadImm(NULL, 0), pred);
+
+      insn->setPredicate(insn->cc, flags);
+   }
+}
+
+//
+// - add quadop dance for texturing
+// - put FP outputs in GPRs
+// - convert instruction sequences
+//
+bool // Per-instruction entry point: positions the builder next to i, legalizes i's predicate if it has a condition, then dispatches on the opcode. Returns the handler's result, or true when no handler applies.
+NV50LoweringPreSSA::visit(Instruction *i)
+{
+ if (i->prev)
+ bld.setPosition(i->prev, true); // preferred: position relative to the previous instruction
+ else
+ if (i->next)
+ bld.setPosition(i->next, false); // no predecessor: position relative to the next instruction
+ else
+ bld.setPosition(i->bb, true); // i is alone in its block: position relative to the block
+
+ if (i->cc != CC_ALWAYS)
+ checkPredicate(i);
+
+ switch (i->op) {
+ case OP_TEX:
+ case OP_TXF:
+ case OP_TXG:
+ return handleTEX(i->asTex());
+ case OP_TXB:
+ return handleTXB(i->asTex());
+ case OP_TXL:
+ return handleTXL(i->asTex());
+ case OP_TXD:
+ return handleTXD(i->asTex());
+ case OP_EX2: // EX2 gets a PREEX2 on its source, written to and then read back from its own destination
+ bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
+ i->setSrc(0, i->getDef(0));
+ break;
+ case OP_SET:
+ return handleSET(i);
+ case OP_SLCT:
+ return handleSLCT(i->asCmp());
+ case OP_SELP:
+ return handleSELP(i);
+ case OP_POW:
+ return handlePOW(i);
+ case OP_MUL:
+ return handleMUL(i);
+ case OP_DIV:
+ return handleDIV(i);
+ case OP_SQRT:
+ return handleSQRT(i);
+ case OP_EXPORT:
+ return handleEXPORT(i);
+ case OP_RDSV:
+ return handleRDSV(i);
+ case OP_WRSV:
+ return handleWRSV(i);
+ case OP_CALL:
+ return handleCALL(i);
+ case OP_PRECONT:
+ return handlePRECONT(i);
+ case OP_CONT:
+ return handleCONT(i);
+ default: // all other opcodes pass through unchanged
+ break;
+ }
+ return true;
+}
+
+// Run the legalization pass that belongs to the given code generation
+// stage and return its result (false for a stage that has no pass).
+//
+// prog->targetPriv carries a std::list<Instruction *> between stages: it
+// is created on demand before the SSA pass and released after the
+// post-RA pass. The pointer is reset to NULL once the list is deleted so
+// that it never dangles and a repeated post-RA run cannot delete it twice.
+bool
+TargetNV50::runLegalizePass(Program *prog, CGStage stage) const
+{
+   bool ret = false;
+
+   if (stage == CG_STAGE_PRE_SSA) {
+      NV50LoweringPreSSA pass(prog);
+      ret = pass.run(prog, false, true);
+   } else
+   if (stage == CG_STAGE_SSA) {
+      if (!prog->targetPriv)
+         prog->targetPriv = new std::list<Instruction *>();
+      NV50LegalizeSSA pass(prog);
+      ret = pass.run(prog, false, true);
+   } else
+   if (stage == CG_STAGE_POST_RA) {
+      NV50LegalizePostRA pass;
+      ret = pass.run(prog, false, true);
+      if (prog->targetPriv) {
+         delete reinterpret_cast<std::list<Instruction *> *>(prog->targetPriv);
+         prog->targetPriv = NULL;
+      }
+   }
+   return ret;
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
index 61382336bc4..c6134465996 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
@@ -265,7 +265,7 @@ int Modifier::print(char *buf, size_t size) const
return pos;
}
-
+
int LValue::print(char *buf, size_t size, DataType ty) const
{
const char *postFix = "";
@@ -278,14 +278,23 @@ int LValue::print(char *buf, size_t size, DataType ty) const
switch (reg.file) {
case FILE_GPR:
r = 'r'; col = TXT_GPR;
- if (reg.size == 8)
+ if (reg.size == 2) {
+ if (p == '$') {
+ postFix = (idx & 1) ? "h" : "l";
+ idx /= 2;
+ } else {
+ postFix = "s";
+ }
+ } else
+ if (reg.size == 8) {
postFix = "d";
- else
- if (reg.size == 16)
+ } else
+ if (reg.size == 16) {
postFix = "q";
- else
- if (reg.size == 12)
+ } else
+ if (reg.size == 12) {
postFix = "t";
+ }
break;
case FILE_PREDICATE:
r = 'p'; col = TXT_REGISTER;
@@ -419,7 +428,7 @@ void Instruction::print() const
} else {
PRINT("%s", CondCodeStr[cc]);
}
- if (pos > pre + 1)
+ if (pos > pre)
SPACE();
pos += getSrc(predSrc)->print(&buf[pos], BUFSZ - pos);
PRINT(" %s", colour[TXT_INSN]);
@@ -489,6 +498,8 @@ void Instruction::print() const
else
pos += getSrc(s)->print(&buf[pos], BUFSZ - pos, sType);
}
+ if (exit)
+ PRINT("%s exit", colour[TXT_INSN]);
PRINT("%s", colour[TXT_DEFAULT]);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
index 598e0d26384..27b9610ed52 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
@@ -54,6 +54,7 @@ const uint8_t Target::operationSrcNr[OP_LAST + 1] =
extern Target *getTargetNVC0(unsigned int chipset);
+extern Target *getTargetNV50(unsigned int chipset);
Target *Target::create(unsigned int chipset)
{
@@ -65,6 +66,7 @@ Target *Target::create(unsigned int chipset)
case 0x80:
case 0x90:
case 0xa0:
+ return getTargetNV50(chipset);
default:
ERROR("unsupported target: NV%x\n", chipset);
return 0;
@@ -76,6 +78,10 @@ void Target::destroy(Target *targ)
delete targ;
}
+CodeEmitter::CodeEmitter(const Target *target) : targ(target) // stores the target pointer in the targ member; does nothing else
+{
+}
+
void
CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
{
@@ -261,6 +267,10 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
emitSymbolTable(info);
+ // the nvc0 driver will print the binary itself together with the header
+ if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
+ emit->printBinary();
+
delete emit;
return true;
}
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
index 6640198f090..88996ebbde3 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
@@ -61,6 +61,8 @@ struct RelocInfo
class CodeEmitter
{
public:
+ CodeEmitter(const Target *);
+
// returns whether the instruction was encodable and written
virtual bool emitInstruction(Instruction *) = 0;
@@ -76,12 +78,14 @@ public:
inline void *getRelocInfo() const { return relocInfo; }
void prepareEmission(Program *);
- void prepareEmission(Function *);
+ virtual void prepareEmission(Function *);
virtual void prepareEmission(BasicBlock *);
void printBinary() const;
protected:
+ const Target *targ;
+
uint32_t *code;
uint32_t codeSize;
uint32_t codeSizeLimit;
@@ -105,6 +109,8 @@ public:
// The address chosen is supplied to the relocation routine.
virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const = 0;
+ virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) { }
+
virtual bool runLegalizePass(Program *, CGStage stage) const = 0;
public:
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
new file mode 100644
index 00000000000..a64f7f72255
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
@@ -0,0 +1,531 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_ir_target_nv50.h"
+
+namespace nv50_ir {
+
+Target *getTargetNV50(unsigned int chipset) // factory: returns a newly allocated TargetNV50 for the given chipset id
+{
+ return new TargetNV50(chipset);
+}
+
+TargetNV50::TargetNV50(unsigned int card) // records the chipset, marks every system value location as unassigned and builds the opInfo table
+{
+ chipset = card;
+
+ wposMask = 0;
+ for (unsigned int i = 0; i <= SV_LAST; ++i)
+ sysvalLocation[i] = ~0; // all bits set = no location recorded yet (parseDriverInfo fills in real ones)
+
+ initOpInfo();
+}
+
+#if 0
+// BUILTINS / LIBRARY FUNCTIONS:
+
+// TODO
+static const uint32_t nvc0_builtin_code[] =
+{
+};
+
+static const uint16_t nvc0_builtin_offsets[NV50_BUILTIN_COUNT] =
+{
+};
+#endif
+
+void // Reports the builtin library code; this implementation always reports none (NULL code, size 0).
+TargetNV50::getBuiltinCode(const uint32_t **code, uint32_t *size) const
+{
+ *code = NULL;
+ *size = 0;
+}
+
+uint32_t // Offset of a builtin function; always 0 here, the builtin argument is ignored.
+TargetNV50::getBuiltinOffset(int builtin) const
+{
+ return 0;
+}
+
+struct opProperties // one row of the _initProps table consumed by TargetNV50::initOpInfo()
+{
+ operation op;
+ unsigned int mNeg : 4; // bit s set: source s accepts the NEG modifier
+ unsigned int mAbs : 4; // bit s set: source s accepts the ABS modifier
+ unsigned int mNot : 4; // bit s set: source s accepts the NOT modifier
+ unsigned int mSat : 4; // bit 3 (value 8) set: destination accepts the SAT modifier
+ unsigned int fConst : 3; // bit s set: source s may come from FILE_MEMORY_CONST
+ unsigned int fShared : 3; // bit s set: source s may come from FILE_MEMORY_SHARED
+ unsigned int fAttrib : 3; // bit s set: source s may come from FILE_SHADER_INPUT
+ unsigned int fImm : 3; // bit s set: source s may be FILE_IMMEDIATE
+};
+
+static const struct opProperties _initProps[] = // per-op capabilities; every column except "sat" is a bitmask over the source index (bit s = source s), "sat" uses value 8
+{
+ // neg abs not sat c[] s[], a[], imm
+ { OP_ADD, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
+ { OP_SUB, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
+ { OP_MUL, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
+ { OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
+ { OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
+ { OP_MAD, 0x7, 0x0, 0x0, 0x0, 0x6, 0x1, 0x1, 0x0 }, // special constraint
+ { OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0 },
+ { OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0 },
+ { OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x0, 0x1, 0x1, 0x0 },
+ { OP_AND, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 },
+ { OP_OR, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 },
+ { OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 },
+ { OP_SHL, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2 },
+ { OP_SHR, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2 },
+ { OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
+ { OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
+ { OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
+ { OP_LG2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
+ { OP_RCP, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
+ { OP_RSQ, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
+ { OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
+ { OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
+};
+
+void TargetNV50::initOpInfo() // fills opInfo[] with defaults for every operation, then applies the exception lists below and the _initProps table
+{
+ unsigned int i, j;
+
+ static const uint32_t commutative[(OP_LAST + 31) / 32] = // bitset indexed by operation number
+ {
+ // ADD,MAD,MUL,AND,OR,XOR,MAX,MIN
+ 0x0670ca00, 0x0000003f, 0x00000000
+ };
+ static const uint32_t shortForm[(OP_LAST + 31) / 32] = // bitset indexed by operation number: set bit = minEncSize 4
+ {
+ // MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF
+ 0x00010e40, 0x00000040, 0x00000498
+ };
+ static const operation noDestList[] = // operations whose hasDest is cleared
+ {
+ OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
+ OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
+ OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
+ OP_QUADON, OP_QUADPOP
+ };
+ static const operation noPredList[] = // operations whose predicate flag is cleared
+ {
+ OP_CALL, OP_PREBREAK, OP_PRERET, OP_QUADON, OP_QUADPOP, OP_JOINAT
+ };
+
+ joinAnterior = true;
+
+ for (i = 0; i < DATA_FILE_COUNT; ++i)
+ nativeFileMap[i] = (DataFile)i; // identity mapping ...
+ nativeFileMap[FILE_PREDICATE] = FILE_FLAGS; // ... except predicates, which map to the flags file
+
+ for (i = 0; i < OP_LAST; ++i) { // defaults: F32 in/out, GPR-only operands, no modifiers
+ opInfo[i].variants = NULL;
+ opInfo[i].op = (operation)i;
+ opInfo[i].srcTypes = 1 << (int)TYPE_F32;
+ opInfo[i].dstTypes = 1 << (int)TYPE_F32;
+ opInfo[i].immdBits = 0xffffffff;
+ opInfo[i].srcNr = operationSrcNr[i];
+
+ for (j = 0; j < opInfo[i].srcNr; ++j) {
+ opInfo[i].srcMods[j] = 0;
+ opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
+ }
+ opInfo[i].dstMods = 0;
+ opInfo[i].dstFiles = 1 << (int)FILE_GPR;
+
+ opInfo[i].hasDest = 1;
+ opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
+ opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
+ opInfo[i].pseudo = (i < OP_MOV);
+ opInfo[i].predicate = !opInfo[i].pseudo;
+ opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
+ opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
+ }
+ for (i = 0; i < sizeof(noDestList) / sizeof(noDestList[0]); ++i)
+ opInfo[noDestList[i]].hasDest = 0;
+ for (i = 0; i < sizeof(noPredList) / sizeof(noPredList[0]); ++i)
+ opInfo[noPredList[i]].predicate = 0;
+
+ for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) {
+ const struct opProperties *prop = &_initProps[i];
+
+ for (int s = 0; s < 3; ++s) { // bit s of each mask enables the property for source s (sources 0..2 only)
+ if (prop->mNeg & (1 << s))
+ opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG;
+ if (prop->mAbs & (1 << s))
+ opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS;
+ if (prop->mNot & (1 << s))
+ opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT;
+ if (prop->fConst & (1 << s))
+ opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST;
+ if (prop->fShared & (1 << s))
+ opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_SHARED;
+ if (prop->fAttrib & (1 << s))
+ opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_SHADER_INPUT;
+ if (prop->fImm & (1 << s))
+ opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE;
+ }
+ if (prop->mSat & 8) // saturation applies to the destination, encoded as value 8 in the table
+ opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
+ }
+}
+
+unsigned int // Returns the size reported for a register/memory file; asserts and returns 0 for a file not listed below.
+TargetNV50::getFileSize(DataFile file) const
+{
+ switch (file) {
+ case FILE_NULL: return 0;
+ case FILE_GPR: return 256; // in 16-bit units **
+ case FILE_PREDICATE: return 0; // initOpInfo maps FILE_PREDICATE to FILE_FLAGS for this target
+ case FILE_FLAGS: return 4;
+ case FILE_ADDRESS: return 4;
+ case FILE_IMMEDIATE: return 0;
+ case FILE_MEMORY_CONST: return 65536;
+ case FILE_SHADER_INPUT: return 0x200;
+ case FILE_SHADER_OUTPUT: return 0x200;
+ case FILE_MEMORY_GLOBAL: return 0xffffffff;
+ case FILE_MEMORY_SHARED: return 16 << 10;
+ case FILE_MEMORY_LOCAL: return 48 << 10;
+ case FILE_SYSTEM_VALUE: return 16;
+ default:
+ assert(!"invalid file");
+ return 0;
+ }
+ // ** only first 128 units encodable for 16-bit regs
+}
+
+// Returns the unit value for a file: 1 for GPRs and address registers,
+// 2 for system values and 0 for everything else.
+unsigned int
+TargetNV50::getFileUnit(DataFile file) const
+{
+   switch (file) {
+   case FILE_GPR:
+   case FILE_ADDRESS:
+      return 1;
+   case FILE_SYSTEM_VALUE:
+      return 2;
+   default:
+      return 0;
+   }
+}
+
+// Returns the address associated with a system value symbol.
+//  - FACE and TID have fixed addresses (0x3fc and 0).
+//  - NCTAID / CTAID / NTID are laid out as 2-byte components starting at
+//    0x8 / 0xc / 0x2.
+//  - everything else uses the location recorded in sysvalLocation[];
+//    for POSITION that base is advanced by 4 for every component below
+//    the requested index that is enabled in wposMask.
+// The shaderFile argument is not used by this implementation.
+uint32_t
+TargetNV50::getSVAddress(DataFile shaderFile, const Symbol *sym) const
+{
+   const SVSemantic sv = sym->reg.data.sv.sv;
+
+   if (sv == SV_FACE)
+      return 0x3fc;
+   if (sv == SV_TID)
+      return 0;
+   if (sv == SV_NCTAID)
+      return 0x8 + 2 * sym->reg.data.sv.index;
+   if (sv == SV_CTAID)
+      return 0xc + 2 * sym->reg.data.sv.index;
+   if (sv == SV_NTID)
+      return 0x2 + 2 * sym->reg.data.sv.index;
+
+   uint32_t addr = sysvalLocation[sv];
+
+   if (sv == SV_POSITION) {
+      // skip over the enabled components that precede the requested one
+      for (int c = 0; c < sym->reg.data.sv.index; ++c) {
+         if (wposMask & (1 << c))
+            addr += 4;
+      }
+   }
+   return addr;
+}
+
+// Returns whether source s of instruction i may take its value directly
+// from the location read by the load ld (instead of from a GPR).
+//
+// Operand file combinations accepted by the check below:
+// long: rrr, arr, rcr, acr, rrc, arc, gcr, grr
+// short: rr, ar, rc, gr
+// immd: ri, gi
+//
+// The combination is summarized in "mode", two bits per source z at bit
+// position 2 * z: 0 = GPR, 1 = s[] or shader input, 2 = c[], 3 = immediate.
+//
+// Fixes over the previous version:
+//  - mode 0x0d (source 0 from s[]/input, source 1 immediate) fell through
+//    into "default: return false", so it was rejected even in geometry
+//    programs and the program type test was dead code;
+//  - that test dereferenced ld->bb without the NULL check that the
+//    indirect-addressing path further down performs.
+bool
+TargetNV50::insnCanLoad(const Instruction *i, int s,
+                        const Instruction *ld) const
+{
+   DataFile sf = ld->src(0).getFile();
+
+   if (sf == FILE_IMMEDIATE && (i->predSrc >= 0 || i->flagsDef >= 0))
+      return false;
+   if (s >= opInfo[i->op].srcNr)
+      return false;
+   if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf)))
+      return false;
+   if (s == 2 && i->src(1).getFile() != FILE_GPR)
+      return false;
+
+   // NOTE: don't rely on flagsDef
+   for (int d = 0; i->defExists(d); ++d)
+      if (i->def(d).getFile() == FILE_FLAGS)
+         return false;
+
+   unsigned mode = 0;
+
+   // build the operand pattern as it would look with the load folded in
+   for (int z = 0; z < Target::operationSrcNr[i->op]; ++z) {
+      DataFile zf = (z == s) ? sf : i->src(z).getFile();
+      switch (zf) {
+      case FILE_GPR:
+         break;
+      case FILE_MEMORY_SHARED:
+      case FILE_SHADER_INPUT:
+         mode |= 1 << (z * 2);
+         break;
+      case FILE_MEMORY_CONST:
+         mode |= 2 << (z * 2);
+         break;
+      case FILE_IMMEDIATE:
+         mode |= 3 << (z * 2);
+         break;
+      default:
+         break;
+      }
+   }
+
+   switch (mode) {
+   case 0x00:
+   case 0x01:
+   case 0x03:
+   case 0x08:
+   case 0x09:
+   case 0x0c:
+   case 0x20:
+   case 0x21:
+      break;
+   case 0x0d:
+      // only allowed in geometry programs; without a block we can't tell
+      if (!ld->bb ||
+          ld->bb->getProgram()->getType() != Program::TYPE_GEOMETRY)
+         return false;
+      break;
+   default:
+      return false;
+   }
+
+   if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * typeSizeof(ld->dType)))
+      return false;
+
+   if (ld->src(0).isIndirect(0)) {
+      // i must not already use indirect addressing on any source
+      for (int z = 0; i->srcExists(z); ++z)
+         if (i->src(z).isIndirect(0))
+            return false;
+
+      // s[] access only possible in CP, $aX always applies
+      if (sf == FILE_MEMORY_SHARED)
+         return true;
+      if (!ld->bb) // can't check type ...
+         return false;
+      Program::Type pt = ld->bb->getProgram()->getType();
+
+      // $aX applies to c[] only in VP, FP, GP if p[] is not accessed
+      if (pt == Program::TYPE_COMPUTE)
+         return false;
+      if (pt == Program::TYPE_GEOMETRY) {
+         if (sf == FILE_MEMORY_CONST)
+            return i->src(s).getFile() != FILE_SHADER_INPUT;
+         return sf == FILE_SHADER_INPUT;
+      }
+      return sf == FILE_MEMORY_CONST;
+   }
+   return true;
+}
+
+// Tells whether a memory access of type ty to the given file is
+// supported: B96 and NONE never are, types wider than 4 bytes only for
+// local and global memory, and everything else always is.
+bool
+TargetNV50::isAccessSupported(DataFile file, DataType ty) const
+{
+   if (ty == TYPE_B96 || ty == TYPE_NONE)
+      return false;
+
+   const bool wide = typeSizeof(ty) > 4;
+
+   return !wide || file == FILE_MEMORY_LOCAL || file == FILE_MEMORY_GLOBAL;
+}
+
+bool // Tells whether operation op on data type ty is supported by this chipset; unsupported ops are expected to be lowered elsewhere.
+TargetNV50::isOpSupported(operation op, DataType ty) const
+{
+ if (ty == TYPE_F64 && chipset < 0xa0) // F64 is rejected for every op on chipsets below 0xa0
+ return false;
+
+ switch (op) {
+ case OP_PRERET:
+ return chipset >= 0xa0;
+ case OP_TXG:
+ return chipset >= 0xa3;
+ case OP_POW: // the ops in this group are reported as unsupported on every chipset
+ case OP_SQRT:
+ case OP_DIV:
+ case OP_MOD:
+ case OP_SET_AND:
+ case OP_SET_OR:
+ case OP_SET_XOR:
+ case OP_SLCT:
+ case OP_SELP:
+ case OP_POPCNT:
+ case OP_INSBF:
+ case OP_EXTBF:
+ case OP_EXIT: // want exit modifier instead (on NOP if required)
+ return false;
+ case OP_SAD:
+ return ty == TYPE_S32; // SAD only with signed 32-bit type
+ default:
+ return true;
+ }
+}
+
+// Tells whether modifier mod may be applied to source s of insn.
+//
+// For non-float destination types only a fixed set of operations accepts
+// source modifiers at all, with extra restrictions for ADD/SUB (only one
+// negated operand) and SET (float sources only). The final answer comes
+// from the per-source modifier mask set up by initOpInfo().
+//
+// The source index is validated first: initOpInfo() only fills modifier
+// information for sources 0..2, so the former "s > 3" bound let s == 3
+// index past the initialized entries of srcMods[].
+// NOTE(review): srcMods[] is presumably declared with 3 entries - confirm
+// against the OpInfo declaration.
+bool
+TargetNV50::isModSupported(const Instruction *insn, int s, Modifier mod) const
+{
+   if (s >= 3)
+      return false;
+
+   if (!isFloatType(insn->dType)) {
+      switch (insn->op) {
+      case OP_ABS:
+      case OP_NEG:
+      case OP_CVT:
+      case OP_CEIL:
+      case OP_FLOOR:
+      case OP_TRUNC:
+      case OP_AND:
+      case OP_OR:
+      case OP_XOR:
+         break;
+      case OP_ADD:
+         // the other operand must not already be negated
+         if (insn->src(s ? 0 : 1).mod.neg())
+            return false;
+         break;
+      case OP_SUB:
+         if (s == 0)
+            return insn->src(1).mod.neg() ? false : true;
+         break;
+      case OP_SET:
+         if (insn->sType != TYPE_F32)
+            return false;
+         break;
+      default:
+         return false;
+      }
+   }
+   return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
+}
+
+// Tells whether insn may be given a predicate: not if it already has one
+// or reads flags, not if any of its sources is an immediate, and
+// otherwise only if the operation is marked as predicable in opInfo.
+// The pred argument is not inspected.
+bool
+TargetNV50::mayPredicate(const Instruction *insn, const Value *pred) const
+{
+   const bool usesCondition = insn->getPredicate() || insn->flagsSrc >= 0;
+   if (usesCondition)
+      return false;
+
+   int s = 0;
+   while (insn->srcExists(s)) {
+      if (insn->src(s).getFile() == FILE_IMMEDIATE)
+         return false;
+      ++s;
+   }
+   return opInfo[insn->op].predicate;
+}
+
+// Tells whether the saturate modifier may be put on insn: always for CVT,
+// otherwise only for F32 results of operations whose destination
+// modifiers include NV50_IR_MOD_SAT.
+bool
+TargetNV50::isSatSupported(const Instruction *insn) const
+{
+   if (insn->op == OP_CVT)
+      return true;
+
+   return insn->dType == TYPE_F32 &&
+          (opInfo[insn->op].dstMods & NV50_IR_MOD_SAT);
+}
+
+// Estimated result latency of an instruction: 100 for loads from local
+// or global memory, 22 for everything else.
+int TargetNV50::getLatency(const Instruction *i) const
+{
+   // TODO: tune these values
+   if (i->op != OP_LOAD)
+      return 22;
+
+   const DataFile file = i->src(0).getFile();
+   const bool offChip =
+      file == FILE_MEMORY_LOCAL || file == FILE_MEMORY_GLOBAL;
+
+   return offChip ? 100 /* really 400 to 800 */ : 22;
+}
+
+// Returns the "inverse" throughput of an instruction, i.e. the number of
+// cycles needed to issue it for a full warp (32 threads).
+//
+// With more than one warp in flight, a higher issue latency leads to a
+// lower result latency because the MP spends more time on other warps.
+// The value also helps to determine the number of cycles between
+// instructions of a single warp.
+//
+int TargetNV50::getThroughput(const Instruction *i) const
+{
+   // TODO: tune these values
+   switch (i->dType) {
+   case TYPE_F32:
+      switch (i->op) {
+      case OP_RCP:
+      case OP_RSQ:
+      case OP_LG2:
+      case OP_SIN:
+      case OP_COS:
+      case OP_PRESIN:
+      case OP_PREEX2:
+         return 16;
+      default:
+         return 4;
+      }
+   case TYPE_U32:
+   case TYPE_S32:
+      return 4;
+   case TYPE_F64:
+      return 32;
+   default:
+      return 1;
+   }
+}
+
+// Records where the driver placed a varying that corresponds to a system
+// value: locs[] receives 4 * slot[0] of the varying under the matching
+// SV_* index. For the position semantic the component mask is
+// additionally stored in masks[0] when masks is not NULL.
+// Varyings with other semantics are ignored.
+static void
+recordLocation(uint16_t *locs, uint8_t *masks,
+               const struct nv50_ir_varying *var)
+{
+   const uint16_t addr = var->slot[0] * 4;
+
+   switch (var->sn) {
+   case TGSI_SEMANTIC_POSITION:
+      locs[SV_POSITION] = addr;
+      if (masks)
+         masks[0] = var->mask;
+      break;
+   case TGSI_SEMANTIC_INSTANCEID:
+      locs[SV_INSTANCE_ID] = addr;
+      break;
+   case TGSI_SEMANTIC_VERTEXID:
+      locs[SV_VERTEX_ID] = addr;
+      break;
+   case TGSI_SEMANTIC_PRIMID:
+      locs[SV_PRIMITIVE_ID] = addr;
+      break;
+   case NV50_SEMANTIC_LAYER:
+      locs[SV_LAYER] = addr;
+      break;
+   case NV50_SEMANTIC_VIEWPORTINDEX:
+      locs[SV_VIEWPORT_INDEX] = addr;
+      break;
+   default:
+      break;
+   }
+}
+
+// Collects the system value locations from the driver supplied program
+// info: outputs first, then inputs (which also provide the position
+// component mask), then the explicit system values. If no position
+// location below 0x200 was recorded, a default one (address 0,
+// mask 0x8) is installed.
+void
+TargetNV50::parseDriverInfo(const struct nv50_ir_prog_info *info)
+{
+   unsigned int n;
+
+   for (n = 0; n < info->numOutputs; ++n)
+      recordLocation(sysvalLocation, NULL, &info->out[n]);
+
+   for (n = 0; n < info->numInputs; ++n)
+      recordLocation(sysvalLocation, &wposMask, &info->in[n]);
+
+   for (n = 0; n < info->numSysVals; ++n)
+      recordLocation(sysvalLocation, NULL, &info->sv[n]);
+
+   if (sysvalLocation[SV_POSITION] < 0x200)
+      return;
+
+   // not assigned by driver, but we need it internally
+   wposMask = 0x8;
+   sysvalLocation[SV_POSITION] = 0;
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.h b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.h
new file mode 100644
index 00000000000..99e6f565612
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50/codegen/nv50_ir_target.h"
+
+namespace nv50_ir {
+
+#define NVC0_BUILTIN_DIV_U32 0 // builtin function indices; NOTE(review): these carry the NVC0_ prefix although this is the NV50 target header - confirm whether that is intended
+#define NVC0_BUILTIN_DIV_S32 1
+#define NVC0_BUILTIN_RCP_F64 2
+#define NVC0_BUILTIN_RSQ_F64 3
+
+#define NVC0_BUILTIN_COUNT 4 // number of builtin indices defined above
+
+class TargetNV50 : public Target // Target implementation for the NV50 chipset family
+{
+public:
+ TargetNV50(unsigned int chipset);
+
+ virtual CodeEmitter *getCodeEmitter(Program::Type);
+
+ virtual bool runLegalizePass(Program *, CGStage stage) const; // runs the lowering/legalization pass of the given stage
+
+ virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const;
+
+ virtual void parseDriverInfo(const struct nv50_ir_prog_info *); // fills sysvalLocation[] and wposMask from the driver info
+
+ virtual bool insnCanLoad(const Instruction *insn, int s,
+ const Instruction *ld) const;
+ virtual bool isOpSupported(operation, DataType) const;
+ virtual bool isAccessSupported(DataFile, DataType) const;
+ virtual bool isModSupported(const Instruction *, int s, Modifier) const;
+ virtual bool isSatSupported(const Instruction *) const;
+ virtual bool mayPredicate(const Instruction *, const Value *) const;
+
+ virtual int getLatency(const Instruction *) const;
+ virtual int getThroughput(const Instruction *) const;
+
+ virtual unsigned int getFileSize(DataFile) const;
+ virtual unsigned int getFileUnit(DataFile) const;
+
+ virtual uint32_t getSVAddress(DataFile shaderFile, const Symbol *sv) const;
+
+ uint32_t getBuiltinOffset(int builtin) const;
+
+private:
+ void initOpInfo(); // builds the per-operation opInfo table; called from the constructor
+
+ uint16_t sysvalLocation[SV_LAST + 1]; // location per system value semantic; all bits set while unassigned
+ uint8_t wposMask; // component mask recorded for the position input
+};
+
+} // namespace nv50_ir