author     Christoph Bumiller <[email protected]>   2011-09-14 16:18:23 +0200
committer  Christoph Bumiller <[email protected]>   2011-09-14 16:19:52 +0200
commit     57594065c30feec9376be9b2132659f7d87362ee (patch)
tree       7e6808e0c5240b513851b7925c5be6678663b5e5 /src
parent     a42eca84c56f6860e67c0c57f4765a5530cc5f81 (diff)
nv50/ir: import new shader backend code
Diffstat (limited to 'src')
-rw-r--r--  src/gallium/drivers/nv50/Makefile | 2
-rw-r--r--  src/gallium/drivers/nv50/Makefile.sources | 14
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir.cpp | 1008
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir.h | 1049
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_bb.cpp | 409
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp | 501
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h | 245
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_driver.h | 149
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp | 1333
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp | 2288
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp | 381
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_graph.h | 207
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h | 328
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp | 2192
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp | 558
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp | 963
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_ssa.cpp | 463
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp | 304
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_target.h | 164
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_util.cpp | 253
-rw-r--r--  src/gallium/drivers/nv50/codegen/nv50_ir_util.h | 585
-rw-r--r--  src/gallium/drivers/nvc0/Makefile | 2
-rw-r--r--  src/gallium/drivers/nvc0/Makefile.sources | 5
-rw-r--r--  src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp | 1714
-rw-r--r--  src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp | 705
-rw-r--r--  src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp | 568
-rw-r--r--  src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h | 46
-rw-r--r--  src/gallium/targets/gbm/Makefile | 1
28 files changed, 16435 insertions, 2 deletions
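
For orientation before the diff itself: the C entry point added below in nv50_ir.cpp, nv50_ir_generate_code(), is what the nv50/nvc0 drivers are expected to call. A rough sketch of such a call site follows; it uses only the nv50_ir_prog_info fields that the code in this patch actually reads or writes, and the chipset/upload helper names are assumptions, not part of the patch.

    /* hypothetical caller in the driver's shader translation path */
    struct nv50_ir_prog_info info;
    memset(&info, 0, sizeof(info));

    info.type = PIPE_SHADER_FRAGMENT;  /* mapped to Program::TYPE_FRAGMENT below */
    info.target = chipset;             /* assumption: chip id handed to Target::create() */
    info.optLevel = 3;
    info.dbgFlags = 0;
    /* the TGSI tokens go in via info.bin (see nv50_ir_driver.h); only the
       TGSI front end is wired up in this commit */

    if (nv50_ir_generate_code(&info) < 0)
       return false;                   /* negative return values signal failure */

    upload_code(info.bin.code, info.bin.codeSize);  /* hypothetical helper */
    max_gpr = info.bin.maxGPR;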
diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile
index 220adf696b3..18e30b0a54f 100644
--- a/src/gallium/drivers/nv50/Makefile
+++ b/src/gallium/drivers/nv50/Makefile
@@ -3,7 +3,7 @@ include $(TOP)/configs/current
LIBNAME = nv50
-# get C_SOURCES
+# get C/CPP_SOURCES
include Makefile.sources
LIBRARY_INCLUDES = \
diff --git a/src/gallium/drivers/nv50/Makefile.sources b/src/gallium/drivers/nv50/Makefile.sources
index 756f90be979..cc9321bef7e 100644
--- a/src/gallium/drivers/nv50/Makefile.sources
+++ b/src/gallium/drivers/nv50/Makefile.sources
@@ -21,3 +21,17 @@ C_SOURCES := \
nv50_pc_regalloc.c \
nv50_push.c \
nv50_query.c
+
+CPP_SOURCES := \
+ codegen/nv50_ir.cpp \
+ codegen/nv50_ir_bb.cpp \
+ codegen/nv50_ir_build_util.cpp \
+ codegen/nv50_ir_emit_nv50.cpp \
+ codegen/nv50_ir_from_tgsi.cpp \
+ codegen/nv50_ir_graph.cpp \
+ codegen/nv50_ir_peephole.cpp \
+ codegen/nv50_ir_print.cpp \
+ codegen/nv50_ir_ra.cpp \
+ codegen/nv50_ir_ssa.cpp \
+ codegen/nv50_ir_target.cpp \
+ codegen/nv50_ir_util.cpp
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
new file mode 100644
index 00000000000..ff2e6ef3401
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
@@ -0,0 +1,1008 @@
+
+#include "nv50_ir.h"
+#include "nv50_ir_target.h"
+#include "nv50_ir_driver.h"
+
+extern "C" {
+#include "nv50/nv50_program.h"
+#include "nv50/nv50_debug.h"
+}
+
+namespace nv50_ir {
+
+Modifier::Modifier(operation op)
+{
+ switch (op) {
+ case OP_NEG: bits = NV50_IR_MOD_NEG; break;
+ case OP_ABS: bits = NV50_IR_MOD_ABS; break;
+ case OP_SAT: bits = NV50_IR_MOD_SAT; break;
+ case OP_NOT: bits = NV50_IR_MOD_NOT; break;
+ default:
+ bits = 0;
+ break;
+ }
+}
+
+Modifier Modifier::operator*(const Modifier m) const
+{
+ unsigned int a, b, c;
+
+ b = m.bits;
+ if (this->bits & NV50_IR_MOD_ABS)
+ b &= ~NV50_IR_MOD_NEG;
+
+ a = (this->bits ^ b) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
+ c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);
+
+ return Modifier(a | c);
+}
+
+ValueRef::ValueRef() : value(0), insn(0), next(this), prev(this)
+{
+ indirect[0] = -1;
+ indirect[1] = -1;
+ usedAsPtr = false;
+}
+
+ValueRef::~ValueRef()
+{
+ this->set(NULL);
+}
+
+ImmediateValue *ValueRef::getImmediate() const
+{
+ Value *src = value;
+
+ while (src) {
+ if (src->reg.file == FILE_IMMEDIATE)
+ return src->asImm();
+
+ Instruction *insn = src->getUniqueInsn();
+
+ src = (insn && insn->op == OP_MOV) ? insn->getSrc(0) : NULL;
+ }
+ return NULL;
+}
+
+ValueDef::ValueDef() : value(0), insn(0), next(this), prev(this)
+{
+ // nothing to do
+}
+
+ValueDef::~ValueDef()
+{
+ this->set(NULL);
+}
+
+void
+ValueRef::set(const ValueRef &ref)
+{
+ this->set(ref.get());
+ mod = ref.mod;
+ indirect[0] = ref.indirect[0];
+ indirect[1] = ref.indirect[1];
+}
+
+void
+ValueRef::set(Value *refVal)
+{
+ if (value == refVal)
+ return;
+ if (value) {
+ if (value->uses == this)
+ value->uses = (next == this) ? NULL : next;
+ value->unref();
+ DLLIST_DEL(this);
+ }
+
+ if (refVal) {
+ if (refVal->uses)
+ DLLIST_ADDTAIL(refVal->uses, this);
+ else
+ refVal->uses = this;
+ refVal->ref();
+ }
+ value = refVal;
+}
+
+void
+ValueDef::set(Value *defVal)
+{
+ assert(next != this || prev == this); // check that SSA hack isn't active
+
+ if (value == defVal)
+ return;
+ if (value) {
+ if (value->defs == this)
+ value->defs = (next == this) ? NULL : next;
+ DLLIST_DEL(this);
+ }
+
+ if (defVal) {
+ if (defVal->defs)
+ DLLIST_ADDTAIL(defVal->defs, this);
+ else
+ defVal->defs = this;
+ }
+ value = defVal;
+}
+
+// TODO: make me faster by using a safe iterator
+void
+ValueDef::replace(Value *repVal, bool doSet)
+{
+ ValueRef **refs = new ValueRef * [value->refCount()];
+ int n = 0;
+
+ if (!refs && value->refCount())
+ FATAL("memory allocation failed");
+
+ for (ValueRef::Iterator iter = value->uses->iterator(); !iter.end();
+ iter.next()) {
+ assert(n < value->refCount());
+ refs[n++] = iter.get();
+ }
+ while (n)
+ refs[--n]->set(repVal);
+
+ if (doSet)
+ this->set(repVal);
+
+ if (refs)
+ delete[] refs;
+}
+
+void
+ValueDef::mergeDefs(ValueDef *join)
+{
+ DLLIST_MERGE(this, join, ValueDef *);
+}
+
+Value::Value()
+{
+ refCnt = 0;
+ uses = NULL;
+ defs = NULL;
+ join = this;
+
+ memset(&reg, 0, sizeof(reg));
+ reg.size = 4;
+}
+
+bool
+Value::coalesce(Value *jval, bool force)
+{
+ Value *repr = this->join; // new representative
+ Value *jrep = jval->join;
+
+ if (reg.file != jval->reg.file || reg.size != jval->reg.size) {
+ if (!force)
+ return false;
+ ERROR("forced coalescing of values of different sizes/files");
+ }
+
+ if (!force && (repr->reg.data.id != jrep->reg.data.id)) {
+ if (repr->reg.data.id >= 0 &&
+ jrep->reg.data.id >= 0)
+ return false;
+ if (jrep->reg.data.id >= 0) {
+ repr = jval->join;
+ jrep = this->join;
+ jval = this;
+ }
+
+ // need to check all fixed register values of the program for overlap
+ Function *func = defs->getInsn()->bb->getFunction();
+
+ // TODO: put values in by register-id bins per function
+ ArrayList::Iterator iter = func->allLValues.iterator();
+ for (; !iter.end(); iter.next()) {
+ Value *fixed = reinterpret_cast<Value *>(iter.get());
+ assert(fixed);
+ if (fixed->reg.data.id == repr->reg.data.id)
+ if (fixed->livei.overlaps(jrep->livei))
+ return false;
+ }
+ }
+ if (repr->livei.overlaps(jrep->livei)) {
+ if (!force)
+ return false;
+ // do we really want this ? if at all, only for constraint ops
+ INFO("NOTE: forced coalescing with live range overlap\n");
+ }
+
+ ValueDef::Iterator iter = jrep->defs->iterator();
+ for (; !iter.end(); iter.next())
+ iter.get()->get()->join = repr;
+
+ repr->defs->mergeDefs(jrep->defs);
+ repr->livei.unify(jrep->livei);
+
+ assert(repr->join == repr && jval->join == repr);
+ return true;
+}
+
+LValue::LValue(Function *fn, DataFile file)
+{
+ reg.file = file;
+ reg.size = (file != FILE_PREDICATE) ? 4 : 1;
+ reg.data.id = -1;
+
+ affinity = -1;
+
+ fn->add(this, this->id);
+}
+
+LValue::LValue(Function *fn, LValue *lval)
+{
+ assert(lval);
+
+ reg.file = lval->reg.file;
+ reg.size = lval->reg.size;
+ reg.data.id = -1;
+
+ affinity = -1;
+
+ fn->add(this, this->id);
+}
+
+Value *LValue::clone(Function *func) const
+{
+ LValue *that = new_LValue(func, reg.file);
+
+ that->reg.size = this->reg.size;
+ that->reg.type = this->reg.type;
+ that->reg.data = this->reg.data;
+
+ return that;
+}
+
+Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
+{
+ baseSym = NULL;
+
+ reg.file = f;
+ reg.fileIndex = fidx;
+ reg.data.offset = 0;
+
+ prog->add(this, this->id);
+}
+
+Value *
+Symbol::clone(Function *func) const
+{
+ Program *prog = func->getProgram();
+
+ Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
+
+ that->reg.size = this->reg.size;
+ that->reg.type = this->reg.type;
+ that->reg.data = this->reg.data;
+
+ that->baseSym = this->baseSym;
+
+ return that;
+}
+
+ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
+{
+ memset(&reg, 0, sizeof(reg));
+
+ reg.file = FILE_IMMEDIATE;
+ reg.size = 4;
+ reg.type = TYPE_U32;
+
+ reg.data.u32 = uval;
+
+ prog->add(this, this->id);
+}
+
+ImmediateValue::ImmediateValue(Program *prog, float fval)
+{
+ memset(&reg, 0, sizeof(reg));
+
+ reg.file = FILE_IMMEDIATE;
+ reg.size = 4;
+ reg.type = TYPE_F32;
+
+ reg.data.f32 = fval;
+
+ prog->add(this, this->id);
+}
+
+ImmediateValue::ImmediateValue(Program *prog, double dval)
+{
+ memset(&reg, 0, sizeof(reg));
+
+ reg.file = FILE_IMMEDIATE;
+ reg.size = 8;
+ reg.type = TYPE_F64;
+
+ reg.data.f64 = dval;
+
+ prog->add(this, this->id);
+}
+
+ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
+{
+ reg = proto->reg;
+
+ reg.type = ty;
+ reg.size = typeSizeof(ty);
+}
+
+bool
+ImmediateValue::isInteger(const int i) const
+{
+ switch (reg.type) {
+ case TYPE_S8:
+ return reg.data.s8 == i;
+ case TYPE_U8:
+ return reg.data.u8 == i;
+ case TYPE_S16:
+ return reg.data.s16 == i;
+ case TYPE_U16:
+ return reg.data.u16 == i;
+ case TYPE_S32:
+ case TYPE_U32:
+ return reg.data.s32 == i; // as if ...
+ case TYPE_F32:
+ return reg.data.f32 == static_cast<float>(i);
+ case TYPE_F64:
+ return reg.data.f64 == static_cast<double>(i);
+ default:
+ return false;
+ }
+}
+
+bool
+ImmediateValue::isNegative() const
+{
+ switch (reg.type) {
+ case TYPE_S8: return reg.data.s8 < 0;
+ case TYPE_S16: return reg.data.s16 < 0;
+ case TYPE_S32:
+ case TYPE_U32: return reg.data.s32 < 0;
+ case TYPE_F32: return reg.data.u32 & (1 << 31);
+ case TYPE_F64: return reg.data.u64 & (1ULL << 63);
+ default:
+ return false;
+ }
+}
+
+bool
+ImmediateValue::isPow2() const
+{
+ switch (reg.type) {
+ case TYPE_U8:
+ case TYPE_U16:
+ case TYPE_U32: return util_is_power_of_two(reg.data.u32);
+ default:
+ return false;
+ }
+}
+
+void
+ImmediateValue::applyLog2()
+{
+ switch (reg.type) {
+ case TYPE_S8:
+ case TYPE_S16:
+ case TYPE_S32:
+ assert(!this->isNegative());
+ // fall through
+ case TYPE_U8:
+ case TYPE_U16:
+ case TYPE_U32:
+ reg.data.u32 = util_logbase2(reg.data.u32);
+ break;
+ case TYPE_F32:
+ reg.data.f32 = log2f(reg.data.f32);
+ break;
+ case TYPE_F64:
+ reg.data.f64 = log2(reg.data.f64);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+bool
+ImmediateValue::compare(CondCode cc, float fval) const
+{
+ if (reg.type != TYPE_F32)
+ ERROR("immediate value is not of type f32");
+
+ switch (static_cast<CondCode>(cc & 7)) {
+ case CC_TR: return true;
+ case CC_FL: return false;
+ case CC_LT: return reg.data.f32 < fval;
+ case CC_LE: return reg.data.f32 <= fval;
+ case CC_GT: return reg.data.f32 > fval;
+ case CC_GE: return reg.data.f32 >= fval;
+ case CC_EQ: return reg.data.f32 == fval;
+ case CC_NE: return reg.data.f32 != fval;
+ default:
+ assert(0);
+ return false;
+ }
+}
+
+bool
+Value::interfers(const Value *that) const
+{
+ uint32_t idA, idB;
+
+ if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
+ return false;
+ if (this->asImm())
+ return false;
+
+ if (this->asSym()) {
+ idA = this->join->reg.data.offset;
+ idB = that->join->reg.data.offset;
+ } else {
+ idA = this->join->reg.data.id * this->reg.size;
+ idB = that->join->reg.data.id * that->reg.size;
+ }
+
+ if (idA < idB)
+ return (idA + this->reg.size > idB);
+ else
+ if (idA > idB)
+ return (idB + that->reg.size > idA);
+ else
+ return (idA == idB);
+}
+
+bool
+Value::equals(const Value *that, bool strict) const
+{
+ that = that->join;
+
+ if (strict)
+ return this == that;
+
+ if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
+ return false;
+ if (that->reg.size != this->reg.size)
+ return false;
+
+ if (that->reg.data.id != this->reg.data.id)
+ return false;
+
+ return true;
+}
+
+bool
+ImmediateValue::equals(const Value *that, bool strict) const
+{
+ const ImmediateValue *imm = that->asImm();
+ if (!imm)
+ return false;
+ return reg.data.u64 == imm->reg.data.u64;
+}
+
+bool
+Symbol::equals(const Value *that, bool strict) const
+{
+ if (this->reg.file != that->reg.file)
+ return false;
+ assert(that->asSym());
+
+ if (this->baseSym != that->asSym()->baseSym)
+ return false;
+
+ return this->reg.data.offset == that->reg.data.offset;
+}
+
+void Instruction::init()
+{
+ next = prev = 0;
+
+ cc = CC_ALWAYS;
+ rnd = ROUND_N;
+ cache = CACHE_CA;
+ subOp = 0;
+
+ saturate = 0;
+ join = terminator = 0;
+ ftz = dnz = 0;
+ atomic = 0;
+ perPatch = 0;
+ fixed = 0;
+ encSize = 0;
+ ipa = 0;
+
+ lanes = 0xf;
+
+ postFactor = 0;
+
+ for (int p = 0; p < NV50_IR_MAX_DEFS; ++p)
+ def[p].setInsn(this);
+ for (int p = 0; p < NV50_IR_MAX_SRCS; ++p)
+ src[p].setInsn(this);
+
+ predSrc = -1;
+ flagsDef = -1;
+ flagsSrc = -1;
+}
+
+Instruction::Instruction()
+{
+ init();
+
+ op = OP_NOP;
+ dType = sType = TYPE_F32;
+
+ id = -1;
+ bb = 0;
+}
+
+Instruction::Instruction(Function *fn, operation opr, DataType ty)
+{
+ init();
+
+ op = opr;
+ dType = sType = ty;
+
+ fn->add(this, id);
+}
+
+Instruction::~Instruction()
+{
+ if (bb) {
+ Function *fn = bb->getFunction();
+ bb->remove(this);
+ fn->allInsns.remove(id);
+ }
+
+ for (int s = 0; srcExists(s); ++s)
+ setSrc(s, NULL);
+ // must unlink defs too since the list pointers will get deallocated
+ for (int d = 0; defExists(d); ++d)
+ setDef(d, NULL);
+}
+
+void
+Instruction::setSrc(int s, ValueRef& ref)
+{
+ setSrc(s, ref.get());
+ src[s].mod = ref.mod;
+}
+
+void
+Instruction::swapSources(int a, int b)
+{
+ Value *value = src[a].get();
+ Modifier m = src[a].mod;
+
+ setSrc(a, src[b]);
+
+ src[b].set(value);
+ src[b].mod = m;
+}
+
+void
+Instruction::takeExtraSources(int s, Value *values[3])
+{
+ values[0] = getIndirect(s, 0);
+ if (values[0])
+ setIndirect(s, 0, NULL);
+
+ values[1] = getIndirect(s, 1);
+ if (values[1])
+ setIndirect(s, 1, NULL);
+
+ values[2] = getPredicate();
+ if (values[2])
+ setPredicate(cc, NULL);
+}
+
+void
+Instruction::putExtraSources(int s, Value *values[3])
+{
+ if (values[0])
+ setIndirect(s, 0, values[0]);
+ if (values[1])
+ setIndirect(s, 1, values[1]);
+ if (values[2])
+ setPredicate(cc, values[2]);
+}
+
+Instruction *
+Instruction::clone(bool deep) const
+{
+ Instruction *insn = new_Instruction(bb->getFunction(), op, dType);
+ assert(!asCmp() && !asFlow());
+ cloneBase(insn, deep);
+ return insn;
+}
+
+void
+Instruction::cloneBase(Instruction *insn, bool deep) const
+{
+ insn->sType = this->sType;
+
+ insn->cc = this->cc;
+ insn->rnd = this->rnd;
+ insn->cache = this->cache;
+ insn->subOp = this->subOp;
+
+ insn->saturate = this->saturate;
+ insn->atomic = this->atomic;
+ insn->ftz = this->ftz;
+ insn->dnz = this->dnz;
+ insn->ipa = this->ipa;
+ insn->lanes = this->lanes;
+ insn->perPatch = this->perPatch;
+
+ insn->postFactor = this->postFactor;
+
+ if (deep) {
+ if (!bb)
+ return;
+ Function *fn = bb->getFunction();
+ for (int d = 0; this->defExists(d); ++d)
+ insn->setDef(d, this->getDef(d)->clone(fn));
+ } else {
+ for (int d = 0; this->defExists(d); ++d)
+ insn->setDef(d, this->getDef(d));
+ }
+
+ for (int s = 0; this->srcExists(s); ++s)
+ insn->src[s].set(this->src[s]);
+
+ insn->predSrc = this->predSrc;
+ insn->flagsDef = this->flagsDef;
+ insn->flagsSrc = this->flagsSrc;
+}
+
+unsigned int
+Instruction::defCount(unsigned int mask) const
+{
+ unsigned int i, n;
+
+ for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
+ n += mask & 1;
+ return n;
+}
+
+unsigned int
+Instruction::srcCount(unsigned int mask) const
+{
+ unsigned int i, n;
+
+ for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
+ n += mask & 1;
+ return n;
+}
+
+bool
+Instruction::setIndirect(int s, int dim, Value *value)
+{
+ int p = src[s].indirect[dim];
+
+ assert(this->srcExists(s));
+ if (p < 0) {
+ if (!value)
+ return true;
+ for (p = s + 1; this->srcExists(p); ++p);
+ }
+ assert(p < NV50_IR_MAX_SRCS);
+
+ src[p] = value;
+ src[p].usedAsPtr = (value != 0);
+ src[s].indirect[dim] = value ? p : -1;
+ return true;
+}
+
+bool
+Instruction::setPredicate(CondCode ccode, Value *value)
+{
+ cc = ccode;
+
+ if (!value) {
+ if (predSrc >= 0) {
+ src[predSrc] = 0;
+ predSrc = -1;
+ }
+ return true;
+ }
+
+ if (predSrc < 0) {
+ int s;
+ for (s = 0; this->srcExists(s); ++s)
+ assert(s < NV50_IR_MAX_SRCS);
+ predSrc = s;
+ }
+ src[predSrc] = value;
+ return true;
+}
+
+bool
+Instruction::writesPredicate() const
+{
+ for (int d = 0; d < 2 && def[d].exists(); ++d)
+ if (def[d].exists() &&
+ (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS)))
+ return true;
+ return false;
+}
+
+static bool
+insnCheckCommutation(const Instruction *a, const Instruction *b)
+{
+ for (int d = 0; a->defExists(d); ++d)
+ for (int s = 0; b->srcExists(s); ++s)
+ if (a->getDef(d)->interfers(b->getSrc(s)))
+ return false;
+ return true;
+}
+
+bool
+Instruction::isCommutationLegal(const Instruction *i) const
+{
+ bool ret = true;
+ ret = ret && insnCheckCommutation(this, i);
+ ret = ret && insnCheckCommutation(i, this);
+ return ret;
+}
+
+TexInstruction::TexInstruction(Function *fn, operation op)
+ : Instruction(fn, op, TYPE_F32)
+{
+ memset(&tex, 0, sizeof(tex));
+
+ tex.rIndirectSrc = -1;
+ tex.sIndirectSrc = -1;
+}
+
+TexInstruction::~TexInstruction()
+{
+ for (int c = 0; c < 3; ++c) {
+ dPdx[c].set(NULL);
+ dPdy[c].set(NULL);
+ }
+}
+
+Instruction *
+TexInstruction::clone(bool deep) const
+{
+ TexInstruction *tex = new_TexInstruction(bb->getFunction(), op);
+ cloneBase(tex, deep);
+
+ tex->tex = this->tex;
+
+ if (op == OP_TXD) {
+ for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
+ tex->dPdx[c].set(dPdx[c]);
+ tex->dPdy[c].set(dPdy[c]);
+ }
+ }
+
+ return tex;
+}
+
+const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
+{
+ { "1D", 1, 1, false, false, false },
+ { "2D", 2, 2, false, false, false },
+ { "2D_MS", 2, 2, false, false, false },
+ { "3D", 3, 3, false, false, false },
+ { "CUBE", 2, 3, false, true, false },
+ { "1D_SHADOW", 1, 1, false, false, true },
+ { "2D_SHADOW", 2, 2, false, false, true },
+ { "CUBE_SHADOW", 2, 3, false, true, true },
+ { "1D_ARRAY", 1, 2, true, false, false },
+ { "2D_ARRAY", 2, 3, true, false, false },
+ { "2D_MS_ARRAY", 2, 3, true, false, false },
+ { "CUBE_ARRAY", 2, 3, true, true, false },
+ { "1D_ARRAY_SHADOW", 1, 2, true, false, true },
+ { "2D_ARRAY_SHADOW", 2, 3, true, false, true },
+ { "RECT", 2, 2, false, false, false },
+ { "RECT_SHADOW", 2, 2, false, false, true },
+ { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true },
+ { "BUFFER", 1, 1, false, false, false },
+};
+
+CmpInstruction::CmpInstruction(Function *fn, operation op)
+ : Instruction(fn, op, TYPE_F32)
+{
+ setCond = CC_ALWAYS;
+}
+
+Instruction *
+CmpInstruction::clone(bool deep) const
+{
+ CmpInstruction *cmp = new_CmpInstruction(bb->getFunction(), op);
+ cloneBase(cmp, deep);
+ cmp->setCond = setCond;
+ cmp->dType = dType;
+ return cmp;
+}
+
+FlowInstruction::FlowInstruction(Function *fn, operation op,
+ BasicBlock *targ)
+ : Instruction(fn, op, TYPE_NONE)
+{
+ target.bb = targ;
+
+ if (op == OP_BRA ||
+ op == OP_CONT || op == OP_BREAK ||
+ op == OP_RET || op == OP_EXIT)
+ terminator = 1;
+ else
+ if (op == OP_JOIN)
+ terminator = targ ? 1 : 0;
+
+ allWarp = absolute = limit = 0;
+}
+
+Program::Program(Type type, Target *arch)
+ : progType(type),
+ target(arch),
+ mem_Instruction(sizeof(Instruction), 6),
+ mem_CmpInstruction(sizeof(CmpInstruction), 4),
+ mem_TexInstruction(sizeof(TexInstruction), 4),
+ mem_FlowInstruction(sizeof(FlowInstruction), 4),
+ mem_LValue(sizeof(LValue), 8),
+ mem_Symbol(sizeof(Symbol), 7),
+ mem_ImmediateValue(sizeof(ImmediateValue), 7)
+{
+ code = NULL;
+ binSize = 0;
+
+ maxGPR = -1;
+
+ main = new Function(this, "MAIN");
+
+ dbgFlags = 0;
+}
+
+Program::~Program()
+{
+ if (main)
+ delete main;
+}
+
+void Program::releaseInstruction(Instruction *insn)
+{
+ // TODO: make this not suck so much
+
+ insn->~Instruction();
+
+ if (insn->asCmp())
+ mem_CmpInstruction.release(insn);
+ else
+ if (insn->asTex())
+ mem_TexInstruction.release(insn);
+ else
+ if (insn->asFlow())
+ mem_FlowInstruction.release(insn);
+ else
+ mem_Instruction.release(insn);
+}
+
+void Program::releaseValue(Value *value)
+{
+ if (value->asLValue())
+ mem_LValue.release(value);
+ else
+ if (value->asImm())
+ mem_ImmediateValue.release(value);
+ else
+ if (value->asSym())
+ mem_Symbol.release(value);
+}
+
+
+} // namespace nv50_ir
+
+extern "C" {
+
+static void
+nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
+{
+ info->io.clipDistance = 0xff;
+ info->io.pointSize = 0xff;
+ info->io.edgeFlagIn = 0xff;
+ info->io.edgeFlagOut = 0xff;
+ info->io.fragDepth = 0xff;
+ info->io.sampleMask = 0xff;
+ info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
+}
+
+int
+nv50_ir_generate_code(struct nv50_ir_prog_info *info)
+{
+ int ret = 0;
+
+ nv50_ir::Program::Type type;
+
+ nv50_ir_init_prog_info(info);
+
+#define PROG_TYPE_CASE(a, b) \
+ case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
+
+ switch (info->type) {
+ PROG_TYPE_CASE(VERTEX, VERTEX);
+// PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL);
+// PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL);
+ PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
+ PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
+ default:
+ type = nv50_ir::Program::TYPE_COMPUTE;
+ break;
+ }
+ INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
+
+ nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
+ if (!targ)
+ return -1;
+
+ nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
+ if (!prog)
+ return -1;
+ prog->dbgFlags = info->dbgFlags;
+
+ switch (info->bin.sourceRep) {
+#if 0
+ case PIPE_IR_LLVM:
+ case PIPE_IR_GLSL:
+ return -1;
+ case PIPE_IR_SM4:
+ ret = prog->makeFromSM4(info) ? 0 : -2;
+ break;
+ case PIPE_IR_TGSI:
+#endif
+ default:
+ ret = prog->makeFromTGSI(info) ? 0 : -2;
+ break;
+ }
+ if (ret < 0)
+ goto out;
+ if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
+ prog->print();
+
+ prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
+
+ prog->convertToSSA();
+
+ if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
+ prog->print();
+
+ prog->optimizeSSA(info->optLevel);
+ prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
+
+ if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
+ prog->print();
+
+ if (!prog->registerAllocation()) {
+ ret = -4;
+ goto out;
+ }
+ prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
+
+ prog->optimizePostRA(info->optLevel);
+
+ if (!prog->emitBinary(info)) {
+ ret = -5;
+ goto out;
+ }
+
+out:
+ INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
+
+ info->bin.maxGPR = prog->maxGPR;
+ info->bin.code = prog->code;
+ info->bin.codeSize = prog->binSize;
+
+ delete prog;
+ nv50_ir::Target::destroy(targ);
+
+ return ret;
+}
+
+} // extern "C"
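
A detail of nv50_ir.cpp above that is easy to misread is Modifier::operator*: per the comment in nv50_ir.h, the left operand is applied after the right one, and an ABS on the left absorbs a NEG on the right. A throwaway snippet against the classes above (not part of the patch):

    using namespace nv50_ir;

    Modifier mAbs(NV50_IR_MOD_ABS), mNeg(NV50_IR_MOD_NEG);

    Modifier m1 = mAbs * mNeg;  // abs after neg: |-x| == |x|  -> just NV50_IR_MOD_ABS
    Modifier m2 = mNeg * mAbs;  // neg after abs: -|x|         -> NV50_IR_MOD_NEG_ABS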
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
new file mode 100644
index 00000000000..6eef1abb69d
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -0,0 +1,1049 @@
+
+#ifndef __NV50_IR_H__
+#define __NV50_IR_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "nv50_ir_util.h"
+#include "nv50_ir_graph.h"
+
+#include "nv50_ir_driver.h"
+
+namespace nv50_ir {
+
+enum operation
+{
+ OP_NOP = 0,
+ OP_PHI,
+ OP_UNION, // unify a new definition and several source values
+ OP_SPLIT, // $r0d -> { $r0, $r1 } ($r0d and $r0/$r1 will be coalesced)
+ OP_MERGE, // opposite of split, e.g. combine 2 32 bit into a 64 bit value
+ OP_CONSTRAINT, // copy values into consecutive registers
+ OP_MOV,
+ OP_LOAD,
+ OP_STORE,
+ OP_ADD,
+ OP_SUB,
+ OP_MUL,
+ OP_DIV,
+ OP_MOD,
+ OP_MAD,
+ OP_FMA,
+ OP_SAD, // abs(src0 - src1) + src2
+ OP_ABS,
+ OP_NEG,
+ OP_NOT,
+ OP_AND,
+ OP_OR,
+ OP_XOR,
+ OP_SHL,
+ OP_SHR,
+ OP_MAX,
+ OP_MIN,
+ OP_SAT, // CLAMP(f32, 0.0, 1.0)
+ OP_CEIL,
+ OP_FLOOR,
+ OP_TRUNC,
+ OP_CVT,
+ OP_SET_AND, // dst = (src0 CMP src1) & src2
+ OP_SET_OR,
+ OP_SET_XOR,
+ OP_SET,
+ OP_SELP, // dst = src2 ? src0 : src1
+ OP_SLCT, // dst = (src2 CMP 0) ? src0 : src1
+ OP_RCP,
+ OP_RSQ,
+ OP_LG2,
+ OP_SIN,
+ OP_COS,
+ OP_EX2,
+ OP_EXP, // exponential (base M_E)
+ OP_LOG, // natural logarithm
+ OP_PRESIN,
+ OP_PREEX2,
+ OP_SQRT,
+ OP_POW,
+ OP_BRA,
+ OP_CALL,
+ OP_RET,
+ OP_CONT,
+ OP_BREAK,
+ OP_PRERET,
+ OP_PRECONT,
+ OP_PREBREAK,
+ OP_BRKPT, // breakpoint (not related to loops)
+ OP_JOINAT, // push control flow convergence point
+ OP_JOIN, // converge
+ OP_DISCARD,
+ OP_EXIT,
+ OP_MEMBAR,
+ OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
+ OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
+ OP_EXPORT,
+ OP_LINTERP,
+ OP_PINTERP,
+ OP_EMIT, // emit vertex
+ OP_RESTART, // restart primitive
+ OP_TEX,
+ OP_TXB, // texture bias
+ OP_TXL, // texture lod
+ OP_TXF, // texel fetch
+ OP_TXQ, // texture size query
+ OP_TXD, // texture derivatives
+ OP_TXG, // texture gather
+ OP_TEXCSAA,
+ OP_SULD, // surface load
+ OP_SUST, // surface store
+ OP_DFDX,
+ OP_DFDY,
+ OP_RDSV, // read system value
+ OP_WRSV, // write system value
+ OP_PIXLD,
+ OP_QUADOP,
+ OP_QUADON,
+ OP_QUADPOP,
+ OP_POPCNT, // bitcount(src0 & src1)
+ OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
+ OP_EXTBF,
+ OP_LAST
+};
+
+#define NV50_IR_SUBOP_MUL_HIGH 1
+#define NV50_IR_SUBOP_EMIT_RESTART 1
+#define NV50_IR_SUBOP_LDC_IL 1
+#define NV50_IR_SUBOP_LDC_IS 2
+#define NV50_IR_SUBOP_LDC_ISL 3
+
+enum DataType
+{
+ TYPE_NONE,
+ TYPE_U8,
+ TYPE_S8,
+ TYPE_U16,
+ TYPE_S16,
+ TYPE_U32,
+ TYPE_S32,
+ TYPE_U64, // 64 bit operations are only lowered after register allocation
+ TYPE_S64,
+ TYPE_F16,
+ TYPE_F32,
+ TYPE_F64,
+ TYPE_B96,
+ TYPE_B128
+};
+
+enum CondCode
+{
+ CC_FL = 0,
+ CC_NEVER = CC_FL, // when used with FILE_FLAGS
+ CC_LT = 1,
+ CC_EQ = 2,
+ CC_NOT_P = CC_EQ, // when used with FILE_PREDICATE
+ CC_LE = 3,
+ CC_GT = 4,
+ CC_NE = 5,
+ CC_P = CC_NE,
+ CC_GE = 6,
+ CC_TR = 7,
+ CC_ALWAYS = CC_TR,
+ CC_U = 8,
+ CC_LTU = 9,
+ CC_EQU = 10,
+ CC_LEU = 11,
+ CC_GTU = 12,
+ CC_NEU = 13,
+ CC_GEU = 14,
+ CC_NO = 0x10,
+ CC_NC = 0x11,
+ CC_NS = 0x12,
+ CC_NA = 0x13,
+ CC_A = 0x14,
+ CC_S = 0x15,
+ CC_C = 0x16,
+ CC_O = 0x17
+};
+
+enum RoundMode
+{
+ ROUND_N, // nearest
+ ROUND_M, // towards -inf
+ ROUND_Z, // towards 0
+ ROUND_P, // towards +inf
+ ROUND_NI, // nearest integer
+ ROUND_MI, // to integer towards -inf
+ ROUND_ZI, // to integer towards 0
+ ROUND_PI, // to integer towards +inf
+};
+
+enum CacheMode
+{
+ CACHE_CA, // cache at all levels
+ CACHE_WB = CACHE_CA, // cache write back
+ CACHE_CG, // cache at global level
+ CACHE_CS, // cache streaming
+ CACHE_CV, // cache as volatile
+ CACHE_WT = CACHE_CV // cache write-through
+};
+
+enum DataFile
+{
+ FILE_NULL = 0,
+ FILE_GPR,
+ FILE_PREDICATE, // boolean predicate
+ FILE_FLAGS, // zero/sign/carry/overflow bits
+ FILE_ADDRESS,
+ FILE_IMMEDIATE,
+ FILE_MEMORY_CONST,
+ FILE_SHADER_INPUT,
+ FILE_SHADER_OUTPUT,
+ FILE_MEMORY_GLOBAL,
+ FILE_MEMORY_SHARED,
+ FILE_MEMORY_LOCAL,
+ FILE_SYSTEM_VALUE,
+ DATA_FILE_COUNT
+};
+
+enum TexTarget
+{
+ TEX_TARGET_1D,
+ TEX_TARGET_2D,
+ TEX_TARGET_2D_MS,
+ TEX_TARGET_3D,
+ TEX_TARGET_CUBE,
+ TEX_TARGET_1D_SHADOW,
+ TEX_TARGET_2D_SHADOW,
+ TEX_TARGET_CUBE_SHADOW,
+ TEX_TARGET_1D_ARRAY,
+ TEX_TARGET_2D_ARRAY,
+ TEX_TARGET_2D_MS_ARRAY,
+ TEX_TARGET_CUBE_ARRAY,
+ TEX_TARGET_1D_ARRAY_SHADOW,
+ TEX_TARGET_2D_ARRAY_SHADOW,
+ TEX_TARGET_RECT,
+ TEX_TARGET_RECT_SHADOW,
+ TEX_TARGET_CUBE_ARRAY_SHADOW,
+ TEX_TARGET_BUFFER,
+ TEX_TARGET_COUNT
+};
+
+enum SVSemantic
+{
+ SV_POSITION, // WPOS
+ SV_VERTEX_ID,
+ SV_INSTANCE_ID,
+ SV_INVOCATION_ID,
+ SV_PRIMITIVE_ID,
+ SV_VERTEX_COUNT, // gl_PatchVerticesIn
+ SV_LAYER,
+ SV_VIEWPORT_INDEX,
+ SV_YDIR,
+ SV_FACE,
+ SV_POINT_SIZE,
+ SV_POINT_COORD,
+ SV_CLIP_DISTANCE,
+ SV_SAMPLE_INDEX,
+ SV_TESS_FACTOR,
+ SV_TESS_COORD,
+ SV_TID,
+ SV_CTAID,
+ SV_NTID,
+ SV_GRIDID,
+ SV_NCTAID,
+ SV_LANEID,
+ SV_PHYSID,
+ SV_NPHYSID,
+ SV_CLOCK,
+ SV_LBASE,
+ SV_SBASE,
+ SV_UNDEFINED,
+ SV_LAST
+};
+
+class Program;
+class Function;
+class BasicBlock;
+
+class Target;
+
+class Instruction;
+class CmpInstruction;
+class TexInstruction;
+class FlowInstruction;
+
+class Value;
+class LValue;
+class Symbol;
+class ImmediateValue;
+
+struct Storage
+{
+ DataFile file;
+ int8_t fileIndex; // signed, may be indirect for CONST[]
+ uint8_t size; // this should match the Instruction type's size
+ DataType type; // mainly for pretty printing
+ union {
+ uint64_t u64; // immediate values
+ uint32_t u32;
+ uint16_t u16;
+ uint8_t u8;
+ int64_t s64;
+ int32_t s32;
+ int16_t s16;
+ int8_t s8;
+ float f32;
+ double f64;
+ int32_t offset; // offset from 0 (base of address space)
+ int32_t id; // register id (< 0 if virtual/unassigned)
+ struct {
+ SVSemantic sv;
+ int index;
+ } sv;
+ } data;
+};
+
+// precedence: NOT after SAT after NEG after ABS
+#define NV50_IR_MOD_ABS (1 << 0)
+#define NV50_IR_MOD_NEG (1 << 1)
+#define NV50_IR_MOD_SAT (1 << 2)
+#define NV50_IR_MOD_NOT (1 << 3)
+#define NV50_IR_MOD_NEG_ABS (NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)
+
+#define NV50_IR_INTERP_MODE_MASK 0x3
+#define NV50_IR_INTERP_LINEAR (0 << 0)
+#define NV50_IR_INTERP_PERSPECTIVE (1 << 0)
+#define NV50_IR_INTERP_FLAT (2 << 0)
+#define NV50_IR_INTERP_SC (3 << 0) // what exactly is that ?
+#define NV50_IR_INTERP_SAMPLE_MASK 0xc
+#define NV50_IR_INTERP_DEFAULT (0 << 2)
+#define NV50_IR_INTERP_CENTROID (1 << 2)
+#define NV50_IR_INTERP_OFFSET (2 << 2)
+#define NV50_IR_INTERP_SAMPLEID (3 << 2)
+
+// do we really want this to be a class ?
+class Modifier
+{
+public:
+ Modifier() : bits(0) { }
+ Modifier(unsigned int m) : bits(m) { }
+ Modifier(operation op);
+
+ // @return new Modifier applying a after b (asserts if unrepresentable)
+ Modifier operator*(const Modifier) const;
+ Modifier operator==(const Modifier m) const { return m.bits == bits; }
+ Modifier operator!=(const Modifier m) const { return m.bits != bits; }
+
+ inline Modifier operator&(const Modifier m) const { return bits & m.bits; }
+ inline Modifier operator|(const Modifier m) const { return bits | m.bits; }
+ inline Modifier operator^(const Modifier m) const { return bits ^ m.bits; }
+
+ operation getOp() const;
+
+ inline int neg() const { return (bits & NV50_IR_MOD_NEG) ? 1 : 0; }
+ inline int abs() const { return (bits & NV50_IR_MOD_ABS) ? 1 : 0; }
+
+ inline operator bool() { return bits ? true : false; }
+
+ void applyTo(ImmediateValue &imm) const;
+
+ int print(char *buf, size_t size) const;
+
+private:
+ uint8_t bits;
+};
+
+class ValueRef
+{
+public:
+ ValueRef();
+ ~ValueRef();
+
+ inline ValueRef& operator=(Value *val) { this->set(val); return *this; }
+
+ inline bool exists() const { return value != NULL; }
+
+ void set(Value *);
+ void set(const ValueRef&);
+ inline Value *get() const { return value; }
+ inline Value *rep() const;
+
+ inline Instruction *getInsn() const { return insn; }
+ inline void setInsn(Instruction *inst) { insn = inst; }
+
+ inline bool isIndirect(int dim) const { return indirect[dim] >= 0; }
+ inline const ValueRef *getIndirect(int dim) const;
+
+ inline DataFile getFile() const;
+ inline unsigned getSize() const;
+
+ // SSA: return eventual (traverse MOVs) literal value, if it exists
+ ImmediateValue *getImmediate() const;
+
+ class Iterator
+ {
+ public:
+ Iterator(ValueRef *ref) : pos(ref), ini(ref) { }
+
+ inline ValueRef *get() const { return pos; }
+ inline bool end() const { return pos == NULL; }
+ inline void next() { pos = (pos->next != ini) ? pos->next : 0; }
+
+ private:
+ ValueRef *pos, *ini;
+ };
+
+ inline Iterator iterator() { return Iterator(this); }
+
+public:
+ Modifier mod;
+ int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src[indirect[i]]
+ uint8_t swizzle;
+
+ bool usedAsPtr; // for printing
+
+private:
+ Value *value;
+ Instruction *insn;
+ ValueRef *next; // to link uses of the value
+ ValueRef *prev;
+};
+
+class ValueDef
+{
+public:
+ ValueDef();
+ ~ValueDef();
+
+ inline ValueDef& operator=(Value *val) { this->set(val); return *this; }
+
+ inline bool exists() const { return value != NULL; }
+
+ inline Value *get() const { return value; }
+ inline Value *rep() const;
+ void set(Value *);
+ void replace(Value *, bool doSet); // replace all uses of the old value
+
+ inline Instruction *getInsn() const { return insn; }
+ inline void setInsn(Instruction *inst) { insn = inst; }
+
+ inline DataFile getFile() const;
+ inline unsigned getSize() const;
+
+ // HACK: save the pre-SSA value in 'prev', in SSA we don't need the def list
+ // but we'll use it again for coalescing in register allocation
+ inline void setSSA(LValue *);
+ inline const LValue *preSSA() const;
+ inline void restoreDefList(); // after having been abused for SSA hack
+ void mergeDefs(ValueDef *);
+
+ class Iterator
+ {
+ public:
+ Iterator(ValueDef *def) : pos(def), ini(def) { }
+
+ inline ValueDef *get() const { return pos; }
+ inline bool end() const { return pos == NULL; }
+ inline void next() { pos = (pos->next != ini) ? pos->next : NULL; }
+
+ private:
+ ValueDef *pos, *ini;
+ };
+
+ inline Iterator iterator() { return Iterator(this); }
+
+private:
+ Value *value; // should make this LValue * ...
+ Instruction *insn;
+ ValueDef *next; // circular list of all definitions of the same value
+ ValueDef *prev;
+};
+
+class Value
+{
+public:
+ Value();
+
+ virtual Value *clone(Function *) const { return NULL; }
+
+ virtual int print(char *, size_t, DataType ty = TYPE_NONE) const = 0;
+
+ virtual bool equals(const Value *, bool strict = false) const;
+ virtual bool interfers(const Value *) const;
+
+ inline Instruction *getUniqueInsn() const;
+ inline Instruction *getInsn() const; // use when uniqueness is certain
+
+ inline int refCount() { return refCnt; }
+ inline int ref() { return ++refCnt; }
+ inline int unref() { --refCnt; assert(refCnt >= 0); return refCnt; }
+
+ inline LValue *asLValue();
+ inline Symbol *asSym();
+ inline ImmediateValue *asImm();
+ inline const Symbol *asSym() const;
+ inline const ImmediateValue *asImm() const;
+
+ bool coalesce(Value *, bool force = false);
+
+ inline bool inFile(DataFile f) { return reg.file == f; }
+
+ static inline Value *get(Iterator&);
+
+protected:
+ int refCnt;
+
+ friend class ValueDef;
+ friend class ValueRef;
+
+public:
+ int id;
+ ValueRef *uses;
+ ValueDef *defs;
+ Storage reg;
+
+ // TODO: these should be in LValue:
+ Interval livei;
+ Value *join;
+};
+
+class LValue : public Value
+{
+public:
+ LValue(Function *, DataFile file);
+ LValue(Function *, LValue *);
+
+ virtual Value *clone(Function *) const;
+
+ virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
+
+public:
+ unsigned ssa : 1;
+
+ int affinity;
+};
+
+class Symbol : public Value
+{
+public:
+ Symbol(Program *, DataFile file = FILE_MEMORY_CONST, ubyte fileIdx = 0);
+
+ virtual Value *clone(Function *) const;
+
+ virtual bool equals(const Value *that, bool strict) const;
+
+ virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
+
+ // print with indirect values
+ int print(char *, size_t, Value *, Value *, DataType ty = TYPE_NONE) const;
+
+ inline void setFile(DataFile file, ubyte fileIndex = 0)
+ {
+ reg.file = file;
+ reg.fileIndex = fileIndex;
+ }
+
+ inline void setOffset(int32_t offset);
+ inline void setAddress(Symbol *base, int32_t offset);
+ inline void setSV(SVSemantic sv, uint32_t idx = 0);
+
+ inline const Symbol *getBase() const { return baseSym; }
+
+private:
+ Symbol *baseSym; // array base for Symbols representing array elements
+};
+
+class ImmediateValue : public Value
+{
+public:
+ ImmediateValue(Program *, uint32_t);
+ ImmediateValue(Program *, float);
+ ImmediateValue(Program *, double);
+
+ // NOTE: not added to the program's value list (unlike the constructors above)
+ ImmediateValue(const ImmediateValue *, DataType ty);
+
+ virtual bool equals(const Value *that, bool strict) const;
+
+ // these only work if 'type' is valid (we mostly use untyped literals):
+ bool isInteger(const int ival) const; // ival is cast to this' type
+ bool isNegative() const;
+ bool isPow2() const;
+
+ void applyLog2();
+
+ // for constant folding:
+ ImmediateValue operator+(const ImmediateValue&) const;
+ ImmediateValue operator-(const ImmediateValue&) const;
+ ImmediateValue operator*(const ImmediateValue&) const;
+ ImmediateValue operator/(const ImmediateValue&) const;
+
+ bool compare(CondCode cc, float fval) const;
+
+ virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
+};
+
+
+#define NV50_IR_MAX_DEFS 4
+#define NV50_IR_MAX_SRCS 8
+
+class Instruction
+{
+public:
+ Instruction();
+ Instruction(Function *, operation, DataType);
+ virtual ~Instruction();
+
+ virtual Instruction *clone(bool deep) const;
+
+ inline void setDef(int i, Value *val) { def[i].set(val); }
+ inline void setSrc(int s, Value *val) { src[s].set(val); }
+ void setSrc(int s, ValueRef&);
+ void swapSources(int a, int b);
+ bool setIndirect(int s, int dim, Value *);
+
+ inline Value *getDef(int d) const { return def[d].get(); }
+ inline Value *getSrc(int s) const { return src[s].get(); }
+ inline Value *getIndirect(int s, int dim) const;
+
+ inline bool defExists(int d) const { return d < 4 && def[d].exists(); }
+ inline bool srcExists(int s) const { return s < 8 && src[s].exists(); }
+
+ inline bool constrainedDefs() const { return def[1].exists(); }
+
+ bool setPredicate(CondCode ccode, Value *);
+ inline Value *getPredicate() const;
+ bool writesPredicate() const;
+
+ unsigned int defCount(unsigned int mask) const;
+ unsigned int srcCount(unsigned int mask) const;
+
+ // save & remove / set indirect[0,1] and predicate source
+ void takeExtraSources(int s, Value *[3]);
+ void putExtraSources(int s, Value *[3]);
+
+ inline void setType(DataType type) { dType = sType = type; }
+
+ inline void setType(DataType dtype, DataType stype)
+ {
+ dType = dtype;
+ sType = stype;
+ }
+
+ inline bool isPseudo() const { return op < OP_MOV; }
+ bool isDead() const;
+ bool isNop() const;
+ bool isCommutationLegal(const Instruction *) const; // must be adjacent !
+ bool isActionEqual(const Instruction *) const;
+ bool isResultEqual(const Instruction *) const;
+
+ void print() const;
+
+ inline CmpInstruction *asCmp();
+ inline TexInstruction *asTex();
+ inline FlowInstruction *asFlow();
+ inline const TexInstruction *asTex() const;
+ inline const CmpInstruction *asCmp() const;
+ inline const FlowInstruction *asFlow() const;
+
+public:
+ Instruction *next;
+ Instruction *prev;
+ int id;
+ int serial; // CFG order
+
+ operation op;
+ DataType dType; // destination or defining type
+ DataType sType; // source or secondary type
+ CondCode cc;
+ RoundMode rnd;
+ CacheMode cache;
+
+ uint8_t subOp; // quadop, 1 for mul-high, etc.
+
+ unsigned encSize : 4; // encoding size in bytes
+ unsigned saturate : 1; // to [0.0f, 1.0f]
+ unsigned join : 1; // converge control flow (use OP_JOIN until end)
+ unsigned fixed : 1; // prevent dead code elimination
+ unsigned terminator : 1; // end of basic block
+ unsigned atomic : 1;
+ unsigned ftz : 1; // flush denormal to zero
+ unsigned dnz : 1; // denormals, NaN are zero
+ unsigned ipa : 4; // interpolation mode
+ unsigned lanes : 4;
+ unsigned perPatch : 1;
+ unsigned exit : 1; // terminate program after insn
+
+ int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor
+
+ int8_t predSrc;
+ int8_t flagsDef;
+ int8_t flagsSrc;
+
+ // NOTE: should make these pointers, saves space and work on shuffling
+ ValueDef def[NV50_IR_MAX_DEFS]; // no gaps !
+ ValueRef src[NV50_IR_MAX_SRCS]; // no gaps !
+
+ BasicBlock *bb;
+
+ // instruction specific methods:
+ // (don't want to subclass, would need more constructors and memory pools)
+public:
+ inline void setInterpolate(unsigned int mode) { ipa = mode; }
+
+ unsigned int getInterpMode() const { return ipa & 0x3; }
+ unsigned int getSampleMode() const { return ipa & 0xc; }
+
+private:
+ void init();
+protected:
+ void cloneBase(Instruction *clone, bool deep) const;
+};
+
+enum TexQuery
+{
+ TXQ_DIMS,
+ TXQ_TYPE,
+ TXQ_SAMPLE_POSITION,
+ TXQ_FILTER,
+ TXQ_LOD,
+ TXQ_WRAP,
+ TXQ_BORDER_COLOUR
+};
+
+class TexInstruction : public Instruction
+{
+public:
+ class Target
+ {
+ public:
+ Target(TexTarget targ = TEX_TARGET_2D) : target(targ) { }
+
+ const char *getName() const { return descTable[target].name; }
+ unsigned int getArgCount() const { return descTable[target].argc; }
+ unsigned int getDim() const { return descTable[target].dim; }
+ int isArray() const { return descTable[target].array ? 1 : 0; }
+ int isCube() const { return descTable[target].cube ? 1 : 0; }
+ int isShadow() const { return descTable[target].shadow ? 1 : 0; }
+
+ Target& operator=(TexTarget targ)
+ {
+ assert(targ < TEX_TARGET_COUNT);
+ target = targ;
+ return *this;
+ }
+
+ inline bool operator==(TexTarget targ) const { return target == targ; }
+
+ private:
+ struct Desc
+ {
+ char name[19];
+ uint8_t dim;
+ uint8_t argc;
+ bool array;
+ bool cube;
+ bool shadow;
+ };
+
+ static const struct Desc descTable[TEX_TARGET_COUNT];
+
+ private:
+ enum TexTarget target;
+ };
+
+public:
+ TexInstruction(Function *, operation);
+ virtual ~TexInstruction();
+
+ virtual Instruction *clone(bool deep) const;
+
+ inline void setTexture(Target targ, uint8_t r, uint8_t s)
+ {
+ tex.r = r;
+ tex.s = s;
+ tex.target = targ;
+ }
+
+ inline Value *getIndirectR() const;
+ inline Value *getIndirectS() const;
+
+public:
+ struct {
+ Target target;
+
+ uint8_t r;
+ int8_t rIndirectSrc;
+ uint8_t s;
+ int8_t sIndirectSrc;
+
+ uint8_t mask;
+ uint8_t gatherComp;
+
+ bool liveOnly; // only execute on live pixels of a quad (optimization)
+ bool levelZero;
+
+ int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets
+ int8_t offset[4][3];
+
+ enum TexQuery query;
+ } tex;
+
+ ValueRef dPdx[3];
+ ValueRef dPdy[3];
+};
+
+class CmpInstruction : public Instruction
+{
+public:
+ CmpInstruction(Function *, operation);
+
+ virtual Instruction *clone(bool deep) const;
+
+ void setCondition(CondCode cond) { setCond = cond; }
+ CondCode getCondition() const { return setCond; }
+
+public:
+ CondCode setCond;
+};
+
+class FlowInstruction : public Instruction
+{
+public:
+ FlowInstruction(Function *, operation, BasicBlock *target);
+
+public:
+ unsigned allWarp : 1;
+ unsigned absolute : 1;
+ unsigned limit : 1;
+ unsigned builtin : 1; // true for calls to emulation code
+
+ union {
+ BasicBlock *bb;
+ int builtin;
+ Function *fn;
+ } target;
+};
+
+class BasicBlock
+{
+public:
+ BasicBlock(Function *);
+ ~BasicBlock();
+
+ inline int getId() const { return id; }
+ inline unsigned int getInsnCount() const { return numInsns; }
+ inline bool isTerminated() const { return exit && exit->terminator; }
+
+ bool dominatedBy(BasicBlock *bb);
+ inline bool reachableBy(BasicBlock *by, BasicBlock *term);
+
+ // returns mask of conditional out blocks
+ // e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF
+ unsigned int initiatesSimpleConditional() const;
+
+public:
+ Function *getFunction() const { return func; }
+ Program *getProgram() const { return program; }
+
+ Instruction *getEntry() const { return entry; } // first non-phi instruction
+ Instruction *getPhi() const { return phi; }
+ Instruction *getFirst() const { return phi ? phi : entry; }
+ Instruction *getExit() const { return exit; }
+
+ void insertHead(Instruction *);
+ void insertTail(Instruction *);
+ void insertBefore(Instruction *, Instruction *);
+ void insertAfter(Instruction *, Instruction *);
+ void remove(Instruction *);
+ void permuteAdjacent(Instruction *, Instruction *);
+
+ BasicBlock *idom() const;
+
+ DLList& getDF() { return df; }
+ DLList::Iterator iterDF() { return df.iterator(); }
+
+ static inline BasicBlock *get(Iterator&);
+ static inline BasicBlock *get(Graph::Node *);
+
+public:
+ Graph::Node cfg; // first edge is branch *taken* (the ELSE branch)
+ Graph::Node dom;
+
+ BitSet liveSet;
+
+ uint32_t binPos;
+ uint32_t binSize;
+
+ Instruction *joinAt; // for quick reference
+
+ bool explicitCont; // loop headers: true if loop contains continue stmts
+
+private:
+ int id;
+ DLList df;
+
+ Instruction *phi;
+ Instruction *entry;
+ Instruction *exit;
+
+ unsigned int numInsns;
+
+private:
+ Function *func;
+ Program *program;
+};
+
+class Function
+{
+public:
+ Function(Program *, const char *name);
+ ~Function();
+
+ inline Program *getProgram() const { return prog; }
+ inline const char *getName() const { return name; }
+ inline int getId() const { return id; }
+
+ void print();
+ void printLiveIntervals() const;
+ void printCFGraph(const char *filePath);
+
+ bool setEntry(BasicBlock *);
+ bool setExit(BasicBlock *);
+
+ unsigned int orderInstructions(ArrayList&);
+
+ inline void add(BasicBlock *bb, int& id) { allBBlocks.insert(bb, id); }
+ inline void add(Instruction *insn, int& id) { allInsns.insert(insn, id); }
+ inline void add(LValue *lval, int& id) { allLValues.insert(lval, id); }
+
+ inline LValue *getLValue(int id);
+
+ bool convertToSSA();
+
+public:
+ Graph cfg;
+ Graph::Node *cfgExit;
+ Graph *domTree;
+ Graph::Node call; // node in the call graph
+
+ BasicBlock **bbArray; // BBs in emission order
+ int bbCount;
+
+ unsigned int loopNestingBound;
+ int regClobberMax;
+
+ uint32_t binPos;
+ uint32_t binSize;
+
+ ArrayList allBBlocks;
+ ArrayList allInsns;
+ ArrayList allLValues;
+
+private:
+ void buildLiveSetsPreSSA(BasicBlock *, const int sequence);
+
+private:
+ int id;
+ const char *const name;
+ Program *prog;
+};
+
+enum CGStage
+{
+ CG_STAGE_PRE_SSA,
+ CG_STAGE_SSA, // expected directly before register allocation
+ CG_STAGE_POST_RA
+};
+
+class Program
+{
+public:
+ enum Type
+ {
+ TYPE_VERTEX,
+ TYPE_TESSELLATION_CONTROL,
+ TYPE_TESSELLATION_EVAL,
+ TYPE_GEOMETRY,
+ TYPE_FRAGMENT,
+ TYPE_COMPUTE
+ };
+
+ Program(Type type, Target *targ);
+ ~Program();
+
+ void print();
+
+ Type getType() const { return progType; }
+
+ inline void add(Function *fn, int& id) { allFuncs.insert(fn, id); }
+ inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }
+
+ bool makeFromTGSI(struct nv50_ir_prog_info *);
+ bool makeFromSM4(struct nv50_ir_prog_info *);
+ bool convertToSSA();
+ bool optimizeSSA(int level);
+ bool optimizePostRA(int level);
+ bool registerAllocation();
+ bool emitBinary(struct nv50_ir_prog_info *);
+
+ const Target *getTarget() const { return target; }
+
+private:
+ Type progType;
+ Target *target;
+
+public:
+ Function *main;
+ Graph calls;
+
+ ArrayList allFuncs;
+ ArrayList allRValues;
+
+ uint32_t *code;
+ uint32_t binSize;
+
+ int maxGPR;
+
+ MemoryPool mem_Instruction;
+ MemoryPool mem_CmpInstruction;
+ MemoryPool mem_TexInstruction;
+ MemoryPool mem_FlowInstruction;
+ MemoryPool mem_LValue;
+ MemoryPool mem_Symbol;
+ MemoryPool mem_ImmediateValue;
+
+ uint32_t dbgFlags;
+
+ void releaseInstruction(Instruction *);
+ void releaseValue(Value *);
+};
+
+// TODO: add const version
+class Pass
+{
+public:
+ bool run(Program *, bool ordered = false, bool skipPhi = false);
+ bool run(Function *, bool ordered = false, bool skipPhi = false);
+
+private:
+ // return false to continue with next entity on next higher level
+ virtual bool visit(Function *) { return true; }
+ virtual bool visit(BasicBlock *) { return true; }
+ virtual bool visit(Instruction *) { return false; }
+
+ bool doRun(Program *, bool ordered, bool skipPhi);
+ bool doRun(Function *, bool ordered, bool skipPhi);
+
+protected:
+ bool err;
+ Function *func;
+ Program *prog;
+};
+
+// =============================================================================
+
+#include "nv50_ir_inlines.h"
+
+} // namespace nv50_ir
+
+#endif // __NV50_IR_H__
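
To illustrate how the Pass interface declared above is meant to be driven (the counting pass itself is made up; only the run()/visit() protocol comes from this header):

    // hypothetical pass: count the non-pseudo instructions of a program
    class CountInsns : public nv50_ir::Pass
    {
    public:
       CountInsns() : count(0) { }
       int count;
    private:
       virtual bool visit(nv50_ir::Instruction *i)
       {
          if (!i->isPseudo())
             ++count;
          return true;  // returning false would skip the rest of this basic block
       }
    };

    // usage:  CountInsns p;  p.run(prog, true /* ordered */, true /* skipPhi */);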
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_bb.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_bb.cpp
new file mode 100644
index 00000000000..5bf08b37c51
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_bb.cpp
@@ -0,0 +1,409 @@
+
+#include "nv50_ir.h"
+
+namespace nv50_ir {
+
+Function::Function(Program *p, const char *fnName)
+ : call(this),
+ name(fnName),
+ prog(p)
+{
+ cfgExit = NULL;
+ domTree = NULL;
+
+ bbArray = NULL;
+ bbCount = 0;
+ loopNestingBound = 0;
+ regClobberMax = 0;
+
+ binPos = 0;
+ binSize = 0;
+
+ prog->add(this, id);
+}
+
+Function::~Function()
+{
+ if (domTree)
+ delete domTree;
+ if (bbArray)
+ delete[] bbArray;
+
+ for (ArrayList::Iterator BBs = allBBlocks.iterator(); !BBs.end(); BBs.next())
+ delete reinterpret_cast<BasicBlock *>(BBs.get());
+}
+
+BasicBlock::BasicBlock(Function *fn) : cfg(this), dom(this), func(fn)
+{
+ program = func->getProgram();
+
+ joinAt = phi = entry = exit = NULL;
+
+ numInsns = 0;
+ binPos = 0;
+ binSize = 0;
+
+ explicitCont = false;
+
+ func->add(this, this->id);
+}
+
+BasicBlock::~BasicBlock()
+{
+ // nothing yet
+}
+
+BasicBlock *
+BasicBlock::idom() const
+{
+ Graph::Node *dn = dom.parent();
+ return dn ? BasicBlock::get(dn) : NULL;
+}
+
+void
+BasicBlock::insertHead(Instruction *inst)
+{
+ assert(inst->next == 0 && inst->prev == 0);
+
+ if (inst->op == OP_PHI) {
+ if (phi) {
+ insertBefore(phi, inst);
+ } else {
+ if (entry) {
+ insertBefore(entry, inst);
+ } else {
+ assert(!exit);
+ phi = exit = inst;
+ inst->bb = this;
+ ++numInsns;
+ }
+ }
+ } else {
+ if (entry) {
+ insertBefore(entry, inst);
+ } else {
+ if (phi) {
+ insertAfter(phi, inst);
+ } else {
+ assert(!exit);
+ entry = exit = inst;
+ inst->bb = this;
+ ++numInsns;
+ }
+ }
+ }
+}
+
+void
+BasicBlock::insertTail(Instruction *inst)
+{
+ assert(inst->next == 0 && inst->prev == 0);
+
+ if (inst->op == OP_PHI) {
+ if (entry) {
+ insertBefore(entry, inst);
+ } else
+ if (exit) {
+ assert(phi);
+ insertAfter(exit, inst);
+ } else {
+ assert(!phi);
+ phi = exit = inst;
+ inst->bb = this;
+ ++numInsns;
+ }
+ } else {
+ if (exit) {
+ insertAfter(exit, inst);
+ } else {
+ assert(!phi);
+ entry = exit = inst;
+ inst->bb = this;
+ ++numInsns;
+ }
+ }
+}
+
+void
+BasicBlock::insertBefore(Instruction *q, Instruction *p)
+{
+ assert(p && q);
+
+ assert(p->next == 0 && p->prev == 0);
+
+ if (q == entry) {
+ if (p->op == OP_PHI) {
+ if (!phi)
+ phi = p;
+ } else {
+ entry = p;
+ }
+ } else
+ if (q == phi) {
+ assert(p->op == OP_PHI);
+ phi = p;
+ }
+
+ p->next = q;
+ p->prev = q->prev;
+ if (p->prev)
+ p->prev->next = p;
+ q->prev = p;
+
+ p->bb = this;
+ ++numInsns;
+}
+
+void
+BasicBlock::insertAfter(Instruction *p, Instruction *q)
+{
+ assert(p && q);
+ assert(q->op != OP_PHI || p->op == OP_PHI);
+
+ assert(q->next == 0 && q->prev == 0);
+
+ if (p == exit)
+ exit = q;
+ if (p->op == OP_PHI && q->op != OP_PHI)
+ entry = q;
+
+ q->prev = p;
+ q->next = p->next;
+ if (q->next)
+ q->next->prev = q;
+ p->next = q;
+
+ q->bb = this;
+ ++numInsns;
+}
+
+void
+BasicBlock::remove(Instruction *insn)
+{
+ assert(insn->bb == this);
+
+ if (insn->prev)
+ insn->prev->next = insn->next;
+
+ if (insn->next)
+ insn->next->prev = insn->prev;
+ else
+ exit = insn->prev;
+
+ if (insn == entry)
+ entry = insn->next ? insn->next : insn->prev;
+
+ if (insn == phi)
+ phi = (insn->next && insn->next->op == OP_PHI) ? insn->next : 0;
+
+ --numInsns;
+ insn->bb = NULL;
+ insn->next =
+ insn->prev = NULL;
+}
+
+void BasicBlock::permuteAdjacent(Instruction *a, Instruction *b)
+{
+ assert(a->bb == b->bb);
+
+ if (a->next != b) {
+ Instruction *i = a;
+ a = b;
+ b = i;
+ }
+ assert(a->next == b);
+ assert(a->op != OP_PHI && b->op != OP_PHI);
+
+ if (b == exit)
+ exit = a;
+ if (a == entry)
+ entry = b;
+
+ b->prev = a->prev;
+ a->next = b->next;
+ b->next = a;
+ a->prev = b;
+
+ if (b->prev)
+ b->prev->next = b;
+ if (a->next)
+ a->next->prev = a;
+}
+
+bool
+BasicBlock::dominatedBy(BasicBlock *that)
+{
+ Graph::Node *bn = &that->dom;
+ Graph::Node *dn = &this->dom;
+
+ while (dn && dn != bn)
+ dn = dn->parent();
+
+ return dn != NULL;
+}
+
+unsigned int
+BasicBlock::initiatesSimpleConditional() const
+{
+ Graph::Node *out[2];
+ int n;
+ Graph::Edge::Type eR;
+
+ if (cfg.outgoingCount() != 2) // -> if and -> else/endif
+ return false;
+
+ n = 0;
+ for (Graph::EdgeIterator ei = cfg.outgoing(); !ei.end(); ei.next())
+ out[n++] = ei.getNode();
+ eR = out[1]->outgoing().getType();
+
+ // IF block is out edge to the right
+ if (eR == Graph::Edge::CROSS || eR == Graph::Edge::BACK)
+ return 0x2;
+
+ if (out[1]->outgoingCount() != 1) // 0 is IF { RET; }, >1 is more divergence
+ return 0x0;
+ // do they reconverge immediately ?
+ if (out[1]->outgoing().getNode() == out[0])
+ return 0x1;
+ if (out[0]->outgoingCount() == 1)
+ if (out[0]->outgoing().getNode() == out[1]->outgoing().getNode())
+ return 0x3;
+
+ return 0x0;
+}
+
+bool
+Function::setEntry(BasicBlock *bb)
+{
+ if (cfg.getRoot())
+ return false;
+ cfg.insert(&bb->cfg);
+ return true;
+}
+
+bool
+Function::setExit(BasicBlock *bb)
+{
+ if (cfgExit)
+ return false;
+ cfgExit = &bb->cfg;
+ return true;
+}
+
+unsigned int
+Function::orderInstructions(ArrayList &result)
+{
+ Iterator *iter;
+ for (iter = cfg.iteratorCFG(); !iter->end(); iter->next())
+ for (Instruction *insn = BasicBlock::get(*iter)->getFirst();
+ insn; insn = insn->next)
+ result.insert(insn, insn->serial);
+ cfg.putIterator(iter);
+ return result.getSize();
+}
+
+bool
+Pass::run(Program *prog, bool ordered, bool skipPhi)
+{
+ this->prog = prog;
+ err = false;
+ return doRun(prog, ordered, skipPhi);
+}
+
+bool
+Pass::doRun(Program *prog, bool ordered, bool skipPhi)
+{
+ for (ArrayList::Iterator fi = prog->allFuncs.iterator();
+ !fi.end(); fi.next()) {
+ Function *fn = reinterpret_cast<Function *>(fi.get());
+ if (!doRun(fn, ordered, skipPhi))
+ return false;
+ }
+ return !err;
+}
+
+bool
+Pass::run(Function *func, bool ordered, bool skipPhi)
+{
+ prog = func->getProgram();
+ err = false;
+ return doRun(func, ordered, skipPhi);
+}
+
+bool
+Pass::doRun(Function *func, bool ordered, bool skipPhi)
+{
+ Iterator *bbIter;
+ BasicBlock *bb;
+ Instruction *insn, *next;
+
+ this->func = func;
+ if (!visit(func))
+ return false;
+
+ bbIter = ordered ? func->cfg.iteratorCFG() : func->cfg.iteratorDFS();
+
+ for (; !bbIter->end(); bbIter->next()) {
+ bb = BasicBlock::get(reinterpret_cast<Graph::Node *>(bbIter->get()));
+ if (!visit(bb))
+ break;
+ for (insn = skipPhi ? bb->getEntry() : bb->getFirst(); insn != NULL;
+ insn = next) {
+ next = insn->next;
+ if (!visit(insn))
+ break;
+ }
+ }
+ func->cfg.putIterator(bbIter);
+ return !err;
+}
+
+void
+Function::printCFGraph(const char *filePath)
+{
+ FILE *out = fopen(filePath, "a");
+ if (!out) {
+ ERROR("failed to open file: %s\n", filePath);
+ return;
+ }
+ INFO("printing control flow graph to: %s\n", filePath);
+
+ fprintf(out, "digraph G {\n");
+
+ Iterator *iter;
+ for (iter = cfg.iteratorDFS(); !iter->end(); iter->next()) {
+ BasicBlock *bb = BasicBlock::get(
+ reinterpret_cast<Graph::Node *>(iter->get()));
+ int idA = bb->getId();
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ int idB = BasicBlock::get(ei.getNode())->getId();
+ switch (ei.getType()) {
+ case Graph::Edge::TREE:
+ fprintf(out, "\t%i -> %i;\n", idA, idB);
+ break;
+ case Graph::Edge::FORWARD:
+ fprintf(out, "\t%i -> %i [color=green];\n", idA, idB);
+ break;
+ case Graph::Edge::CROSS:
+ fprintf(out, "\t%i -> %i [color=red];\n", idA, idB);
+ break;
+ case Graph::Edge::BACK:
+ fprintf(out, "\t%i -> %i;\n", idA, idB);
+ break;
+ case Graph::Edge::DUMMY:
+ fprintf(out, "\t%i -> %i [style=dotted];\n", idA, idB);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+ }
+ cfg.putIterator(iter);
+
+ fprintf(out, "}\n");
+ fclose(out);
+}
+
+} // namespace nv50_ir
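
Function::printCFGraph() above appends a Graphviz digraph to the named file, so dumping a shader's CFG during debugging could look roughly like this (the path and call site are illustrative):

    // e.g. temporarily added to nv50_ir_generate_code() after convertToSSA()
    prog->main->printCFGraph("/tmp/nv50_ir_cfg.dot");

    // render offline:  dot -Tpng /tmp/nv50_ir_cfg.dot -o cfg.png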
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
new file mode 100644
index 00000000000..284736838ab
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
@@ -0,0 +1,501 @@
+
+#include "nv50_ir.h"
+#include "nv50_ir_build_util.h"
+
+namespace nv50_ir {
+
+BuildUtil::BuildUtil()
+{
+ prog = NULL;
+ func = NULL;
+ bb = NULL;
+ pos = NULL;
+
+ memset(imms, 0, sizeof(imms));
+ immCount = 0;
+}
+
+void
+BuildUtil::addImmediate(ImmediateValue *imm)
+{
+ if (immCount > (NV50_IR_BUILD_IMM_HT_SIZE * 3) / 4)
+ return;
+
+ unsigned int pos = u32Hash(imm->reg.data.u32);
+
+ while (imms[pos])
+ pos = (pos + 1) % NV50_IR_BUILD_IMM_HT_SIZE;
+ imms[pos] = imm;
+ immCount++;
+}
+
+Instruction *
+BuildUtil::mkOp1(operation op, DataType ty, Value *dst, Value *src)
+{
+ Instruction *insn = new_Instruction(func, op, ty);
+
+ insn->setDef(0, dst);
+ insn->setSrc(0, src);
+
+ insert(insn);
+ return insn;
+}
+
+Instruction *
+BuildUtil::mkOp2(operation op, DataType ty, Value *dst,
+ Value *src0, Value *src1)
+{
+ Instruction *insn = new_Instruction(func, op, ty);
+
+ insn->setDef(0, dst);
+ insn->setSrc(0, src0);
+ insn->setSrc(1, src1);
+
+ insert(insn);
+ return insn;
+}
+
+Instruction *
+BuildUtil::mkOp3(operation op, DataType ty, Value *dst,
+ Value *src0, Value *src1, Value *src2)
+{
+ Instruction *insn = new_Instruction(func, op, ty);
+
+ insn->setDef(0, dst);
+ insn->setSrc(0, src0);
+ insn->setSrc(1, src1);
+ insn->setSrc(2, src2);
+
+ insert(insn);
+ return insn;
+}
+
+LValue *
+BuildUtil::mkLoad(DataType ty, Symbol *mem, Value *ptr)
+{
+ Instruction *insn = new_Instruction(func, OP_LOAD, ty);
+ LValue *def = getScratch();
+
+ insn->setDef(0, def);
+ insn->setSrc(0, mem);
+ if (ptr)
+ insn->setIndirect(0, 0, ptr);
+
+ insert(insn);
+ return def;
+}
+
+Instruction *
+BuildUtil::mkStore(operation op, DataType ty, Symbol *mem, Value *ptr,
+ Value *stVal)
+{
+ Instruction *insn = new_Instruction(func, op, ty);
+
+ insn->setSrc(0, mem);
+ insn->setSrc(1, stVal);
+ if (ptr)
+ insn->setIndirect(0, 0, ptr);
+
+ insert(insn);
+ return insn;
+}
+
+Instruction *
+BuildUtil::mkFetch(Value *dst, DataType ty, DataFile file, int32_t offset,
+ Value *attrRel, Value *primRel)
+{
+ Symbol *sym = mkSymbol(file, 0, ty, offset);
+
+ Instruction *insn = mkOp1(OP_VFETCH, ty, dst, sym);
+
+ insn->setIndirect(0, 0, attrRel);
+ insn->setIndirect(0, 1, primRel);
+
+ // already inserted
+ return insn;
+}
+
+Instruction *
+BuildUtil::mkMov(Value *dst, Value *src, DataType ty)
+{
+ Instruction *insn = new_Instruction(func, OP_MOV, ty);
+
+ insn->setDef(0, dst);
+ insn->setSrc(0, src);
+
+ insert(insn);
+ return insn;
+}
+
+Instruction *
+BuildUtil::mkMovToReg(int id, Value *src)
+{
+ Instruction *insn = new_Instruction(func, OP_MOV, typeOfSize(src->reg.size));
+
+ insn->setDef(0, new_LValue(func, FILE_GPR));
+ insn->getDef(0)->reg.data.id = id;
+ insn->setSrc(0, src);
+
+ insert(insn);
+ return insn;
+}
+
+Instruction *
+BuildUtil::mkMovFromReg(Value *dst, int id)
+{
+ Instruction *insn = new_Instruction(func, OP_MOV, typeOfSize(dst->reg.size));
+
+ insn->setDef(0, dst);
+ insn->setSrc(0, new_LValue(func, FILE_GPR));
+ insn->getSrc(0)->reg.data.id = id;
+
+ insert(insn);
+ return insn;
+}
+
+Instruction *
+BuildUtil::mkCvt(operation op,
+ DataType dstTy, Value *dst, DataType srcTy, Value *src)
+{
+ Instruction *insn = new_Instruction(func, op, dstTy);
+
+ insn->setType(dstTy, srcTy);
+ insn->setDef(0, dst);
+ insn->setSrc(0, src);
+
+ insert(insn);
+ return insn;
+}
+
+Instruction *
+BuildUtil::mkCmp(operation op, CondCode cc, DataType ty, Value *dst,
+ Value *src0, Value *src1, Value *src2)
+{
+ CmpInstruction *insn = new_CmpInstruction(func, op);
+
+ insn->setType(dst->reg.file == FILE_PREDICATE ? TYPE_U8 : ty, ty);
+ insn->setCondition(cc);
+ insn->setDef(0, dst);
+ insn->setSrc(0, src0);
+ insn->setSrc(1, src1);
+ if (src2)
+ insn->setSrc(2, src2);
+
+ insert(insn);
+ return insn;
+}
+
+Instruction *
+BuildUtil::mkTex(operation op, TexTarget targ, uint8_t tic, uint8_t tsc,
+ Value **def, Value **src)
+{
+ TexInstruction *tex = new_TexInstruction(func, op);
+
+ for (int d = 0; d < 4 && def[d]; ++d)
+ tex->setDef(d, def[d]);
+ for (int s = 0; s < 4 && src[s]; ++s)
+ tex->setSrc(s, src[s]);
+
+ tex->setTexture(targ, tic, tsc);
+
+ return tex;
+}
+
+Instruction *
+BuildUtil::mkQuadop(uint8_t q, Value *def, uint8_t l, Value *src0, Value *src1)
+{
+ Instruction *quadop = mkOp2(OP_QUADOP, TYPE_F32, def, src0, src1);
+ quadop->subOp = q;
+ quadop->lanes = l;
+ return quadop;
+}
+
+Instruction *
+BuildUtil::mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc)
+{
+ Instruction *insn;
+ LValue *def0 = getSSA();
+ LValue *def1 = getSSA();
+
+ mkMov(def0, trSrc)->setPredicate(CC_P, pred);
+ mkMov(def1, flSrc)->setPredicate(CC_NOT_P, pred);
+
+   // mkOp2() inserts the OP_UNION at the current position itself
+   insn = mkOp2(OP_UNION, typeOfSize(dst->reg.size), dst, def0, def1);
+
+ return insn;
+}
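+
+// Illustrative sketch only (not part of the backend): how mkCmp() and
+// mkSelect() combine into a compare-and-select sequence. The helper name
+// and the use of a FILE_PREDICATE LValue are assumptions for the example;
+// real users would normally just emit OP_MIN directly.
+static inline Value *
+exampleSelectMin(BuildUtil &bld, Value *a, Value *b)
+{
+   LValue *pred = new_LValue(bld.getFunction(), FILE_PREDICATE);
+   bld.mkCmp(OP_SET, CC_LT, TYPE_F32, pred, a, b); // pred = (a < b)
+   Value *dst = bld.getScratch();
+   bld.mkSelect(pred, dst, a, b);                  // dst = pred ? a : b
+   return dst;
+}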
+
+FlowInstruction *
+BuildUtil::mkFlow(operation op, BasicBlock *targ, CondCode cc, Value *pred)
+{
+ FlowInstruction *insn = new_FlowInstruction(func, op, targ);
+
+ if (pred)
+ insn->setPredicate(cc, pred);
+
+ insert(insn);
+ return insn;
+}
+
+void
+BuildUtil::mkClobber(DataFile f, uint32_t rMask, int unit)
+{
+ static const uint16_t baseSize2[16] =
+ {
+ 0x0000, 0x0010, 0x0011, 0x0020, 0x0012, 0x1210, 0x1211, 0x1220,
+      0x0013, 0x1310, 0x1311, 0x1320, 0x0022, 0x2210, 0x2211, 0x0040,
+ };
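+   // Worked example: a nibble mask of 0x6 (registers 1 and 2 of the group)
+   // yields baseSize2[0x6] == 0x1211, i.e. base1 = 1, size1 = 1 and
+   // base2 = 2, size2 = 1, so two single-register ranges are clobbered.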
+
+ int base = 0;
+
+ for (; rMask; rMask >>= 4, base += 4) {
+ const uint32_t mask = rMask & 0xf;
+ if (!mask)
+ continue;
+ int base1 = (baseSize2[mask] >> 0) & 0xf;
+ int size1 = (baseSize2[mask] >> 4) & 0xf;
+ int base2 = (baseSize2[mask] >> 8) & 0xf;
+ int size2 = (baseSize2[mask] >> 12) & 0xf;
+ Instruction *insn = mkOp(OP_NOP, TYPE_NONE, NULL);
+ if (1) { // size1 can't be 0
+ LValue *reg = new_LValue(func, f);
+ reg->reg.size = size1 << unit;
+ reg->reg.data.id = base + base1;
+ insn->setDef(0, reg);
+ }
+ if (size2) {
+ LValue *reg = new_LValue(func, f);
+ reg->reg.size = size2 << unit;
+ reg->reg.data.id = base + base2;
+ insn->setDef(1, reg);
+ }
+ }
+}
+
+ImmediateValue *
+BuildUtil::mkImm(uint32_t u)
+{
+ unsigned int pos = u32Hash(u);
+
+ while (imms[pos] && imms[pos]->reg.data.u32 != u)
+ pos = (pos + 1) % NV50_IR_BUILD_IMM_HT_SIZE;
+
+ ImmediateValue *imm = imms[pos];
+ if (!imm) {
+ imm = new_ImmediateValue(prog, u);
+ addImmediate(imm);
+ }
+ return imm;
+}
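+
+// Note on the immediate hash table: mkImm() relies on addImmediate()
+// refusing to insert once the table is more than 3/4 full, which guarantees
+// the linear probe above always reaches a matching or an empty slot and
+// therefore terminates.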
+
+ImmediateValue *
+BuildUtil::mkImm(uint64_t u)
+{
+ ImmediateValue *imm = new_ImmediateValue(prog, (uint32_t)0);
+
+ imm->reg.size = 8;
+ imm->reg.type = TYPE_U64;
+ imm->reg.data.u64 = u;
+
+ return imm;
+}
+
+ImmediateValue *
+BuildUtil::mkImm(float f)
+{
+ union {
+ float f32;
+ uint32_t u32;
+ } u;
+ u.f32 = f;
+ return mkImm(u.u32);
+}
+
+Value *
+BuildUtil::loadImm(Value *dst, float f)
+{
+ return mkOp1v(OP_MOV, TYPE_F32, dst ? dst : getScratch(), mkImm(f));
+}
+
+Value *
+BuildUtil::loadImm(Value *dst, uint32_t u)
+{
+ return mkOp1v(OP_MOV, TYPE_U32, dst ? dst : getScratch(), mkImm(u));
+}
+
+Value *
+BuildUtil::loadImm(Value *dst, uint64_t u)
+{
+ return mkOp1v(OP_MOV, TYPE_U64, dst ? dst : getScratch(8), mkImm(u));
+}
+
+Symbol *
+BuildUtil::mkSymbol(DataFile file, int8_t fileIndex, DataType ty,
+ uint32_t baseAddr)
+{
+ Symbol *sym = new_Symbol(prog, file, fileIndex);
+
+ sym->setOffset(baseAddr);
+ sym->reg.type = ty;
+ sym->reg.size = typeSizeof(ty);
+
+ return sym;
+}
+
+Symbol *
+BuildUtil::mkSysVal(SVSemantic svName, uint32_t svIndex)
+{
+ Symbol *sym = new_Symbol(prog, FILE_SYSTEM_VALUE, 0);
+
+ assert(svIndex < 4 ||
+ (svName == SV_CLIP_DISTANCE || svName == SV_TESS_FACTOR));
+
+ switch (svName) {
+ case SV_POSITION:
+ case SV_FACE:
+ case SV_YDIR:
+ case SV_POINT_SIZE:
+ case SV_POINT_COORD:
+ case SV_CLIP_DISTANCE:
+ case SV_TESS_FACTOR:
+ sym->reg.type = TYPE_F32;
+ break;
+ default:
+ sym->reg.type = TYPE_U32;
+ break;
+ }
+ sym->reg.size = typeSizeof(sym->reg.type);
+
+ sym->reg.data.sv.sv = svName;
+ sym->reg.data.sv.index = svIndex;
+
+ return sym;
+}
+
+void
+BuildUtil::DataArray::init()
+{
+ values = NULL;
+ baseAddr = 0;
+ arrayLen = 0;
+
+ vecDim = 4;
+ eltSize = 2;
+
+ file = FILE_GPR;
+ regOnly = true;
+}
+
+BuildUtil::DataArray::DataArray() : up(NULL)
+{
+ init();
+}
+
+BuildUtil::DataArray::DataArray(BuildUtil *bld) : up(bld)
+{
+ init();
+}
+
+BuildUtil::DataArray::~DataArray()
+{
+ if (values)
+ delete[] values;
+}
+
+void
+BuildUtil::DataArray::setup(uint32_t base, int len, int v, int size,
+ DataFile f, int8_t fileIndex)
+{
+ baseAddr = base;
+ arrayLen = len;
+
+ vecDim = v;
+ eltSize = size;
+
+ file = f;
+ regOnly = !isMemoryFile(f);
+
+ values = new Value * [arrayLen * vecDim];
+ if (values)
+ memset(values, 0, arrayLen * vecDim * sizeof(Value *));
+
+ if (!regOnly) {
+ baseSym = new_Symbol(up->getProgram(), file, fileIndex);
+ baseSym->setOffset(baseAddr);
+ baseSym->reg.size = size;
+ }
+}
+
+Value *
+BuildUtil::DataArray::acquire(int i, int c)
+{
+ const unsigned int idx = i * vecDim + c;
+
+ assert(idx < arrayLen * vecDim);
+
+   if (regOnly) {
+      // vecDim is always 4 in the regOnly case, so the outer idx applies as-is
+ if (!values[idx])
+ values[idx] = new_LValue(up->getFunction(), file);
+ return values[idx];
+ } else {
+ return up->getScratch();
+ }
+}
+
+Value *
+BuildUtil::DataArray::load(int i, int c, Value *ptr)
+{
+ const unsigned int idx = i * vecDim + c;
+
+ assert(idx < arrayLen * vecDim);
+
+ if (regOnly) {
+ if (!values[idx])
+ values[idx] = new_LValue(up->getFunction(), file);
+ return values[idx];
+ } else {
+ Symbol *sym = reinterpret_cast<Symbol *>(values[idx]);
+ if (!sym)
+ values[idx] = sym = this->mkSymbol(i, c, baseSym);
+ return up->mkLoad(typeOfSize(eltSize), sym, ptr);
+ }
+}
+
+void
+BuildUtil::DataArray::store(int i, int c, Value *ptr, Value *value)
+{
+ const unsigned int idx = i * vecDim + c;
+
+ assert(idx < arrayLen * vecDim);
+
+ if (regOnly) {
+ assert(!ptr);
+ assert(!values[idx] || values[idx] == value);
+ values[idx] = value;
+ } else {
+ Symbol *sym = reinterpret_cast<Symbol *>(values[idx]);
+ if (!sym)
+ values[idx] = sym = this->mkSymbol(i, c, baseSym);
+ up->mkStore(OP_STORE, typeOfSize(value->reg.size), sym, ptr, value);
+ }
+}
+
+Symbol *
+BuildUtil::DataArray::mkSymbol(int i, int c, Symbol *base)
+{
+ const unsigned int idx = i * vecDim + c;
+
+ Symbol *sym = new_Symbol(up->getProgram(), file, 0);
+
+ assert(base || (idx < arrayLen && c < vecDim));
+
+ sym->reg.size = eltSize;
+ sym->reg.type = typeOfSize(eltSize);
+
+ sym->setAddress(base, baseAddr + idx * eltSize);
+ return sym;
+}
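+
+// Illustrative sketch only (not part of the backend): mapping a small vec4
+// temporary array onto DataArray in the register-only case. The sizes and
+// the helper name are assumptions made for the example.
+static inline void
+exampleTempArray(BuildUtil &bld)
+{
+   BuildUtil::DataArray temps(&bld);
+   temps.setup(0, 8, 4, 4, FILE_GPR);    // 8 vec4s, 4-byte components, in GPRs
+
+   Value *one = bld.loadImm(NULL, 1.0f);
+   temps.store(2, 0, NULL, one);         // TEMP[2].x = 1.0f (records the value)
+   Value *x = temps.load(2, 0, NULL);    // returns the same LValue again
+   (void)x;
+}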
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
new file mode 100644
index 00000000000..4c3addb27e4
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
@@ -0,0 +1,245 @@
+
+#ifndef __NV50_IR_BUILD_UTIL__
+#define __NV50_IR_BUILD_UTIL__
+
+namespace nv50_ir {
+
+class BuildUtil
+{
+public:
+ BuildUtil();
+
+ inline void setProgram(Program *);
+ inline Program *getProgram() const { return prog; }
+ inline Function *getFunction() const { return func; }
+
+ // keeps inserting at head/tail of block
+ inline void setPosition(BasicBlock *, bool tail);
+ // position advances only if @after is true
+ inline void setPosition(Instruction *, bool after);
+
+ inline BasicBlock *getBB() { return bb; }
+
+ inline void insert(Instruction *);
+ inline void remove(Instruction *i) { assert(i->bb == bb); bb->remove(i); }
+
+ inline LValue *getScratch(int size = 4);
+ inline LValue *getSSA(int size = 4); // scratch value for a single assignment
+
+ inline Instruction *mkOp(operation, DataType, Value *);
+ Instruction *mkOp1(operation, DataType, Value *, Value *);
+ Instruction *mkOp2(operation, DataType, Value *, Value *, Value *);
+ Instruction *mkOp3(operation, DataType, Value *, Value *, Value *, Value *);
+
+ LValue *mkOp1v(operation, DataType, Value *, Value *);
+ LValue *mkOp2v(operation, DataType, Value *, Value *, Value *);
+ LValue *mkOp3v(operation, DataType, Value *, Value *, Value *, Value *);
+
+ LValue *mkLoad(DataType, Symbol *, Value *ptr);
+ Instruction *mkStore(operation, DataType, Symbol *, Value *ptr, Value *val);
+
+ Instruction *mkMov(Value *, Value *, DataType = TYPE_U32);
+ Instruction *mkMovToReg(int id, Value *);
+ Instruction *mkMovFromReg(Value *, int id);
+
+ Instruction *mkFetch(Value *, DataType, DataFile, int32_t offset,
+ Value *attrRel, Value *primRel);
+
+ Instruction *mkCvt(operation, DataType, Value *, DataType, Value *);
+ Instruction *mkCmp(operation, CondCode, DataType,
+ Value *,
+ Value *, Value *, Value * = NULL);
+ Instruction *mkTex(operation, TexTarget, uint8_t tic, uint8_t tsc,
+ Value **def, Value **src);
+ Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *);
+
+ FlowInstruction *mkFlow(operation, BasicBlock *target,
+ CondCode, Value *pred);
+
+ Instruction *mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc);
+
+ void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2);
+
+ ImmediateValue *mkImm(float);
+ ImmediateValue *mkImm(uint32_t);
+ ImmediateValue *mkImm(uint64_t);
+
+ ImmediateValue *mkImm(int i) { return mkImm((uint32_t)i); }
+
+ Value *loadImm(Value *dst, float);
+ Value *loadImm(Value *dst, uint32_t);
+ Value *loadImm(Value *dst, uint64_t);
+
+ Value *loadImm(Value *dst, int i) { return loadImm(dst, (uint32_t)i); }
+
+ class DataArray
+ {
+ public:
+ DataArray();
+ DataArray(BuildUtil *);
+ ~DataArray();
+
+ inline void setParent(BuildUtil *bld) { assert(!up); up = bld; }
+
+ void setup(uint32_t base, int len, int vecDim, int size,
+ DataFile, int8_t fileIndex = 0);
+
+ inline bool exists(unsigned int i, unsigned int c);
+
+ Value *load(int i, int c, Value *ptr);
+ void store(int i, int c, Value *ptr, Value *value);
+ Value *acquire(int i, int c);
+
+ private:
+ Symbol *mkSymbol(int i, int c, Symbol *base);
+
+ private:
+ Value **values;
+ uint32_t baseAddr;
+ uint32_t arrayLen;
+ Symbol *baseSym;
+
+ uint8_t vecDim;
+ uint8_t eltSize; // in bytes
+
+ DataFile file;
+ bool regOnly;
+
+ BuildUtil *up;
+
+ void init();
+ };
+
+ Symbol *mkSymbol(DataFile file, int8_t fileIndex,
+ DataType ty, uint32_t baseAddress);
+
+ Symbol *mkSysVal(SVSemantic svName, uint32_t svIndex);
+
+private:
+ void addImmediate(ImmediateValue *);
+ inline unsigned int u32Hash(uint32_t);
+
+protected:
+ Program *prog;
+ Function *func;
+ Instruction *pos;
+ BasicBlock *bb;
+ bool tail;
+
+#define NV50_IR_BUILD_IMM_HT_SIZE 256
+
+ ImmediateValue *imms[NV50_IR_BUILD_IMM_HT_SIZE];
+ unsigned int immCount;
+};
+
+unsigned int BuildUtil::u32Hash(uint32_t u)
+{
+ return (u % 273) % NV50_IR_BUILD_IMM_HT_SIZE;
+}
+
+void BuildUtil::setProgram(Program *program)
+{
+ prog = program;
+}
+
+void
+BuildUtil::setPosition(BasicBlock *block, bool atTail)
+{
+ bb = block;
+ prog = bb->getProgram();
+ func = bb->getFunction();
+ pos = NULL;
+ tail = atTail;
+}
+
+void
+BuildUtil::setPosition(Instruction *i, bool after)
+{
+ bb = i->bb;
+ prog = bb->getProgram();
+ func = bb->getFunction();
+ pos = i;
+ tail = after;
+ assert(bb);
+}
+
+LValue *
+BuildUtil::getScratch(int size)
+{
+ LValue *lval = new_LValue(func, FILE_GPR);
+ if (size != 4)
+ lval->reg.size = size;
+ return lval;
+}
+
+LValue *
+BuildUtil::getSSA(int size)
+{
+ LValue *lval = new_LValue(func, FILE_GPR);
+ lval->ssa = 1;
+ if (size != 4)
+ lval->reg.size = size;
+ return lval;
+}
+
+void BuildUtil::insert(Instruction *i)
+{
+ if (!pos) {
+ tail ? bb->insertTail(i) : bb->insertHead(i);
+ } else {
+ if (tail) {
+ bb->insertAfter(pos, i);
+ pos = i;
+ } else {
+ bb->insertBefore(pos, i);
+ }
+ }
+}
+
+Instruction *
+BuildUtil::mkOp(operation op, DataType ty, Value *dst)
+{
+ Instruction *insn = new_Instruction(func, op, ty);
+ insn->setDef(0, dst);
+ insert(insn);
+ if (op == OP_DISCARD || op == OP_EXIT ||
+ op == OP_JOIN ||
+ op == OP_QUADON || op == OP_QUADPOP ||
+ op == OP_EMIT || op == OP_RESTART)
+ insn->fixed = 1;
+ return insn;
+}
+
+inline LValue *
+BuildUtil::mkOp1v(operation op, DataType ty, Value *dst, Value *src)
+{
+ mkOp1(op, ty, dst, src);
+ return dst->asLValue();
+}
+
+inline LValue *
+BuildUtil::mkOp2v(operation op, DataType ty, Value *dst,
+ Value *src0, Value *src1)
+{
+ mkOp2(op, ty, dst, src0, src1);
+ return dst->asLValue();
+}
+
+inline LValue *
+BuildUtil::mkOp3v(operation op, DataType ty, Value *dst,
+ Value *src0, Value *src1, Value *src2)
+{
+ mkOp3(op, ty, dst, src0, src1, src2);
+ return dst->asLValue();
+}
+
+bool
+BuildUtil::DataArray::exists(unsigned int i, unsigned int c)
+{
+ assert(i < arrayLen && c < vecDim);
+ return !regOnly || values[i * vecDim + c];
+}
+
+} // namespace nv50_ir
+
+#endif // __NV50_IR_BUILD_UTIL__
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
new file mode 100644
index 00000000000..27e435d4ea1
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
@@ -0,0 +1,149 @@
+
+#ifndef __NV50_IR_DRIVER_H__
+#define __NV50_IR_DRIVER_H__
+
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+
+/*
+ * This struct constitutes linkage information in TGSI terminology.
+ *
+ * It is created by the code generator and handed to the pipe driver
+ * for input/output slot assignment.
+ */
+struct nv50_ir_varying
+{
+ uint8_t slot[4]; /* native slots for xyzw (addresses in 32-bit words) */
+
+ unsigned mask : 4; /* vec4 mask */
+ unsigned linear : 1; /* linearly interpolated if true (and not flat) */
+ unsigned flat : 1;
+ unsigned centroid : 1;
+ unsigned patch : 1; /* patch constant value */
+ unsigned regular : 1; /* driver-specific meaning (e.g. input in sreg) */
+ unsigned input : 1; /* indicates direction of system values */
+ unsigned oread : 1; /* true if output is read from parallel TCP */
+
+ ubyte id; /* TGSI register index */
+ ubyte sn; /* TGSI semantic name */
+ ubyte si; /* TGSI semantic index */
+};
+
+#define NV50_PROGRAM_IR_TGSI 0
+#define NV50_PROGRAM_IR_SM4 1
+#define NV50_PROGRAM_IR_GLSL 2
+#define NV50_PROGRAM_IR_LLVM 3
+
+#ifdef DEBUG
+# define NV50_IR_DEBUG_BASIC (1 << 0)
+# define NV50_IR_DEBUG_VERBOSE (1 << 1)
+# define NV50_IR_DEBUG_REG_ALLOC (1 << 2)
+#else
+# define NV50_IR_DEBUG_BASIC 0
+# define NV50_IR_DEBUG_VERBOSE 0
+# define NV50_IR_DEBUG_REG_ALLOC 0
+#endif
+
+struct nv50_ir_prog_info
+{
+ uint16_t target; /* chipset (0x50, 0x84, 0xc0, ...) */
+
+ uint8_t type; /* PIPE_SHADER */
+
+ uint8_t optLevel; /* optimization level (0 to 3) */
+ uint8_t dbgFlags;
+
+ struct {
+ int16_t maxGPR; /* may be -1 if none used */
+ int16_t maxOutput;
+ uint32_t tlsSpace; /* required local memory per thread */
+ uint32_t *code;
+ uint32_t codeSize;
+ uint8_t sourceRep; /* NV50_PROGRAM_IR */
+ const void *source;
+ void *relocData;
+ } bin;
+
+ struct nv50_ir_varying sv[PIPE_MAX_SHADER_INPUTS];
+ struct nv50_ir_varying in[PIPE_MAX_SHADER_INPUTS];
+ struct nv50_ir_varying out[PIPE_MAX_SHADER_OUTPUTS];
+ uint8_t numInputs;
+ uint8_t numOutputs;
+ uint8_t numPatchConstants; /* also included in numInputs/numOutputs */
+ uint8_t numSysVals;
+
+ struct {
+ uint32_t *buf; /* for IMMEDIATE_ARRAY */
+ uint16_t bufSize; /* size of immediate array */
+ uint16_t count; /* count of inline immediates */
+ uint32_t *data; /* inline immediate data */
+ uint8_t *type; /* for each vec4 (128 bit) */
+ } immd;
+
+ union {
+ struct {
+ uint32_t inputMask[4]; /* mask of attributes read (1 bit per scalar) */
+ } vp;
+ struct {
+ uint8_t inputPatchSize;
+ uint8_t outputPatchSize;
+ uint8_t partitioning; /* PIPE_TESS_PART */
+ int8_t winding; /* +1 (clockwise) / -1 (counter-clockwise) */
+ uint8_t domain; /* PIPE_PRIM_{QUADS,TRIANGLES,LINES} */
+ uint8_t outputPrim; /* PIPE_PRIM_{TRIANGLES,LINES,POINTS} */
+ } tp;
+ struct {
+ uint8_t inputPrim;
+ uint8_t outputPrim;
+ unsigned instanceCount;
+ unsigned maxVertices;
+ } gp;
+ struct {
+ unsigned numColourResults;
+ boolean writesDepth;
+ boolean earlyFragTests;
+ boolean separateFragData;
+ boolean usesDiscard;
+ } fp;
+ } prop;
+
+ struct {
+ uint8_t clipDistance; /* index of first clip distance output */
+ uint8_t clipDistanceCount;
+ uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */
+ uint8_t pointSize; /* output index for PointSize */
+ uint8_t edgeFlagIn;
+ uint8_t edgeFlagOut;
+ uint8_t fragDepth; /* output index of FragDepth */
+ uint8_t sampleMask; /* output index of SampleMask */
+ uint8_t backFaceColor[2]; /* input/output indices of back face colour */
+ uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
+ } io;
+
+ /* driver callback to assign input/output locations */
+ int (*assignSlots)(struct nv50_ir_prog_info *);
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int nv50_ir_generate_code(struct nv50_ir_prog_info *);
+
+extern void nv50_ir_relocate_code(void *relocData, uint32_t *code,
+ uint32_t codePos,
+ uint32_t libPos,
+ uint32_t dataPos);
+
+/* obtain code that will be shared among programs */
+extern void nv50_ir_get_target_library(uint32_t chipset,
+ const uint32_t **code, uint32_t *size);
+
+#ifdef __cplusplus
+}
+#endif
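+
+/*
+ * Usage sketch (illustrative only, not normative): how a pipe driver might
+ * fill in nv50_ir_prog_info and invoke the code generator. The field values,
+ * the helper names and the "0 on success" return convention are assumptions
+ * made for this example.
+ *
+ *    struct nv50_ir_prog_info info;
+ *    memset(&info, 0, sizeof(info));
+ *    info.type = PIPE_SHADER_FRAGMENT;
+ *    info.target = chipset;                  // e.g. 0x50 / 0x84 / 0xc0
+ *    info.bin.sourceRep = NV50_PROGRAM_IR_TGSI;
+ *    info.bin.source = tokens;               // TGSI token stream
+ *    info.assignSlots = my_assign_slots;     // driver slot-assignment callback
+ *    if (nv50_ir_generate_code(&info) == 0)
+ *       upload_shader(info.bin.code, info.bin.codeSize);
+ */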
+
+#endif // __NV50_IR_DRIVER_H__
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
new file mode 100644
index 00000000000..0a61a1ddaef
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
@@ -0,0 +1,1333 @@
+
+#include "nv50_ir.h"
+#include "nv50_ir_target.h"
+
+namespace nv50_ir {
+
+class CodeEmitterNV50 : public CodeEmitter
+{
+public:
+ CodeEmitterNV50(const Target *);
+
+ virtual bool emitInstruction(Instruction *);
+
+ virtual uint32_t getMinEncodingSize(const Instruction *) const;
+
+ inline void setProgramType(Program::Type pType) { progType = pType; }
+
+private:
+ const Target *targ;
+
+ Program::Type progType;
+
+private:
+ inline void defId(const ValueDef&, const int pos);
+ inline void srcId(const ValueRef&, const int pos);
+ inline void srcId(const ValueRef *, const int pos);
+
+ inline void srcAddr16(const ValueRef&, const int pos);
+ inline void srcAddr8(const ValueRef&, const int pos);
+
+ void emitFlagsRd(const Instruction *);
+ void emitFlagsWr(const Instruction *);
+
+ void emitCondCode(CondCode cc, int pos);
+
+ inline void setARegBits(unsigned int);
+
+ void setAReg16(const Instruction *, int s);
+ void setImmediate(const Instruction *, int s);
+
+ void setDst(const Value *);
+ void setDst(const Instruction *, int d);
+ void emitSrc0(const ValueRef&);
+ void emitSrc1(const ValueRef&);
+ void emitSrc2(const ValueRef&);
+
+ void emitForm_MAD(const Instruction *);
+ void emitForm_ADD(const Instruction *);
+ void emitForm_MUL(const Instruction *);
+ void emitForm_IMM(const Instruction *);
+
+ void emitLoadStoreSize(DataType ty, int pos);
+
+ void roundMode_MAD(const Instruction *);
+ void roundMode_CVT(RoundMode);
+
+ void emitMNeg12(const Instruction *);
+
+ void emitLOAD(const Instruction *);
+ void emitSTORE(const Instruction *);
+ void emitMOV(const Instruction *);
+ void emitNOP();
+ void emitINTERP(const Instruction *);
+ void emitPFETCH(const Instruction *);
+ void emitOUT(const Instruction *);
+
+ void emitUADD(const Instruction *);
+ void emitAADD(const Instruction *);
+ void emitFADD(const Instruction *);
+ void emitUMUL(const Instruction *);
+ void emitFMUL(const Instruction *);
+ void emitFMAD(const Instruction *);
+
+ void emitMINMAX(const Instruction *);
+
+ void emitPreOp(const Instruction *);
+ void emitSFnOp(const Instruction *, uint8_t subOp);
+
+ void emitShift(const Instruction *);
+ void emitARL(const Instruction *);
+ void emitLogicOp(const Instruction *);
+
+ void emitCVT(const Instruction *);
+ void emitSET(const Instruction *);
+
+ void emitTEX(const TexInstruction *);
+
+ void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
+
+ void emitFlow(const Instruction *, uint8_t flowOp);
+};
+
+#define SDATA(a) ((a).rep()->reg.data)
+#define DDATA(a) ((a).rep()->reg.data)
+
+void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
+{
+ assert(src.get());
+ code[pos / 32] |= SDATA(src).id << (pos % 32);
+}
+
+void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
+{
+ assert(src->get());
+ code[pos / 32] |= SDATA(*src).id << (pos % 32);
+}
+
+void CodeEmitterNV50::srcAddr16(const ValueRef& src, const int pos)
+{
+ assert(src.get());
+
+ uint32_t offset = SDATA(src).offset;
+
+ assert(offset <= 0xffff && (pos % 32) <= 16);
+
+ code[pos / 32] |= offset << (pos % 32);
+}
+
+void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
+{
+ assert(src.get());
+
+ uint32_t offset = SDATA(src).offset;
+
+ assert(offset <= 0x1fc && !(offset & 0x3));
+
+ code[pos / 32] |= (offset >> 2) << (pos % 32);
+}
+
+void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
+{
+ assert(def.get());
+ code[pos / 32] |= DDATA(def).id << (pos % 32);
+}
+
+void
+CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
+{
+ switch (insn->rnd) {
+ case ROUND_M: code[1] |= 1 << 22; break;
+ case ROUND_P: code[1] |= 2 << 22; break;
+ case ROUND_Z: code[1] |= 3 << 22; break;
+ default:
+ assert(insn->rnd == ROUND_N);
+ break;
+ }
+}
+
+void
+CodeEmitterNV50::emitMNeg12(const Instruction *i)
+{
+ code[1] |= i->src[0].mod.neg() << 26;
+ code[1] |= i->src[1].mod.neg() << 27;
+}
+
+void CodeEmitterNV50::emitCondCode(CondCode cc, int pos)
+{
+ uint8_t enc;
+
+ assert(pos >= 32 || pos <= 27);
+
+ switch (cc) {
+ case CC_LT: enc = 0x1; break;
+ case CC_LTU: enc = 0x9; break;
+ case CC_EQ: enc = 0x2; break;
+ case CC_EQU: enc = 0xa; break;
+ case CC_LE: enc = 0x3; break;
+ case CC_LEU: enc = 0xb; break;
+ case CC_GT: enc = 0x4; break;
+ case CC_GTU: enc = 0xc; break;
+ case CC_NE: enc = 0x5; break;
+ case CC_NEU: enc = 0xd; break;
+ case CC_GE: enc = 0x6; break;
+ case CC_GEU: enc = 0xe; break;
+ case CC_TR: enc = 0xf; break;
+ case CC_FL: enc = 0x0; break;
+
+ case CC_O: enc = 0x10; break;
+ case CC_C: enc = 0x11; break;
+ case CC_A: enc = 0x12; break;
+ case CC_S: enc = 0x13; break;
+ case CC_NS: enc = 0x1c; break;
+ case CC_NA: enc = 0x1d; break;
+ case CC_NC: enc = 0x1e; break;
+ case CC_NO: enc = 0x1f; break;
+
+ default:
+ enc = 0;
+ assert(!"invalid condition code");
+ break;
+ }
+ code[pos / 32] |= enc << (pos % 32);
+}
+
+void
+CodeEmitterNV50::emitFlagsRd(const Instruction *i)
+{
+ int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
+
+ assert(!(code[1] & 0x00003f80));
+
+ if (s >= 0) {
+ assert(i->getSrc(s)->reg.file == FILE_FLAGS);
+ emitCondCode(i->cc, 32 + 7);
+ srcId(i->src[s], 32 + 12);
+ } else {
+ code[1] |= 0x0780;
+ }
+}
+
+void
+CodeEmitterNV50::emitFlagsWr(const Instruction *i)
+{
+ assert(!(code[1] & 0x70));
+
+ if (i->flagsDef >= 0)
+ code[1] |= (DDATA(i->def[i->flagsDef]).id << 4) | 0x40;
+}
+
+void
+CodeEmitterNV50::setARegBits(unsigned int u)
+{
+ code[0] |= (u & 3) << 26;
+ code[1] |= (u & 4);
+}
+
+void
+CodeEmitterNV50::setAReg16(const Instruction *i, int s)
+{
+ s = i->src[s].indirect[0];
+ if (s >= 0)
+ setARegBits(SDATA(i->src[s]).id + 1);
+}
+
+void
+CodeEmitterNV50::setImmediate(const Instruction *i, int s)
+{
+ const ImmediateValue *imm = i->src[s].get()->asImm();
+ assert(imm);
+
+ code[1] |= 3;
+ code[0] |= (imm->reg.data.u32 & 0x3f) << 16;
+ code[1] |= (imm->reg.data.u32 >> 6) << 2;
+}
+
+void
+CodeEmitterNV50::setDst(const Value *dst)
+{
+ const Storage *reg = &dst->join->reg;
+
+ assert(reg->file != FILE_ADDRESS);
+
+ if (reg->data.id < 0) {
+ code[0] |= (127 << 2) | 1;
+ code[1] |= 8;
+ } else {
+ if (reg->file == FILE_SHADER_OUTPUT)
+ code[1] |= 8;
+ code[0] |= reg->data.id << 2;
+ }
+}
+
+void
+CodeEmitterNV50::setDst(const Instruction *i, int d)
+{
+ if (i->defExists(d)) {
+ setDst(i->getDef(d));
+ } else
+ if (!d) {
+ code[0] |= 0x01fc; // bit bucket
+ code[1] |= 0x0008;
+ }
+}
+
+void
+CodeEmitterNV50::emitSrc0(const ValueRef& ref)
+{
+ const Storage *reg = &ref.rep()->reg;
+
+ if (reg->file == FILE_SHADER_INPUT)
+ code[1] |= 0x00200000;
+ else
+ if (reg->file != FILE_GPR)
+ ERROR("invalid src0 register file: %d\n", reg->file);
+
+ assert(reg->data.id < 128);
+ code[0] |= reg->data.id << 9;
+}
+
+void
+CodeEmitterNV50::emitSrc1(const ValueRef& ref)
+{
+ const Storage *reg = &ref.rep()->reg;
+
+ if (reg->file == FILE_MEMORY_CONST) {
+ assert(!(code[1] & 0x01800000));
+ code[0] |= 1 << 23;
+ code[1] |= reg->fileIndex << 22;
+ } else
+ if (reg->file != FILE_GPR) {
+ ERROR("invalid src1 register file: %d\n", reg->file);
+ }
+
+ assert(reg->data.id < 128);
+ code[0] |= reg->data.id << 16;
+}
+
+void
+CodeEmitterNV50::emitSrc2(const ValueRef& ref)
+{
+ const Storage *reg = &ref.rep()->reg;
+
+ if (reg->file == FILE_MEMORY_CONST) {
+ assert(!(code[1] & 0x01800000));
+ code[0] |= 1 << 24;
+ code[1] |= reg->fileIndex << 22;
+ } else
+ if (reg->file != FILE_GPR) {
+ ERROR("invalid src1 register file: %d\n", reg->file);
+ }
+
+ assert(reg->data.id < 128);
+ code[1] |= reg->data.id << 14;
+}
+
+// the default form:
+// - long instruction
+// - 1 to 3 sources in slots 0, 1, 2
+// - address & flags
+void
+CodeEmitterNV50::emitForm_MAD(const Instruction *i)
+{
+ assert(i->encSize == 8);
+ code[0] |= 1;
+
+ emitFlagsRd(i);
+ emitFlagsWr(i);
+
+ setDst(i, 0);
+
+ if (i->srcExists(0))
+ emitSrc0(i->src[0]);
+
+ if (i->srcExists(1))
+ emitSrc1(i->src[1]);
+
+ if (i->srcExists(2))
+ emitSrc2(i->src[2]);
+
+ setAReg16(i, 1);
+}
+
+// like default form, but 2nd source in slot 2, and no 3rd source
+void
+CodeEmitterNV50::emitForm_ADD(const Instruction *i)
+{
+ assert(i->encSize == 8);
+ code[0] |= 1;
+
+ emitFlagsRd(i);
+ emitFlagsWr(i);
+
+ setDst(i, 0);
+
+ if (i->srcExists(0))
+ emitSrc0(i->src[0]);
+
+ if (i->srcExists(1))
+ emitSrc2(i->src[1]);
+
+ setAReg16(i, 1);
+}
+
+// default short form
+void
+CodeEmitterNV50::emitForm_MUL(const Instruction *i)
+{
+ assert(i->encSize == 4 && !(code[0] & 1));
+ assert(i->defExists(0));
+ assert(!i->getPredicate());
+
+ setDst(i, 0);
+
+ if (i->srcExists(0))
+ emitSrc0(i->src[0]);
+
+ if (i->srcExists(1))
+ emitSrc1(i->src[1]);
+}
+
+// usual immediate form
+// - 1 to 3 sources where last is immediate
+// - no address or predicate possible
+void
+CodeEmitterNV50::emitForm_IMM(const Instruction *i)
+{
+ assert(i->encSize == 8);
+ code[0] |= 1;
+
+ assert(i->defExists(0) && i->srcExists(0));
+
+ setDst(i, 0);
+
+ if (i->srcExists(2)) {
+ emitSrc0(i->src[0]);
+ emitSrc1(i->src[1]);
+ setImmediate(i, 2);
+ } else
+ if (i->srcExists(1)) {
+ emitSrc0(i->src[0]);
+ setImmediate(i, 1);
+ } else {
+ setImmediate(i, 0);
+ }
+}
+
+void
+CodeEmitterNV50::emitLoadStoreSize(DataType ty, int pos)
+{
+ uint8_t enc;
+
+ switch (ty) {
+ case TYPE_F32: // fall through
+ case TYPE_S32: // fall through
+ case TYPE_U32: enc = 0x6; break;
+ case TYPE_B128: enc = 0x5; break;
+ case TYPE_F64: enc = 0x4; break;
+ case TYPE_S16: enc = 0x3; break;
+ case TYPE_U16: enc = 0x2; break;
+ case TYPE_S8: enc = 0x1; break;
+ case TYPE_U8: enc = 0x0; break;
+ default:
+ enc = 0;
+ assert(!"invalid load/store type");
+ break;
+ }
+ code[pos / 32] |= enc << (pos % 32);
+}
+
+void
+CodeEmitterNV50::emitLOAD(const Instruction *i)
+{
+ DataFile sf = i->src[0].getFile();
+
+ switch (sf) {
+ case FILE_SHADER_INPUT:
+ code[0] = 0x10000001;
+ code[1] = 0x04200000 | (i->lanes << 14);
+ break;
+ case FILE_MEMORY_CONST:
+ code[0] = 0x10000001;
+ code[1] = 0x24000000 | (i->getSrc(0)->reg.fileIndex << 22);
+ break;
+ case FILE_MEMORY_LOCAL:
+ code[0] = 0xd0000001;
+ code[1] = 0x40000000;
+ break;
+ case FILE_MEMORY_GLOBAL:
+ code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
+ code[1] = 0x80000000;
+ break;
+ default:
+ assert(!"invalid load source file");
+ break;
+ }
+ if (sf == FILE_MEMORY_LOCAL ||
+ sf == FILE_MEMORY_GLOBAL)
+ emitLoadStoreSize(i->sType, 21 + 32);
+
+ setDst(i, 0);
+
+ emitFlagsRd(i);
+ emitFlagsWr(i);
+
+ if (i->src[0].getFile() == FILE_MEMORY_GLOBAL) {
+ srcId(*i->src[0].getIndirect(0), 9);
+ } else {
+ setAReg16(i, 0);
+ srcAddr16(i->src[0], 9);
+ }
+}
+
+void
+CodeEmitterNV50::emitSTORE(const Instruction *i)
+{
+ DataFile f = i->getSrc(0)->reg.file;
+ int32_t offset = i->getSrc(0)->reg.data.offset;
+
+ switch (f) {
+ case FILE_SHADER_OUTPUT:
+ code[0] = 0x00000001 | ((offset >> 2) << 2);
+ code[1] = 0x80c00000;
+ srcId(i->src[1], 32 + 15);
+ break;
+ case FILE_MEMORY_GLOBAL:
+ code[0] = 0xd0000000;
+ code[1] = 0xa0000000;
+ emitLoadStoreSize(i->dType, 21 + 32);
+ break;
+ case FILE_MEMORY_LOCAL:
+ code[0] = 0xd0000001;
+ code[1] = 0x60000000;
+ emitLoadStoreSize(i->dType, 21 + 32);
+ break;
+ case FILE_MEMORY_SHARED:
+ code[0] = 0x00000001;
+ code[1] = 0xe0000000;
+ switch (typeSizeof(i->dType)) {
+ case 1:
+ code[0] |= offset << 9;
+ code[1] |= 0x00400000;
+ break;
+ case 2:
+ code[0] |= (offset >> 1) << 9;
+ break;
+ case 4:
+ code[0] |= (offset >> 2) << 9;
+ code[1] |= 0x04000000;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ default:
+ assert(!"invalid store destination file");
+ break;
+ }
+
+ if (f != FILE_SHADER_OUTPUT) {
+ srcId(i->src[1], 2);
+ if (f == FILE_MEMORY_GLOBAL)
+ srcId(*i->src[0].getIndirect(0), 9);
+ if (f == FILE_MEMORY_LOCAL)
+ srcAddr16(i->src[0], 9);
+ }
+ if (f != FILE_MEMORY_GLOBAL)
+ setAReg16(i, 0);
+
+ emitFlagsRd(i);
+}
+
+void
+CodeEmitterNV50::emitMOV(const Instruction *i)
+{
+ DataFile sf = i->getSrc(0)->reg.file;
+ DataFile df = i->getDef(0)->reg.file;
+
+ assert(sf == FILE_GPR || df == FILE_GPR);
+
+ if (sf == FILE_FLAGS) {
+ code[0] = 0x00000001;
+ code[1] = 0x20000000;
+ defId(i->def[0], 2);
+ srcId(i->src[0], 12);
+ emitFlagsRd(i);
+ } else
+ if (sf == FILE_ADDRESS) {
+ code[0] = 0x00000001;
+ code[1] = 0x40000000;
+ defId(i->def[0], 2);
+ setARegBits(SDATA(i->src[0]).id + 1);
+ } else
+ if (df == FILE_FLAGS) {
+ code[0] = 0x00000001;
+ code[1] = 0xa0000000;
+ defId(i->def[0], 4);
+ srcId(i->src[0], 9);
+ emitFlagsRd(i);
+ } else
+ if (sf == FILE_IMMEDIATE) {
+ code[0] = 0x10008001;
+ code[1] = 0x00000003;
+ emitForm_IMM(i);
+ } else {
+ if (i->encSize == 4) {
+ code[0] = 0x10008000;
+ } else {
+ code[0] = 0x10000001;
+ code[1] = 0x04000000 | (i->lanes << 14);
+ }
+ defId(i->def[0], 2);
+ srcId(i->src[0], 9);
+ }
+ if (df == FILE_SHADER_OUTPUT) {
+ assert(i->encSize == 8);
+ code[1] |= 0x8;
+ }
+}
+
+void
+CodeEmitterNV50::emitNOP()
+{
+ code[0] = 0xf0000001;
+ code[1] = 0xe0000000;
+}
+
+void
+CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
+{
+ code[0] = 0xc0000000 | (lane << 16);
+ code[1] = 0x80000000;
+
+ code[0] |= (quOp & 0x03) << 20;
+ code[1] |= (quOp & 0xfc) << 20;
+
+ emitForm_ADD(i);
+
+ if (!i->srcExists(1))
+ srcId(i->src[0], 32 + 14);
+}
+
+void
+CodeEmitterNV50::emitPFETCH(const Instruction *i)
+{
+ code[0] = 0x11800001;
+ code[1] = 0x04200000 | (0xf << 14);
+
+ defId(i->def[0], 2);
+ srcAddr8(i->src[0], 9);
+ setAReg16(i, 0);
+}
+
+void
+CodeEmitterNV50::emitINTERP(const Instruction *i)
+{
+ code[0] = 0x80000000;
+
+ defId(i->def[0], 2);
+ srcAddr8(i->src[0], 16);
+
+ if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
+ code[0] |= 1 << 8;
+ } else {
+ if (i->op == OP_PINTERP) {
+ code[0] |= 1 << 25;
+ srcId(i->src[1], 9);
+ }
+ if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
+ code[0] |= 1 << 24;
+ }
+
+ if (i->encSize == 8) {
+ emitFlagsRd(i);
+ code[1] |=
+ (code[0] & (3 << 24)) >> (24 - 16) |
+         (code[0] & (1 << 8)) << (18 - 8);
+ code[0] &= ~0x03000100;
+ code[0] |= 1;
+ }
+}
+
+void
+CodeEmitterNV50::emitMINMAX(const Instruction *i)
+{
+ if (i->dType == TYPE_F64) {
+ code[0] = 0xe0000000;
+ code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
+ } else {
+ code[0] = 0x30000000;
+ code[1] = 0x80000000;
+ if (i->op == OP_MIN)
+ code[1] |= 0x20000000;
+
+ switch (i->dType) {
+ case TYPE_F32: code[0] |= 0x80000000; break;
+ case TYPE_S32: code[1] |= 0x8c000000; break;
+ case TYPE_U32: code[1] |= 0x84000000; break;
+ case TYPE_S16: code[1] |= 0x80000000; break;
+ case TYPE_U16: break;
+ default:
+ assert(0);
+ break;
+ }
+ code[1] |= i->src[0].mod.abs() << 20;
+ code[1] |= i->src[1].mod.abs() << 19;
+ }
+ emitForm_MAD(i);
+}
+
+void
+CodeEmitterNV50::emitFMAD(const Instruction *i)
+{
+ const int neg_mul = i->src[0].mod.neg() ^ i->src[1].mod.neg();
+ const int neg_add = i->src[2].mod.neg();
+
+ code[0] = 0xe0000000;
+
+ if (i->encSize == 4) {
+ emitForm_MUL(i);
+ assert(!neg_mul && !neg_add);
+ } else {
+ emitForm_MAD(i);
+ code[1] |= neg_mul << 26;
+ code[1] |= neg_add << 27;
+ if (i->saturate)
+ code[1] |= 1 << 29;
+ }
+}
+
+void
+CodeEmitterNV50::emitFADD(const Instruction *i)
+{
+ const int neg0 = i->src[0].mod.neg();
+ const int neg1 = i->src[1].mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
+
+ code[0] = 0xb0000000;
+
+ assert(!(i->src[0].mod | i->src[1].mod).abs());
+
+ if (i->src[1].getFile() == FILE_IMMEDIATE) {
+ emitForm_IMM(i);
+ code[0] |= neg0 << 15;
+ code[0] |= neg1 << 22;
+ } else
+ if (i->encSize == 8) {
+ emitForm_ADD(i);
+ code[1] |= neg0 << 26;
+ code[1] |= neg1 << 27;
+ if (i->saturate)
+ code[1] |= 1 << 29;
+ } else {
+ emitForm_MUL(i);
+ code[0] |= neg0 << 15;
+ code[0] |= neg1 << 22;
+ }
+}
+
+void
+CodeEmitterNV50::emitUADD(const Instruction *i)
+{
+ code[0] = 0x20008000;
+
+ if (i->src[0].getFile() == FILE_IMMEDIATE) {
+ emitForm_IMM(i);
+ } else
+ if (i->encSize == 8) {
+ code[0] = 0x20000000;
+ code[1] = 0x04000000;
+ emitForm_ADD(i);
+ } else {
+ emitForm_MUL(i);
+ }
+ assert(!(i->src[0].mod.neg() && i->src[1].mod.neg()));
+ code[0] |= i->src[0].mod.neg() << 28;
+ code[0] |= i->src[1].mod.neg() << 22;
+}
+
+void
+CodeEmitterNV50::emitAADD(const Instruction *i)
+{
+ const int s = (i->op == OP_MOV) ? 0 : 1;
+
+ code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
+ code[1] = 0x20000000;
+
+ code[0] |= (DDATA(i->def[0]).id + 1) << 2;
+
+ emitFlagsRd(i);
+
+ if (s && i->srcExists(0))
+ setARegBits(SDATA(i->src[0]).id + 1);
+}
+
+void
+CodeEmitterNV50::emitFMUL(const Instruction *i)
+{
+ const int neg = (i->src[0].mod ^ i->src[1].mod).neg();
+
+ code[0] = 0xc0000000;
+
+ if (i->src[0].getFile() == FILE_IMMEDIATE) {
+ emitForm_IMM(i);
+ if (neg)
+ code[0] |= 0x8000;
+ } else
+ if (i->encSize == 8) {
+ emitForm_MAD(i);
+ if (neg)
+ code[1] |= 0x08000000;
+ } else {
+ emitForm_MUL(i);
+ if (neg)
+ code[0] |= 0x8000;
+ }
+}
+
+void
+CodeEmitterNV50::emitSET(const Instruction *i)
+{
+ code[0] = 0x30000000;
+ code[1] = 0x60000000;
+
+ emitCondCode(i->asCmp()->setCond, 32 + 14);
+
+ switch (i->sType) {
+ case TYPE_F32: code[0] |= 0x80000000; break;
+ case TYPE_S32: code[1] |= 0x0c000000; break;
+ case TYPE_U32: code[1] |= 0x04000000; break;
+ case TYPE_S16: code[1] |= 0x08000000; break;
+ case TYPE_U16: break;
+ default:
+ assert(0);
+ break;
+ }
+ emitForm_MAD(i);
+}
+
+void
+CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
+{
+ switch (rnd) {
+ case ROUND_NI: code[1] |= 0x08000000; break;
+ case ROUND_M: code[1] |= 0x00020000; break;
+ case ROUND_MI: code[1] |= 0x08020000; break;
+ case ROUND_P: code[1] |= 0x00040000; break;
+ case ROUND_PI: code[1] |= 0x08040000; break;
+ case ROUND_Z: code[1] |= 0x00060000; break;
+ case ROUND_ZI: code[1] |= 0x08060000; break;
+ default:
+ assert(rnd == ROUND_N);
+ break;
+ }
+}
+
+void
+CodeEmitterNV50::emitCVT(const Instruction *i)
+{
+ const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
+ RoundMode rnd;
+
+ switch (i->op) {
+ case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
+ case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
+ case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
+ default:
+ rnd = i->rnd;
+ break;
+ }
+
+ code[0] = 0xa0000000;
+
+ switch (i->dType) {
+ case TYPE_F64:
+ switch (i->sType) {
+ case TYPE_F64: code[1] = 0xc4404000; break;
+ case TYPE_S64: code[1] = 0x44414000; break;
+ case TYPE_U64: code[1] = 0x44404000; break;
+ case TYPE_F32: code[1] = 0xc4400000; break;
+ case TYPE_S32: code[1] = 0x44410000; break;
+ case TYPE_U32: code[1] = 0x44400000; break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case TYPE_S64:
+ switch (i->sType) {
+ case TYPE_F64: code[1] = 0x8c404000; break;
+ case TYPE_F32: code[1] = 0x8c400000; break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case TYPE_U64:
+ switch (i->sType) {
+ case TYPE_F64: code[1] = 0x84404000; break;
+ case TYPE_F32: code[1] = 0x84400000; break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case TYPE_F32:
+ switch (i->sType) {
+ case TYPE_F64: code[1] = 0xc0404000; break;
+ case TYPE_S64: code[1] = 0x40414000; break;
+ case TYPE_U64: code[1] = 0x40404000; break;
+ case TYPE_F32: code[1] = 0xc4004000; break;
+ case TYPE_S32: code[1] = 0x44014000; break;
+ case TYPE_U32: code[1] = 0x44004000; break;
+ case TYPE_F16: code[1] = 0xc4000000; break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case TYPE_S32:
+ switch (i->sType) {
+ case TYPE_F64: code[1] = 0x88404000; break;
+ case TYPE_F32: code[1] = 0x8c004000; break;
+ case TYPE_S32: code[1] = 0x0c014000; break;
+ case TYPE_U32: code[1] = 0x0c004000; break;
+ case TYPE_F16: code[1] = 0x8c000000; break;
+ case TYPE_S16: code[1] = 0x0c010000; break;
+ case TYPE_U16: code[1] = 0x0c000000; break;
+ case TYPE_S8: code[1] = 0x0c018000; break;
+ case TYPE_U8: code[1] = 0x0c008000; break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case TYPE_U32:
+ switch (i->sType) {
+ case TYPE_F64: code[1] = 0x80404000; break;
+ case TYPE_F32: code[1] = 0x84004000; break;
+ case TYPE_S32: code[1] = 0x04014000; break;
+ case TYPE_U32: code[1] = 0x04004000; break;
+ case TYPE_F16: code[1] = 0x84000000; break;
+ case TYPE_S16: code[1] = 0x04010000; break;
+ case TYPE_U16: code[1] = 0x04000000; break;
+ case TYPE_S8: code[1] = 0x04018000; break;
+ case TYPE_U8: code[1] = 0x04008000; break;
+ default:
+ assert(0);
+ break;
+      }
+      break;
+ case TYPE_S16:
+ case TYPE_U16:
+ case TYPE_S8:
+ case TYPE_U8:
+ default:
+ assert(0);
+ break;
+ }
+ if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
+ code[1] |= 0x00004000;
+
+ roundMode_CVT(rnd);
+
+ switch (i->op) {
+ case OP_ABS: code[1] |= 1 << 20; break;
+ case OP_SAT: code[1] |= 1 << 19; break;
+ case OP_NEG: code[1] |= 1 << 29; break;
+ default:
+ break;
+ }
+ code[1] ^= i->src[0].mod.neg() << 29;
+ code[1] |= i->src[0].mod.abs() << 20;
+ if (i->saturate)
+ code[1] |= 1 << 19;
+
+ assert(i->op != OP_ABS || !i->src[0].mod.neg());
+
+ emitForm_MAD(i);
+}
+
+void
+CodeEmitterNV50::emitPreOp(const Instruction *i)
+{
+ code[0] = 0xb0000000;
+ code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
+
+ code[1] |= i->src[0].mod.abs() << 20;
+ code[1] |= i->src[0].mod.neg() << 26;
+
+ emitForm_MAD(i);
+}
+
+void
+CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
+{
+ code[0] = 0x90000000;
+
+ if (i->encSize == 4) {
+ assert(i->op == OP_RCP);
+ emitForm_MUL(i);
+ } else {
+ code[1] = subOp << 29;
+ code[1] |= i->src[0].mod.abs() << 20;
+ code[1] |= i->src[0].mod.neg() << 26;
+ emitForm_MAD(i);
+ }
+}
+
+void
+CodeEmitterNV50::emitLogicOp(const Instruction *i)
+{
+ code[0] = 0xd0000000;
+
+ if (i->src[1].getFile() == FILE_IMMEDIATE) {
+ switch (i->op) {
+ case OP_OR: code[0] |= 0x0100; break;
+ case OP_XOR: code[0] |= 0x8000; break;
+ default:
+ assert(i->op == OP_AND);
+ break;
+ }
+ emitForm_IMM(i);
+ } else {
+ switch (i->op) {
+ case OP_AND: code[1] = 0x04000000; break;
+ case OP_OR: code[1] = 0x04004000; break;
+ case OP_XOR: code[1] = 0x04008000; break;
+ default:
+ assert(0);
+ break;
+ }
+ emitForm_MAD(i);
+ }
+}
+
+void
+CodeEmitterNV50::emitARL(const Instruction *i)
+{
+ assert(i->src[1].getFile() == FILE_IMMEDIATE);
+
+ code[0] = 0x00000001 | (i->getSrc(1)->reg.data.u32 & 0x3f) << 16;
+ code[1] = 0xc0000000;
+
+ code[0] |= (DDATA(i->def[0]).id + 1) << 2;
+ emitSrc0(i->src[0]);
+ emitFlagsRd(i);
+}
+
+void
+CodeEmitterNV50::emitShift(const Instruction *i)
+{
+ if (i->def[0].getFile() == FILE_ADDRESS) {
+ emitARL(i);
+ } else {
+ code[0] = 0x30000001;
+ code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
+ if (isSignedType(i->sType))
+ code[1] |= 1 << 27;
+
+ if (i->src[1].getFile() == FILE_IMMEDIATE) {
+ code[1] |= 1 << 20;
+ code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
+ emitFlagsRd(i);
+ } else {
+ emitForm_MAD(i);
+ }
+ }
+}
+
+void
+CodeEmitterNV50::emitOUT(const Instruction *i)
+{
+ code[0] = (i->op == OP_EMIT) ? 0xf0000200 : 0xf0000400;
+ code[1] = 0xc0000001;
+
+ emitFlagsRd(i);
+}
+
+void
+CodeEmitterNV50::emitTEX(const TexInstruction *i)
+{
+ code[0] = 0xf0000001;
+ code[1] = 0x00000000;
+
+ switch (i->op) {
+ case OP_TXB:
+ code[1] = 0x20000000;
+ break;
+ case OP_TXL:
+ code[1] = 0x40000000;
+ break;
+ case OP_TXF:
+ code[0] = 0x01000000;
+ break;
+ case OP_TXG:
+ code[0] = 0x01000000;
+ code[1] = 0x80000000;
+ break;
+ default:
+ assert(i->op == OP_TEX);
+ break;
+ }
+
+ code[0] |= i->tex.r << 9;
+ code[0] |= i->tex.s << 17;
+
+ int argc = i->tex.target.getArgCount();
+
+ if (i->op == OP_TXB || i->op == OP_TXL)
+ argc += 1;
+ if (i->tex.target.isShadow())
+ argc += 1;
+ assert(argc <= 4);
+
+ code[0] |= (argc - 1) << 22;
+
+ if (i->tex.target.isCube()) {
+ code[0] |= 0x08000000;
+ } else
+ if (i->tex.useOffsets) {
+ code[1] |= (i->tex.offset[0][0] & 0xf) << 16;
+ code[1] |= (i->tex.offset[0][1] & 0xf) << 20;
+ code[1] |= (i->tex.offset[0][2] & 0xf) << 24;
+ }
+
+ code[0] |= (i->tex.mask & 0x3) << 25;
+ code[1] |= (i->tex.mask & 0xc) << 12;
+
+ if (i->tex.liveOnly)
+ code[1] |= 4;
+
+ defId(i->def[0], 2);
+
+ emitFlagsRd(i);
+}
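+
+// Example of the argument-count computation above: an OP_TXB on a SHADOW2D
+// target uses 2 coordinates + 1 shadow reference + 1 bias, so argc = 4 and
+// (argc - 1) = 3 is what gets encoded.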
+
+void
+CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
+{
+ const FlowInstruction *f = i->asFlow();
+
+ code[0] = 0x00000003 | (flowOp << 28);
+ code[1] = 0x00000000;
+
+ emitFlagsRd(i);
+
+ if (f && f->target.bb) {
+ uint32_t pos;
+
+ if (f->op == OP_CALL) {
+ if (f->builtin) {
+ pos = 0; // XXX: TODO
+ } else {
+ pos = f->target.fn->binPos;
+ }
+ } else {
+ pos = f->target.bb->binPos;
+ }
+
+ code[0] |= ((pos >> 2) & 0xffff) << 11;
+ code[1] |= ((pos >> 18) & 0x003f) << 14;
+ }
+}
+
+bool
+CodeEmitterNV50::emitInstruction(Instruction *insn)
+{
+ if (!insn->encSize) {
+ ERROR("skipping unencodable instruction: "); insn->print();
+ return false;
+ } else
+ if (codeSize + insn->encSize > codeSizeLimit) {
+ ERROR("code emitter output buffer too small\n");
+ return false;
+ }
+
+ switch (insn->op) {
+ case OP_MOV:
+ emitMOV(insn);
+ break;
+ case OP_NOP:
+ case OP_JOIN:
+ emitNOP();
+ break;
+ case OP_VFETCH:
+ case OP_LOAD:
+ emitLOAD(insn);
+ break;
+ case OP_EXPORT:
+ case OP_STORE:
+ emitSTORE(insn);
+ break;
+ case OP_PFETCH:
+ emitPFETCH(insn);
+ break;
+ case OP_LINTERP:
+ case OP_PINTERP:
+ emitINTERP(insn);
+ break;
+ case OP_ADD:
+ case OP_SUB:
+ if (isFloatType(insn->dType))
+ emitFADD(insn);
+ else
+ emitUADD(insn);
+ break;
+ case OP_MUL:
+ if (isFloatType(insn->dType))
+ emitFMUL(insn);
+ else
+ emitUMUL(insn);
+ break;
+ case OP_MAD:
+ case OP_FMA:
+ emitFMAD(insn);
+ break;
+ case OP_AND:
+ case OP_OR:
+ case OP_XOR:
+ emitLogicOp(insn);
+ break;
+ case OP_MIN:
+ case OP_MAX:
+ emitMINMAX(insn);
+ break;
+ case OP_CEIL:
+ case OP_FLOOR:
+ case OP_TRUNC:
+ case OP_CVT:
+ emitCVT(insn);
+ break;
+ case OP_RCP:
+ emitSFnOp(insn, 0);
+ break;
+ case OP_RSQ:
+ emitSFnOp(insn, 2);
+ break;
+ case OP_LG2:
+ emitSFnOp(insn, 3);
+ break;
+ case OP_SIN:
+ emitSFnOp(insn, 4);
+ break;
+ case OP_COS:
+ emitSFnOp(insn, 5);
+ break;
+ case OP_EX2:
+ emitSFnOp(insn, 6);
+ break;
+ case OP_PRESIN:
+ case OP_PREEX2:
+ emitPreOp(insn);
+ break;
+ case OP_TEX:
+ case OP_TXB:
+ case OP_TXL:
+ emitTEX(insn->asTex());
+ break;
+ case OP_EMIT:
+ case OP_RESTART:
+ emitOUT(insn);
+ break;
+ case OP_DISCARD:
+ emitFlow(insn, 0x0);
+ break;
+ case OP_BRA:
+ emitFlow(insn, 0x1);
+ break;
+ case OP_CALL:
+ emitFlow(insn, 0x2);
+ break;
+ case OP_RET:
+ emitFlow(insn, 0x3);
+ break;
+ case OP_PREBREAK:
+ emitFlow(insn, 0x4);
+ break;
+ case OP_BREAK:
+ emitFlow(insn, 0x5);
+ break;
+ case OP_QUADON:
+ emitFlow(insn, 0x6);
+ break;
+ case OP_QUADPOP:
+ emitFlow(insn, 0x7);
+ break;
+ case OP_JOINAT:
+ emitFlow(insn, 0xa);
+ break;
+ case OP_PRERET:
+ emitFlow(insn, 0xd);
+ break;
+ case OP_QUADOP:
+ emitQUADOP(insn, insn->lanes, insn->subOp);
+ break;
+ case OP_DFDX:
+ emitQUADOP(insn, 4, insn->src[0].mod.neg() ? 0x66 : 0x99);
+ break;
+ case OP_DFDY:
+ emitQUADOP(insn, 5, insn->src[0].mod.neg() ? 0x5a : 0xa5);
+ break;
+ case OP_PHI:
+ case OP_UNION:
+ case OP_CONSTRAINT:
+ ERROR("operation should have been eliminated");
+ return false;
+ case OP_EXP:
+ case OP_LOG:
+ case OP_SQRT:
+ case OP_POW:
+ case OP_SELP:
+ case OP_SLCT:
+ case OP_TXD:
+ case OP_PRECONT:
+ case OP_CONT:
+ case OP_POPCNT:
+ case OP_INSBF:
+ case OP_EXTBF:
+ ERROR("operation should have been lowered\n");
+ return false;
+ default:
+ ERROR("unknow op\n");
+ return false;
+ }
+ if (insn->join)
+ code[1] |= 0x2;
+ else
+ if (insn->exit)
+ code[1] |= 0x1;
+
+ assert((insn->encSize == 8) == (code[1] & 1));
+
+ code += insn->encSize / 4;
+ codeSize += insn->encSize;
+ return true;
+}
+
+uint32_t
+CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
+{
+ const Target::OpInfo &info = targ->getOpInfo(i);
+
+ if (info.minEncSize == 8)
+ return 8;
+
+ return 4;
+}
+
+CodeEmitterNV50::CodeEmitterNV50(const Target *target) : targ(target)
+{
+ code = NULL;
+ codeSize = codeSizeLimit = 0;
+}
+
+CodeEmitter *
+Target::getCodeEmitter(Program::Type type)
+{
+ CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
+ emit->setProgramType(type);
+ return emit;
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
new file mode 100644
index 00000000000..c2f464de31b
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
@@ -0,0 +1,2288 @@
+
+extern "C" {
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
+}
+
+#include "nv50_ir.h"
+#include "nv50_ir_util.h"
+#include "nv50_ir_build_util.h"
+
+namespace tgsi {
+
+class Source;
+
+static nv50_ir::operation translateOpcode(uint opcode);
+static nv50_ir::DataFile translateFile(uint file);
+static nv50_ir::TexTarget translateTexture(uint texTarg);
+static nv50_ir::SVSemantic translateSysVal(uint sysval);
+
+class Instruction
+{
+public:
+ Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }
+
+ class SrcRegister
+ {
+ public:
+ SrcRegister(const struct tgsi_full_src_register *src)
+ : reg(src->Register),
+ fsr(src)
+ { }
+
+ SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }
+
+ struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
+ {
+ struct tgsi_src_register reg;
+ memset(&reg, 0, sizeof(reg));
+ reg.Index = off.Index;
+ reg.File = off.File;
+ reg.SwizzleX = off.SwizzleX;
+ reg.SwizzleY = off.SwizzleY;
+ reg.SwizzleZ = off.SwizzleZ;
+ return reg;
+ }
+
+ SrcRegister(const struct tgsi_texture_offset& off) :
+ reg(offsetToSrc(off)),
+ fsr(NULL)
+ { }
+
+ uint getFile() const { return reg.File; }
+
+ bool is2D() const { return reg.Dimension; }
+
+ bool isIndirect(int dim) const
+ {
+ return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
+ }
+
+ int getIndex(int dim) const
+ {
+ return (dim && fsr) ? fsr->Dimension.Index : reg.Index;
+ }
+
+ int getSwizzle(int chan) const
+ {
+ return tgsi_util_get_src_register_swizzle(&reg, chan);
+ }
+
+ nv50_ir::Modifier getMod(int chan) const;
+
+ SrcRegister getIndirect(int dim) const
+ {
+ assert(fsr && isIndirect(dim));
+ if (dim)
+ return SrcRegister(fsr->DimIndirect);
+ return SrcRegister(fsr->Indirect);
+ }
+
+ uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const
+ {
+ assert(reg.File == TGSI_FILE_IMMEDIATE);
+ assert(!reg.Absolute);
+ assert(!reg.Negate);
+ return info->immd.data[reg.Index * 4 + getSwizzle(c)];
+ }
+
+ private:
+ const struct tgsi_src_register reg;
+ const struct tgsi_full_src_register *fsr;
+ };
+
+ class DstRegister
+ {
+ public:
+ DstRegister(const struct tgsi_full_dst_register *dst)
+ : reg(dst->Register),
+ fdr(dst)
+ { }
+
+ DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }
+
+ uint getFile() const { return reg.File; }
+
+ bool is2D() const { return reg.Dimension; }
+
+ bool isIndirect(int dim) const
+ {
+ return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;
+ }
+
+ int getIndex(int dim) const
+ {
+         return (dim && fdr) ? fdr->Dimension.Index : reg.Index;
+ }
+
+ unsigned int getMask() const { return reg.WriteMask; }
+
+ bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }
+
+ SrcRegister getIndirect(int dim) const
+ {
+ assert(fdr && isIndirect(dim));
+ if (dim)
+ return SrcRegister(fdr->DimIndirect);
+ return SrcRegister(fdr->Indirect);
+ }
+
+ private:
+ const struct tgsi_dst_register reg;
+ const struct tgsi_full_dst_register *fdr;
+ };
+
+ inline uint getOpcode() const { return insn->Instruction.Opcode; }
+
+ unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }
+ unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }
+
+ // mask of used components of source s
+ unsigned int srcMask(unsigned int s) const;
+
+ SrcRegister getSrc(unsigned int s) const
+ {
+ assert(s < srcCount());
+ return SrcRegister(&insn->Src[s]);
+ }
+
+ DstRegister getDst(unsigned int d) const
+ {
+ assert(d < dstCount());
+ return DstRegister(&insn->Dst[d]);
+ }
+
+ SrcRegister getTexOffset(unsigned int i) const
+ {
+ assert(i < TGSI_FULL_MAX_TEX_OFFSETS);
+ return SrcRegister(insn->TexOffsets[i]);
+ }
+
+ unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }
+
+ bool checkDstSrcAliasing() const;
+
+ inline nv50_ir::operation getOP() const {
+ return translateOpcode(getOpcode()); }
+
+ nv50_ir::DataType inferSrcType() const;
+ nv50_ir::DataType inferDstType() const;
+
+ nv50_ir::CondCode getSetCond() const;
+
+ nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
+
+ inline uint getLabel() { return insn->Label.Label; }
+
+ unsigned getSaturate() const { return insn->Instruction.Saturate; }
+
+ void print() const
+ {
+ tgsi_dump_instruction(insn, 1);
+ }
+
+private:
+ const struct tgsi_full_instruction *insn;
+};
+
+unsigned int Instruction::srcMask(unsigned int s) const
+{
+ unsigned int mask = insn->Dst[0].Register.WriteMask;
+
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
+ case TGSI_OPCODE_DP3:
+ return 0x7;
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_KIL: /* WriteMask ignored */
+ return 0xf;
+ case TGSI_OPCODE_DST:
+ return mask & (s ? 0xa : 0x6);
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_LOG:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SCS:
+ return 0x1;
+ case TGSI_OPCODE_IF:
+ return 0x1;
+ case TGSI_OPCODE_LIT:
+ return 0xb;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXD:
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXP:
+ {
+ const struct tgsi_instruction_texture *tex = &insn->Texture;
+
+ assert(insn->Instruction.Texture);
+
+ mask = 0x7;
+ if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
+ insn->Instruction.Opcode != TGSI_OPCODE_TXD)
+ mask |= 0x8; /* bias, lod or proj */
+
+ switch (tex->Texture) {
+ case TGSI_TEXTURE_1D:
+ mask &= 0x9;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ mask &= 0x5;
+ break;
+ case TGSI_TEXTURE_1D_ARRAY:
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ mask &= 0xb;
+ break;
+ default:
+ break;
+ }
+ }
+ return mask;
+ case TGSI_OPCODE_XPD:
+ {
+ unsigned int x = 0;
+ if (mask & 1) x |= 0x6;
+ if (mask & 2) x |= 0x5;
+ if (mask & 4) x |= 0x3;
+ return x;
+ }
+ default:
+ break;
+ }
+
+ return mask;
+}
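+
+// Worked example for srcMask(): for TGSI_OPCODE_DST with WriteMask xyzw,
+// source 0 only contributes y and z (mask 0x6) and source 1 only y and w
+// (mask 0xa), matching DST's definition dst = (1, s0.y*s1.y, s0.z, s1.w).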
+
+nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
+{
+ nv50_ir::Modifier m(0);
+
+ if (reg.Absolute)
+ m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS);
+ if (reg.Negate)
+ m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG);
+ return m;
+}
+
+static nv50_ir::DataFile translateFile(uint file)
+{
+ switch (file) {
+ case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST;
+ case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT;
+ case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT;
+ case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR;
+ case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS;
+ case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
+ case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
+ case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
+ case TGSI_FILE_IMMEDIATE_ARRAY: return nv50_ir::FILE_IMMEDIATE;
+ case TGSI_FILE_TEMPORARY_ARRAY: return nv50_ir::FILE_MEMORY_LOCAL;
+ case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
+ case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_NULL:
+ default:
+ return nv50_ir::FILE_NULL;
+ }
+}
+
+static nv50_ir::SVSemantic translateSysVal(uint sysval)
+{
+ switch (sysval) {
+ case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE;
+ case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE;
+ case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID;
+ case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;
+ default:
+ assert(0);
+ return nv50_ir::SV_CLOCK;
+ }
+}
+
+#define NV50_IR_TEX_TARG_CASE(a, b) \
+ case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;
+
+static nv50_ir::TexTarget translateTexture(uint tex)
+{
+ switch (tex) {
+ NV50_IR_TEX_TARG_CASE(1D, 1D);
+ NV50_IR_TEX_TARG_CASE(2D, 2D);
+ NV50_IR_TEX_TARG_CASE(3D, 3D);
+ NV50_IR_TEX_TARG_CASE(CUBE, CUBE);
+ NV50_IR_TEX_TARG_CASE(RECT, RECT);
+ NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);
+ NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);
+ NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);
+ NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);
+ NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);
+ NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
+ NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
+
+ case TGSI_TEXTURE_UNKNOWN:
+ default:
+ assert(!"invalid texture target");
+ return nv50_ir::TEX_TARGET_2D;
+ }
+}
+
+nv50_ir::DataType Instruction::inferSrcType() const
+{
+ switch (getOpcode()) {
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_XOR:
+ case TGSI_OPCODE_U2F:
+ case TGSI_OPCODE_UADD:
+ case TGSI_OPCODE_UDIV:
+ case TGSI_OPCODE_UMOD:
+ case TGSI_OPCODE_UMAD:
+ case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_UMAX:
+ case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ case TGSI_OPCODE_USHR:
+ case TGSI_OPCODE_UCMP:
+ return nv50_ir::TYPE_U32;
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_IDIV:
+ case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_INEG:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version
+ case TGSI_OPCODE_MOD:
+ case TGSI_OPCODE_UARL:
+ return nv50_ir::TYPE_S32;
+ default:
+ return nv50_ir::TYPE_F32;
+ }
+}
+
+nv50_ir::DataType Instruction::inferDstType() const
+{
+ switch (getOpcode()) {
+ case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32;
+ case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32;
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_U2F:
+ return nv50_ir::TYPE_F32;
+ default:
+ return inferSrcType();
+ }
+}
+
+nv50_ir::CondCode Instruction::getSetCond() const
+{
+ using namespace nv50_ir;
+
+ switch (getOpcode()) {
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_USLT:
+ return CC_LT;
+ case TGSI_OPCODE_SLE:
+ return CC_LE;
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_USGE:
+ return CC_GE;
+ case TGSI_OPCODE_SGT:
+ return CC_GT;
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_USEQ:
+ return CC_EQ;
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_USNE:
+ return CC_NE;
+ case TGSI_OPCODE_SFL:
+ return CC_NEVER;
+ case TGSI_OPCODE_STR:
+ default:
+ return CC_ALWAYS;
+ }
+}
+
+#define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b
+
+static nv50_ir::operation translateOpcode(uint opcode)
+{
+ switch (opcode) {
+ NV50_IR_OPCODE_CASE(ARL, SHL);
+ NV50_IR_OPCODE_CASE(MOV, MOV);
+
+ NV50_IR_OPCODE_CASE(RCP, RCP);
+ NV50_IR_OPCODE_CASE(RSQ, RSQ);
+
+ NV50_IR_OPCODE_CASE(MUL, MUL);
+ NV50_IR_OPCODE_CASE(ADD, ADD);
+
+ NV50_IR_OPCODE_CASE(MIN, MIN);
+ NV50_IR_OPCODE_CASE(MAX, MAX);
+ NV50_IR_OPCODE_CASE(SLT, SET);
+ NV50_IR_OPCODE_CASE(SGE, SET);
+ NV50_IR_OPCODE_CASE(MAD, MAD);
+ NV50_IR_OPCODE_CASE(SUB, SUB);
+
+ NV50_IR_OPCODE_CASE(FLR, FLOOR);
+ NV50_IR_OPCODE_CASE(ROUND, CVT);
+ NV50_IR_OPCODE_CASE(EX2, EX2);
+ NV50_IR_OPCODE_CASE(LG2, LG2);
+ NV50_IR_OPCODE_CASE(POW, POW);
+
+ NV50_IR_OPCODE_CASE(ABS, ABS);
+
+ NV50_IR_OPCODE_CASE(COS, COS);
+ NV50_IR_OPCODE_CASE(DDX, DFDX);
+ NV50_IR_OPCODE_CASE(DDY, DFDY);
+ NV50_IR_OPCODE_CASE(KILP, DISCARD);
+
+ NV50_IR_OPCODE_CASE(SEQ, SET);
+ NV50_IR_OPCODE_CASE(SFL, SET);
+ NV50_IR_OPCODE_CASE(SGT, SET);
+ NV50_IR_OPCODE_CASE(SIN, SIN);
+ NV50_IR_OPCODE_CASE(SLE, SET);
+ NV50_IR_OPCODE_CASE(SNE, SET);
+ NV50_IR_OPCODE_CASE(STR, SET);
+ NV50_IR_OPCODE_CASE(TEX, TEX);
+ NV50_IR_OPCODE_CASE(TXD, TXD);
+ NV50_IR_OPCODE_CASE(TXP, TEX);
+
+ NV50_IR_OPCODE_CASE(BRA, BRA);
+ NV50_IR_OPCODE_CASE(CAL, CALL);
+ NV50_IR_OPCODE_CASE(RET, RET);
+ NV50_IR_OPCODE_CASE(CMP, SLCT);
+
+ NV50_IR_OPCODE_CASE(TXB, TXB);
+
+ NV50_IR_OPCODE_CASE(DIV, DIV);
+
+ NV50_IR_OPCODE_CASE(TXL, TXL);
+
+ NV50_IR_OPCODE_CASE(CEIL, CEIL);
+ NV50_IR_OPCODE_CASE(I2F, CVT);
+ NV50_IR_OPCODE_CASE(NOT, NOT);
+ NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
+ NV50_IR_OPCODE_CASE(SHL, SHL);
+
+ NV50_IR_OPCODE_CASE(AND, AND);
+ NV50_IR_OPCODE_CASE(OR, OR);
+ NV50_IR_OPCODE_CASE(MOD, MOD);
+ NV50_IR_OPCODE_CASE(XOR, XOR);
+ NV50_IR_OPCODE_CASE(SAD, SAD);
+ NV50_IR_OPCODE_CASE(TXF, TXF);
+ NV50_IR_OPCODE_CASE(TXQ, TXQ);
+
+ NV50_IR_OPCODE_CASE(EMIT, EMIT);
+ NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);
+
+ NV50_IR_OPCODE_CASE(KIL, DISCARD);
+
+ NV50_IR_OPCODE_CASE(F2I, CVT);
+ NV50_IR_OPCODE_CASE(IDIV, DIV);
+ NV50_IR_OPCODE_CASE(IMAX, MAX);
+ NV50_IR_OPCODE_CASE(IMIN, MIN);
+ NV50_IR_OPCODE_CASE(INEG, NEG);
+ NV50_IR_OPCODE_CASE(ISGE, SET);
+ NV50_IR_OPCODE_CASE(ISHR, SHR);
+ NV50_IR_OPCODE_CASE(ISLT, SET);
+ NV50_IR_OPCODE_CASE(F2U, CVT);
+ NV50_IR_OPCODE_CASE(U2F, CVT);
+ NV50_IR_OPCODE_CASE(UADD, ADD);
+ NV50_IR_OPCODE_CASE(UDIV, DIV);
+ NV50_IR_OPCODE_CASE(UMAD, MAD);
+ NV50_IR_OPCODE_CASE(UMAX, MAX);
+ NV50_IR_OPCODE_CASE(UMIN, MIN);
+ NV50_IR_OPCODE_CASE(UMOD, MOD);
+ NV50_IR_OPCODE_CASE(UMUL, MUL);
+ NV50_IR_OPCODE_CASE(USEQ, SET);
+ NV50_IR_OPCODE_CASE(USGE, SET);
+ NV50_IR_OPCODE_CASE(USHR, SHR);
+ NV50_IR_OPCODE_CASE(USLT, SET);
+ NV50_IR_OPCODE_CASE(USNE, SET);
+
+ NV50_IR_OPCODE_CASE(LOAD, TXF);
+ NV50_IR_OPCODE_CASE(SAMPLE, TEX);
+ NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
+ NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
+ NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
+ NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
+ NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
+ NV50_IR_OPCODE_CASE(GATHER4, TXG);
+ NV50_IR_OPCODE_CASE(RESINFO, TXQ);
+
+ NV50_IR_OPCODE_CASE(END, EXIT);
+
+ default:
+ return nv50_ir::OP_NOP;
+ }
+}
+
+bool Instruction::checkDstSrcAliasing() const
+{
+ if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory
+ return false;
+
+ for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {
+ if (insn->Src[s].Register.File == TGSI_FILE_NULL)
+ break;
+ if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&
+ insn->Src[s].Register.Index == insn->Dst[0].Register.Index)
+ return true;
+ }
+ return false;
+}
+
+class Source
+{
+public:
+ Source(struct nv50_ir_prog_info *);
+ ~Source();
+
+ struct Subroutine
+ {
+ unsigned pc;
+ };
+
+public:
+ bool scanSource();
+ unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }
+
+public:
+ struct tgsi_shader_info scan;
+ struct tgsi_full_instruction *insns;
+ const struct tgsi_token *tokens;
+ struct nv50_ir_prog_info *info;
+
+ nv50_ir::DynArray tempArrays;
+ nv50_ir::DynArray immdArrays;
+ int tempArrayCount;
+ int immdArrayCount;
+
+ bool mainTempsInLMem;
+
+ uint8_t *resourceTargets; // TGSI_TEXTURE_*
+ unsigned resourceCount;
+
+ Subroutine *subroutines;
+ unsigned subroutineCount;
+
+private:
+ int inferSysValDirection(unsigned sn) const;
+ bool scanDeclaration(const struct tgsi_full_declaration *);
+ bool scanInstruction(const struct tgsi_full_instruction *);
+ void scanProperty(const struct tgsi_full_property *);
+ void scanImmediate(const struct tgsi_full_immediate *);
+
+ inline bool isEdgeFlagPassthrough(const Instruction&) const;
+};
+
+Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
+{
+ tokens = (const struct tgsi_token *)info->bin.source;
+
+ if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
+ tgsi_dump(tokens, 0);
+
+ resourceTargets = NULL;
+ subroutines = NULL;
+
+ mainTempsInLMem = FALSE;
+}
+
+Source::~Source()
+{
+ if (insns)
+ FREE(insns);
+
+ if (info->immd.data)
+ FREE(info->immd.data);
+ if (info->immd.type)
+ FREE(info->immd.type);
+
+ if (resourceTargets)
+ delete[] resourceTargets;
+ if (subroutines)
+ delete[] subroutines;
+}
+
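+// Walk the TGSI token stream once: store immediates and instructions, record
+// subroutine entry points, scan declarations, and fill in the counts and
+// properties the backend needs before translation.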
+bool Source::scanSource()
+{
+ unsigned insnCount = 0;
+ unsigned subrCount = 0;
+ struct tgsi_parse_context parse;
+
+ tgsi_scan_shader(tokens, &scan);
+
+ insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *
+ sizeof(insns[0]));
+ if (!insns)
+ return false;
+
+ resourceCount = scan.file_max[TGSI_FILE_RESOURCE] + 1;
+ resourceTargets = new uint8_t[resourceCount];
+
+ subroutineCount = scan.opcode_count[TGSI_OPCODE_BGNSUB] + 1;
+ subroutines = new Subroutine[subroutineCount];
+
+ info->immd.bufSize = 0;
+ tempArrayCount = 0;
+ immdArrayCount = 0;
+
+ info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;
+ info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
+ info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
+
+ if (info->type == PIPE_SHADER_FRAGMENT) {
+ info->prop.fp.writesDepth = scan.writes_z;
+ info->prop.fp.usesDiscard = scan.uses_kill;
+ } else
+ if (info->type == PIPE_SHADER_GEOMETRY) {
+ info->prop.gp.instanceCount = 1; // default value
+ }
+
+ info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
+ info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
+
+ tgsi_parse_init(&parse, tokens);
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ scanImmediate(&parse.FullToken.FullImmediate);
+ break;
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ scanDeclaration(&parse.FullToken.FullDeclaration);
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ insns[insnCount++] = parse.FullToken.FullInstruction;
+ if (insns[insnCount - 1].Instruction.Opcode == TGSI_OPCODE_BGNSUB)
+ subroutines[++subrCount].pc = insnCount - 1;
+ else
+ scanInstruction(&parse.FullToken.FullInstruction);
+ break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ scanProperty(&parse.FullToken.FullProperty);
+ break;
+ default:
+ INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);
+ break;
+ }
+ }
+ tgsi_parse_free(&parse);
+
+ if (mainTempsInLMem)
+ info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
+
+ return info->assignSlots(info) == 0;
+}
+
+void Source::scanProperty(const struct tgsi_full_property *prop)
+{
+ switch (prop->Property.PropertyName) {
+ case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+ info->prop.gp.outputPrim = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_GS_INPUT_PRIM:
+ info->prop.gp.inputPrim = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+ info->prop.gp.maxVertices = prop->u[0].Data;
+ break;
+#if 0
+ case TGSI_PROPERTY_GS_INSTANCE_COUNT:
+ info->prop.gp.instanceCount = prop->u[0].Data;
+ break;
+#endif
+ case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
+ info->prop.fp.separateFragData = TRUE;
+ break;
+ case TGSI_PROPERTY_FS_COORD_ORIGIN:
+ case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
+ // we don't care
+ break;
+ default:
+ INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
+ break;
+ }
+}
+
+void Source::scanImmediate(const struct tgsi_full_immediate *imm)
+{
+ const unsigned n = info->immd.count++;
+
+ assert(n < scan.immediate_count);
+
+ for (int c = 0; c < 4; ++c)
+ info->immd.data[n * 4 + c] = imm->u[c].Uint;
+
+ info->immd.type[n] = imm->Immediate.DataType;
+}
+
+int Source::inferSysValDirection(unsigned sn) const
+{
+ switch (sn) {
+ case TGSI_SEMANTIC_INSTANCEID:
+// case TGSI_SEMANTIC_VERTEXID:
+ return 1;
+#if 0
+ case TGSI_SEMANTIC_LAYER:
+ case TGSI_SEMANTIC_VIEWPORTINDEX:
+ return 0;
+#endif
+ case TGSI_SEMANTIC_PRIMID:
+ return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0;
+ default:
+ return 0;
+ }
+}
+
+bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
+{
+ unsigned i;
+ unsigned sn = TGSI_SEMANTIC_GENERIC;
+ unsigned si = 0;
+ const unsigned first = decl->Range.First, last = decl->Range.Last;
+
+ if (decl->Declaration.Semantic) {
+ sn = decl->Semantic.Name;
+ si = decl->Semantic.Index;
+ }
+
+ switch (decl->Declaration.File) {
+ case TGSI_FILE_INPUT:
+ if (info->type == PIPE_SHADER_VERTEX) {
+ // all vertex attributes are equal
+ for (i = first; i <= last; ++i) {
+ info->in[i].sn = TGSI_SEMANTIC_GENERIC;
+ info->in[i].si = i;
+ }
+ } else {
+ for (i = first; i <= last; ++i, ++si) {
+ info->in[i].id = i;
+ info->in[i].sn = sn;
+ info->in[i].si = si;
+ if (info->type == PIPE_SHADER_FRAGMENT) {
+ // translate interpolation mode
+ switch (decl->Declaration.Interpolate) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ info->in[i].flat = 1;
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ if (sn != TGSI_SEMANTIC_COLOR) // GL_NICEST
+ info->in[i].linear = 1;
+ break;
+ default:
+ break;
+ }
+ if (decl->Declaration.Centroid)
+ info->in[i].centroid = 1;
+ }
+ }
+ }
+ break;
+ case TGSI_FILE_OUTPUT:
+ for (i = first; i <= last; ++i, ++si) {
+ switch (sn) {
+ case TGSI_SEMANTIC_POSITION:
+ if (info->type == PIPE_SHADER_FRAGMENT)
+ info->io.fragDepth = i;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (info->type == PIPE_SHADER_FRAGMENT)
+ info->prop.fp.numColourResults++;
+ break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ info->io.edgeFlagOut = i;
+ break;
+ default:
+ break;
+ }
+ info->out[i].id = i;
+ info->out[i].sn = sn;
+ info->out[i].si = si;
+ }
+ break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ for (i = first; i <= last; ++i, ++si) {
+ info->sv[i].sn = sn;
+ info->sv[i].si = si;
+ info->sv[i].input = inferSysValDirection(sn);
+ }
+ break;
+ case TGSI_FILE_RESOURCE:
+ for (i = first; i <= last; ++i)
+ resourceTargets[i] = decl->Resource.Resource;
+ break;
+ case TGSI_FILE_IMMEDIATE_ARRAY:
+ {
+ if (decl->Dim.Index2D >= immdArrayCount)
+ immdArrayCount = decl->Dim.Index2D + 1;
+ immdArrays[decl->Dim.Index2D].u32 = (last + 1) << 2;
+ int c;
+ uint32_t base, count;
+ switch (decl->Declaration.UsageMask) {
+ case 0x1: c = 1; break;
+ case 0x3: c = 2; break;
+ default:
+ c = 4;
+ break;
+ }
+ immdArrays[decl->Dim.Index2D].u32 |= c;
+ count = (last + 1) * c;
+ base = info->immd.bufSize / 4;
+ info->immd.bufSize = (info->immd.bufSize + count * 4 + 0xf) & ~0xf;
+ info->immd.buf = (uint32_t *)REALLOC(info->immd.buf, base * 4,
+ info->immd.bufSize);
+ // NOTE: this assumes array declarations are ordered by Dim.Index2D
+ for (i = 0; i < count; ++i)
+ info->immd.buf[base + i] = decl->ImmediateData.u[i].Uint;
+ }
+ break;
+ case TGSI_FILE_TEMPORARY_ARRAY:
+ {
+ if (decl->Dim.Index2D >= tempArrayCount)
+ tempArrayCount = decl->Dim.Index2D + 1;
+ tempArrays[decl->Dim.Index2D].u32 = (last + 1) << 2;
+ int c;
+ uint32_t count;
+ switch (decl->Declaration.UsageMask) {
+ case 0x1: c = 1; break;
+ case 0x3: c = 2; break;
+ default:
+ c = 4;
+ break;
+ }
+ tempArrays[decl->Dim.Index2D].u32 |= c;
+ count = (last + 1) * c;
+      info->bin.tlsSpace += (count * 4 + 0xf) & ~0xf;
+ }
+ break;
+ case TGSI_FILE_NULL:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_ADDRESS:
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_IMMEDIATE:
+ case TGSI_FILE_PREDICATE:
+ case TGSI_FILE_SAMPLER:
+ break;
+ default:
+ ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
+ return false;
+ }
+ return true;
+}
+
+inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
+{
+ return insn.getOpcode() == TGSI_OPCODE_MOV &&
+ insn.getDst(0).getIndex(0) == info->io.edgeFlagOut &&
+ insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
+}
+
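+// Record which input and output components an instruction actually accesses,
+// and detect indirect access to temporaries, which forces the main temporary
+// file into local memory.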
+bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
+{
+ Instruction insn(inst);
+
+ if (insn.dstCount()) {
+ if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) {
+ Instruction::DstRegister dst = insn.getDst(0);
+
+ if (dst.isIndirect(0))
+ for (unsigned i = 0; i < info->numOutputs; ++i)
+ info->out[i].mask = 0xf;
+ else
+ info->out[dst.getIndex(0)].mask |= dst.getMask();
+
+ if (isEdgeFlagPassthrough(insn))
+ info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
+ } else
+ if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
+ if (insn.getDst(0).isIndirect(0))
+ mainTempsInLMem = TRUE;
+ }
+ }
+
+ for (unsigned s = 0; s < insn.srcCount(); ++s) {
+ Instruction::SrcRegister src = insn.getSrc(s);
+ if (src.getFile() == TGSI_FILE_TEMPORARY)
+ if (src.isIndirect(0))
+ mainTempsInLMem = TRUE;
+ if (src.getFile() != TGSI_FILE_INPUT)
+ continue;
+ unsigned mask = insn.srcMask(s);
+
+ if (src.isIndirect(0)) {
+ for (unsigned i = 0; i < info->numInputs; ++i)
+ info->in[i].mask = 0xf;
+ } else {
+ for (unsigned c = 0; c < 4; ++c) {
+ if (!(mask & (1 << c)))
+ continue;
+ int k = src.getSwizzle(c);
+ int i = src.getIndex(0);
+ if (info->in[i].sn != TGSI_SEMANTIC_FOG || k == TGSI_SWIZZLE_X)
+ if (k <= TGSI_SWIZZLE_W)
+ info->in[i].mask |= 1 << k;
+ }
+ }
+ }
+ return true;
+}
+
+nv50_ir::TexInstruction::Target
+Instruction::getTexture(const tgsi::Source *code, int s) const
+{
+ if (insn->Instruction.Texture) {
+ return translateTexture(insn->Texture.Texture);
+ } else {
+ // XXX: indirect access
+ unsigned int r = getSrc(s).getIndex(0);
+ assert(r < code->resourceCount);
+ return translateTexture(code->resourceTargets[r]);
+ }
+}
+
+} // namespace tgsi
+
+namespace {
+
+using namespace nv50_ir;
+
+class Converter : public BuildUtil
+{
+public:
+ Converter(Program *, const tgsi::Source *);
+ ~Converter();
+
+ bool run();
+
+private:
+ Value *getVertexBase(int s);
+ Value *fetchSrc(int s, int c);
+ Value *acquireDst(int d, int c);
+ void storeDst(int d, int c, Value *);
+
+ Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
+ void storeDst(const tgsi::Instruction::DstRegister dst, int c,
+ Value *val, Value *ptr);
+
+ Value *applySrcMod(Value *, int s, int c);
+
+ Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
+ Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
+ Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);
+
+ bool handleInstruction(const struct tgsi_full_instruction *);
+ void exportOutputs();
+ inline bool isEndOfSubroutine(uint ip);
+
+ void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);
+
+   // R and S are the indices of the TGSI sources supplying the resource and
+   // sampler; L, C, Dx and Dy encode a source as (index << 4) | component,
+   // where C == 0x0f requests automatic selection of the compare value
+ void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
+ void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
+ void handleTXF(Value *dst0[4], int R);
+ void handleTXQ(Value *dst0[4], enum TexQuery);
+ void handleLIT(Value *dst0[4]);
+ void handleUserClipPlanes();
+
+ Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
+
+ void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
+
+ Value *buildDot(int dim);
+
+private:
+ const struct tgsi::Source *code;
+ const struct nv50_ir_prog_info *info;
+
+ uint ip; // instruction pointer
+
+ tgsi::Instruction tgsi;
+
+ DataType dstTy;
+ DataType srcTy;
+
+ DataArray tData; // TGSI_FILE_TEMPORARY
+ DataArray aData; // TGSI_FILE_ADDRESS
+ DataArray pData; // TGSI_FILE_PREDICATE
+ DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
+ DataArray *lData; // TGSI_FILE_TEMPORARY_ARRAY
+ DataArray *iData; // TGSI_FILE_IMMEDIATE_ARRAY
+
+ Value *zero;
+ Value *fragCoord[4];
+ Value *clipVtx[4];
+
+ Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
+ uint8_t vtxBaseValid;
+
+ Stack condBBs; // fork BB, then else clause BB
+ Stack joinBBs; // fork BB, for inserting join ops on ENDIF
+ Stack loopBBs; // loop headers
+ Stack breakBBs; // end of / after loop
+ Stack entryBBs; // start of current (inlined) subroutine
+ Stack leaveBBs; // end of current (inlined) subroutine
+ Stack retIPs; // return instruction pointer
+};
+
+Symbol *
+Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
+{
+ const int swz = src.getSwizzle(c);
+
+ return makeSym(src.getFile(),
+ src.is2D() ? src.getIndex(1) : 0,
+ src.isIndirect(0) ? -1 : src.getIndex(0), swz,
+ src.getIndex(0) * 16 + swz * 4);
+}
+
+Symbol *
+Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
+{
+ return makeSym(dst.getFile(),
+ dst.is2D() ? dst.getIndex(1) : 0,
+ dst.isIndirect(0) ? -1 : dst.getIndex(0), c,
+ dst.getIndex(0) * 16 + c * 4);
+}
+
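+// Build a Symbol for a TGSI register reference: inputs and outputs use their
+// assigned slots, system values their SV identifier, everything else a plain
+// byte offset.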
+Symbol *
+Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
+{
+ Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));
+
+ sym->reg.fileIndex = fileIdx;
+
+ if (idx >= 0) {
+ if (sym->reg.file == FILE_SHADER_INPUT)
+ sym->setOffset(info->in[idx].slot[c] * 4);
+ else
+ if (sym->reg.file == FILE_SHADER_OUTPUT)
+ sym->setOffset(info->out[idx].slot[c] * 4);
+ else
+ if (sym->reg.file == FILE_SYSTEM_VALUE)
+ sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
+ else
+ sym->setOffset(address);
+ } else {
+ sym->setOffset(address);
+ }
+ return sym;
+}
+
+static inline uint8_t
+translateInterpMode(const struct nv50_ir_varying *var, operation& op)
+{
+ uint8_t mode;
+
+ if (var->flat)
+ mode = NV50_IR_INTERP_FLAT;
+ else
+ if (var->linear)
+ mode = NV50_IR_INTERP_LINEAR;
+ else
+ mode = NV50_IR_INTERP_PERSPECTIVE;
+
+ op = (mode == NV50_IR_INTERP_PERSPECTIVE) ? OP_PINTERP : OP_LINTERP;
+
+ if (var->centroid)
+ mode |= NV50_IR_INTERP_CENTROID;
+
+ return mode;
+}
+
+Value *
+Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
+{
+ operation op;
+
+ // XXX: no way to know interpolation mode if we don't know what's accessed
+ const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 :
+ src.getIndex(0)], op);
+
+ Instruction *insn = new_Instruction(func, op, TYPE_F32);
+
+ insn->setDef(0, getScratch());
+ insn->setSrc(0, srcToSym(src, c));
+ if (op == OP_PINTERP)
+ insn->setSrc(1, fragCoord[3]);
+ if (ptr)
+ insn->setIndirect(0, 0, ptr);
+
+ insn->setInterpolate(mode);
+
+ bb->insertTail(insn);
+ return insn->getDef(0);
+}
+
+Value *
+Converter::applySrcMod(Value *val, int s, int c)
+{
+ Modifier m = tgsi.getSrc(s).getMod(c);
+ DataType ty = tgsi.inferSrcType();
+
+ if (m & Modifier(NV50_IR_MOD_ABS))
+ val = mkOp1v(OP_ABS, ty, getScratch(), val);
+
+ if (m & Modifier(NV50_IR_MOD_NEG))
+ val = mkOp1v(OP_NEG, ty, getScratch(), val);
+
+ return val;
+}
+
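+// Get the base address of the vertex referenced by the 2D input source s;
+// the result is cached in vtxBase[] for the duration of the current
+// instruction.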
+Value *
+Converter::getVertexBase(int s)
+{
+ assert(s < 5);
+ if (!(vtxBaseValid & (1 << s))) {
+ const int index = tgsi.getSrc(s).getIndex(1);
+ Value *rel = NULL;
+ if (tgsi.getSrc(s).isIndirect(1))
+ rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
+ vtxBaseValid |= 1 << s;
+ vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(index), rel);
+ }
+ return vtxBase[s];
+}
+
+Value *
+Converter::fetchSrc(int s, int c)
+{
+ Value *res;
+ Value *ptr = NULL, *dimRel = NULL;
+
+ tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);
+
+ if (src.isIndirect(0))
+ ptr = fetchSrc(src.getIndirect(0), 0, NULL);
+
+ if (src.is2D()) {
+ switch (src.getFile()) {
+ case TGSI_FILE_INPUT:
+ dimRel = getVertexBase(s);
+ break;
+ case TGSI_FILE_CONSTANT:
+ // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
+ if (src.isIndirect(1))
+ dimRel = fetchSrc(src.getIndirect(1), 0, 0);
+ break;
+ default:
+ break;
+ }
+ }
+
+ res = fetchSrc(src, c, ptr);
+
+ if (dimRel)
+ res->getInsn()->setIndirect(0, 1, dimRel);
+
+ return applySrcMod(res, s, c);
+}
+
+Value *
+Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
+{
+ const int idx = src.getIndex(0);
+ const int swz = src.getSwizzle(c);
+
+ switch (src.getFile()) {
+ case TGSI_FILE_TEMPORARY:
+ return tData.load(idx, swz, ptr);
+ case TGSI_FILE_PREDICATE:
+ return pData.load(idx, swz, ptr);
+ case TGSI_FILE_ADDRESS:
+ return aData.load(idx, swz, ptr);
+
+ case TGSI_FILE_TEMPORARY_ARRAY:
+ assert(src.is2D() && src.getIndex(1) < code->tempArrayCount);
+ return lData[src.getIndex(1)].load(idx, swz, ptr);
+ case TGSI_FILE_IMMEDIATE_ARRAY:
+ assert(src.is2D() && src.getIndex(1) < code->immdArrayCount);
+ return iData[src.getIndex(1)].load(idx, swz, ptr);
+
+ case TGSI_FILE_IMMEDIATE:
+ assert(!ptr);
+ return loadImm(NULL, info->immd.data[idx * 4 + swz]);
+
+ case TGSI_FILE_CONSTANT:
+ return mkLoad(TYPE_U32, srcToSym(src, c), ptr);
+
+ case TGSI_FILE_INPUT:
+ if (prog->getType() == Program::TYPE_FRAGMENT) {
+ // don't load masked inputs, won't be assigned a slot
+ if (!ptr && !(info->in[idx].mask & (1 << swz)))
+ return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
+ return interpolate(src, c, ptr);
+ }
+ return mkLoad(TYPE_U32, srcToSym(src, c), ptr);
+
+ case TGSI_FILE_SYSTEM_VALUE:
+ assert(!ptr);
+ return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
+
+ case TGSI_FILE_OUTPUT:
+ case TGSI_FILE_RESOURCE:
+ case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_NULL:
+ default:
+ assert(!"invalid/unhandled TGSI source file");
+ return NULL;
+ }
+}
+
+Value *
+Converter::acquireDst(int d, int c)
+{
+ const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
+
+ if (dst.isMasked(c))
+ return NULL;
+ if (dst.isIndirect(0))
+ return getScratch();
+
+ const int idx = dst.getIndex(0);
+
+ switch (dst.getFile()) {
+ case TGSI_FILE_TEMPORARY:
+ return tData.acquire(idx, c);
+ case TGSI_FILE_TEMPORARY_ARRAY:
+ return getScratch();
+ case TGSI_FILE_PREDICATE:
+ return pData.acquire(idx, c);
+ case TGSI_FILE_ADDRESS:
+ return aData.acquire(idx, c);
+
+ case TGSI_FILE_OUTPUT:
+ if (prog->getType() == Program::TYPE_FRAGMENT)
+ return oData.acquire(idx, c);
+ // fall through
+ case TGSI_FILE_SYSTEM_VALUE:
+ return getScratch();
+
+ default:
+ assert(!"invalid dst file");
+ return NULL;
+ }
+}
+
+void
+Converter::storeDst(int d, int c, Value *val)
+{
+ const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
+
+ switch (tgsi.getSaturate()) {
+ case TGSI_SAT_NONE:
+ break;
+ case TGSI_SAT_ZERO_ONE:
+ mkOp1(OP_SAT, dstTy, val, val);
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f));
+ mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f));
+ break;
+ default:
+ assert(!"invalid saturation mode");
+ break;
+ }
+
+ Value *ptr = dst.isIndirect(0) ?
+ fetchSrc(dst.getIndirect(0), 0, NULL) : NULL;
+
+ if (info->io.clipDistanceCount &&
+ dst.getFile() == TGSI_FILE_OUTPUT &&
+ info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_POSITION) {
+ mkMov(clipVtx[c], val);
+ val = clipVtx[c];
+ }
+
+ storeDst(dst, c, val, ptr);
+}
+
+void
+Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
+ Value *val, Value *ptr)
+{
+ const int idx = dst.getIndex(0);
+
+ switch (dst.getFile()) {
+ case TGSI_FILE_TEMPORARY:
+ tData.store(idx, c, ptr, val);
+ break;
+ case TGSI_FILE_TEMPORARY_ARRAY:
+ assert(dst.is2D() && dst.getIndex(1) < code->tempArrayCount);
+ lData[dst.getIndex(1)].store(idx, c, ptr, val);
+ break;
+ case TGSI_FILE_PREDICATE:
+ pData.store(idx, c, ptr, val);
+ break;
+ case TGSI_FILE_ADDRESS:
+ aData.store(idx, c, ptr, val);
+ break;
+
+ case TGSI_FILE_OUTPUT:
+ if (prog->getType() == Program::TYPE_FRAGMENT)
+ oData.store(idx, c, ptr, val);
+ else
+ mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
+ break;
+
+ case TGSI_FILE_SYSTEM_VALUE:
+ assert(!ptr);
+ mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
+ break;
+
+ default:
+ assert(!"invalid dst file");
+ break;
+ }
+}
+
+#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst) \
+ for (chan = 0; chan < 4; ++chan) \
+ if (!inst.getDst(d).isMasked(chan))
+
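+// Compute the dot product of the first dim components of sources 0 and 1
+// using a MUL followed by MADs.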
+Value *
+Converter::buildDot(int dim)
+{
+ assert(dim > 0);
+
+ Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
+ Value *dotp = getScratch();
+
+ mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
+
+ for (int c = 1; c < dim; ++c) {
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp);
+ }
+ return dotp;
+}
+
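+// Mark re-convergence of divergent control flow: JOINAT in the fork block
+// right before its exit, JOIN at the head of the convergence block.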
+void
+Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
+{
+ FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
+ join->fixed = 1;
+ conv->insertHead(join);
+
+ fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
+ fork->insertBefore(fork->getExit(), fork->joinAt);
+}
+
+void
+Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
+{
+ unsigned rIdx = 0, sIdx = 0;
+
+ if (R >= 0)
+ rIdx = tgsi.getSrc(R).getIndex(0);
+ if (S >= 0)
+ sIdx = tgsi.getSrc(S).getIndex(0);
+
+ tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);
+
+ if (tgsi.getSrc(R).isIndirect(0)) {
+ tex->tex.rIndirectSrc = s;
+ tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));
+ }
+ if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {
+ tex->tex.sIndirectSrc = s;
+ tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));
+ }
+}
+
+void
+Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
+{
+ TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
+ tex->tex.query = query;
+ unsigned int c, d;
+
+ for (d = 0, c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+ tex->tex.mask |= 1 << c;
+ tex->setDef(d++, dst0[c]);
+ }
+ tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+
+ setTexRS(tex, c, 1, -1);
+
+ bb->insertTail(tex);
+}
+
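+// For TXP, divide the coordinates selected by mask by the projection value
+// (the q component). Coordinates that come from perspective interpolation are
+// re-issued with 1/q as the perspective factor; all others are multiplied by
+// 1/q directly.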
+void
+Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
+{
+ Value *proj = fetchSrc(0, 3);
+ Instruction *insn = proj->getUniqueInsn();
+ int c;
+
+ if (insn->op == OP_PINTERP) {
+ bb->insertTail(insn = insn->clone(true));
+ insn->op = OP_LINTERP;
+ insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
+ insn->setSrc(1, NULL);
+ proj = insn->getDef(0);
+ }
+ proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);
+
+ for (c = 0; c < 4; ++c) {
+ if (!(mask & (1 << c)))
+ continue;
+ if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
+ continue;
+ mask &= ~(1 << c);
+
+ bb->insertTail(insn = insn->clone(true));
+ insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
+ insn->setSrc(1, proj);
+ dst[c] = insn->getDef(0);
+ }
+ if (!mask)
+ return;
+
+ proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));
+
+ for (c = 0; c < 4; ++c)
+ if (mask & (1 << c))
+ dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
+}
+
+// order of nv50 IR sources: x y z layer lod/bias shadow
+// order of TGSI TEX sources: x y z layer shadow lod/bias
+// the hardware-specific order is set later during lowering (e.g. the array
+// index comes first on nvc0)
+void
+Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
+{
+ Value *val;
+ Value *arg[4], *src[8];
+ Value *lod = NULL, *shd = NULL;
+ unsigned int s, c, d;
+ TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
+
+ TexInstruction::Target tgt = tgsi.getTexture(code, R);
+
+ for (s = 0; s < tgt.getArgCount(); ++s)
+ arg[s] = src[s] = fetchSrc(0, s);
+
+ if (texi->op == OP_TXL || texi->op == OP_TXB)
+ lod = fetchSrc(L >> 4, L & 3);
+
+ if (C == 0x0f)
+ C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src
+
+ if (tgt.isShadow())
+ shd = fetchSrc(C >> 4, C & 3);
+
+ if (texi->op == OP_TXD) {
+ for (c = 0; c < tgt.getDim(); ++c) {
+ texi->dPdx[c] = fetchSrc(Dx >> 4, (Dx & 3) + c);
+ texi->dPdy[c] = fetchSrc(Dy >> 4, (Dy & 3) + c);
+ }
+ }
+
+ // cube textures don't care about projection value, it's divided out
+ if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
+ unsigned int n = tgt.getDim();
+ if (shd) {
+ arg[n] = shd;
+ ++n;
+ assert(tgt.getDim() == tgt.getArgCount());
+ }
+ loadProjTexCoords(src, arg, (1 << n) - 1);
+ if (shd)
+ shd = src[n - 1];
+ }
+
+ if (tgt.isCube()) {
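+      // normalize cube map coordinates: divide all of them by the component
+      // with the largest absolute value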
+ for (c = 0; c < 3; ++c)
+ src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
+ val = getScratch();
+ mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c)
+ src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
+ }
+
+ for (c = 0, d = 0; c < 4; ++c) {
+ if (dst[c]) {
+ texi->setDef(d++, dst[c]);
+ texi->tex.mask |= 1 << c;
+ } else {
+ // NOTE: maybe hook up def too, for CSE
+ }
+ }
+ for (s = 0; s < tgt.getArgCount(); ++s)
+ texi->setSrc(s, src[s]);
+ if (lod)
+ texi->setSrc(s++, lod);
+ if (shd)
+ texi->setSrc(s++, shd);
+
+ setTexRS(texi, s, R, S);
+
+ if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
+ texi->tex.levelZero = true;
+
+ bb->insertTail(texi);
+}
+
+// 1st source: xyz = coordinates, w = lod
+// 2nd source: offset
+void
+Converter::handleTXF(Value *dst[4], int R)
+{
+ TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
+ unsigned int c, d, s;
+
+ texi->tex.target = tgsi.getTexture(code, R);
+
+ for (c = 0, d = 0; c < 4; ++c) {
+ if (dst[c]) {
+ texi->setDef(d++, dst[c]);
+ texi->tex.mask |= 1 << c;
+ }
+ }
+ for (c = 0; c < texi->tex.target.getArgCount(); ++c)
+ texi->setSrc(c, fetchSrc(0, c));
+ texi->setSrc(c++, fetchSrc(0, 3)); // lod
+
+ setTexRS(texi, c, R, -1);
+
+ for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
+ for (c = 0; c < 3; ++c) {
+ texi->tex.offset[s][c] = tgsi.getTexOffset(s).getValueU32(c, info);
+ if (texi->tex.offset[s][c])
+ texi->tex.useOffsets = s + 1;
+ }
+ }
+
+ bb->insertTail(texi);
+}
+
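+// LIT: dst.x = dst.w = 1, dst.y = max(src.x, 0),
+// dst.z = (src.x > 0) ? pow(max(src.y, 0), src.w clamped to +-127.999999) : 0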
+void
+Converter::handleLIT(Value *dst0[4])
+{
+ Value *val0 = NULL;
+ unsigned int mask = tgsi.getDst(0).getMask();
+
+ if (mask & (1 << 0))
+ loadImm(dst0[0], 1.0f);
+
+ if (mask & (1 << 3))
+ loadImm(dst0[3], 1.0f);
+
+ if (mask & (3 << 1)) {
+ val0 = getScratch();
+ mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero);
+ if (mask & (1 << 1))
+ mkMov(dst0[1], val0);
+ }
+
+ if (mask & (1 << 2)) {
+ Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3);
+ Value *val1 = getScratch(), *val3 = getScratch();
+
+ Value *pos128 = loadImm(NULL, +127.999999f);
+ Value *neg128 = loadImm(NULL, -127.999999f);
+
+ mkOp2(OP_MAX, TYPE_F32, val1, src1, zero);
+ mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128);
+ mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
+ mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
+
+ mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], val3, zero, val0);
+ }
+}
+
+bool
+Converter::isEndOfSubroutine(uint ip)
+{
+ assert(ip < code->scan.num_instructions);
+ tgsi::Instruction insn(&code->insns[ip]);
+ return (insn.getOpcode() == TGSI_OPCODE_END ||
+ insn.getOpcode() == TGSI_OPCODE_ENDSUB ||
+           // does END occur at the end of main or only at the very end?
+ insn.getOpcode() == TGSI_OPCODE_BGNSUB);
+}
+
+bool
+Converter::handleInstruction(const struct tgsi_full_instruction *insn)
+{
+ Value *dst0[4], *rDst0[4];
+ Value *src0, *src1, *src2;
+ Value *val0, *val1;
+ int c;
+
+ tgsi = tgsi::Instruction(insn);
+
+ bool useScratchDst = tgsi.checkDstSrcAliasing();
+
+ operation op = tgsi.getOP();
+ dstTy = tgsi.inferDstType();
+ srcTy = tgsi.inferSrcType();
+
+ unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
+
+ if (tgsi.dstCount()) {
+ for (c = 0; c < 4; ++c) {
+ rDst0[c] = acquireDst(0, c);
+ dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
+ }
+ }
+
+ switch (tgsi.getOpcode()) {
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_UADD:
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_DIV:
+ case TGSI_OPCODE_IDIV:
+ case TGSI_OPCODE_UDIV:
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_UMAX:
+ case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_MOD:
+ case TGSI_OPCODE_UMOD:
+ case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_SHL:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_USHR:
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_XOR:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ mkOp2(op, dstTy, dst0[c], src0, src1);
+ }
+ break;
+ case TGSI_OPCODE_MAD:
+ case TGSI_OPCODE_UMAD:
+ case TGSI_OPCODE_SAD:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ src2 = fetchSrc(2, c);
+ mkOp3(op, dstTy, dst0[c], src0, src1, src2);
+ }
+ break;
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_CEIL:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_TRUNC:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_INEG:
+ case TGSI_OPCODE_NOT:
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
+ break;
+ case TGSI_OPCODE_RSQ:
+ src0 = fetchSrc(0, 0);
+ val0 = getScratch();
+ mkOp1(OP_ABS, TYPE_F32, val0, src0);
+ mkOp1(OP_RSQ, TYPE_F32, val0, val0);
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkMov(dst0[c], val0);
+ break;
+ case TGSI_OPCODE_ARL:
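+      // convert to integer (rounding towards -inf) and scale by 16: indirect
+      // addressing uses byte offsets and one TGSI vec4 register is 16 bytes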
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = ROUND_M;
+ mkOp2(OP_SHL, TYPE_U32, dst0[c], dst0[c], mkImm(4));
+ }
+ break;
+ case TGSI_OPCODE_UARL:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkOp2(OP_SHL, TYPE_U32, dst0[c], fetchSrc(0, c), mkImm(4));
+ break;
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
+ val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
+ break;
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ val0 = getScratch();
+ if (mask & 7) {
+ mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));
+ mkOp1(op, TYPE_F32, val0, val0);
+ for (c = 0; c < 3; ++c)
+ if (dst0[c])
+ mkMov(dst0[c], val0);
+ }
+ if (dst0[3]) {
+ mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
+ mkOp1(op, TYPE_F32, dst0[3], val0);
+ }
+ break;
+ case TGSI_OPCODE_SCS:
+ if (mask & 3) {
+ val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
+ if (dst0[0])
+ mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
+ if (dst0[1])
+ mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
+ }
+ if (dst0[2])
+ loadImm(dst0[2], 0.0f);
+ if (dst0[3])
+ loadImm(dst0[3], 1.0f);
+ break;
+ case TGSI_OPCODE_EXP:
+ src0 = fetchSrc(0, 0);
+ val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
+ if (dst0[1])
+ mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
+ if (dst0[0])
+ mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
+ if (dst0[2])
+ mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
+ if (dst0[3])
+ loadImm(dst0[3], 1.0f);
+ break;
+ case TGSI_OPCODE_LOG:
+ src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));
+ val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);
+ if (dst0[0] || dst0[1])
+ val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);
+ if (dst0[1]) {
+ mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
+ mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
+ mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0);
+ }
+ if (dst0[3])
+ loadImm(dst0[3], 1.0f);
+ break;
+ case TGSI_OPCODE_DP2:
+ val0 = buildDot(2);
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkMov(dst0[c], val0);
+ break;
+ case TGSI_OPCODE_DP3:
+ val0 = buildDot(3);
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkMov(dst0[c], val0);
+ break;
+ case TGSI_OPCODE_DP4:
+ val0 = buildDot(4);
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkMov(dst0[c], val0);
+ break;
+ case TGSI_OPCODE_DPH:
+ val0 = buildDot(3);
+ src1 = fetchSrc(1, 3);
+ mkOp2(OP_ADD, TYPE_F32, val0, val0, src1);
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkMov(dst0[c], val0);
+ break;
+ case TGSI_OPCODE_DST:
+ if (dst0[0])
+ loadImm(dst0[0], 1.0f);
+ if (dst0[1]) {
+ src0 = fetchSrc(0, 1);
+ src1 = fetchSrc(1, 1);
+ mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1);
+ }
+ if (dst0[2])
+ mkMov(dst0[2], fetchSrc(0, 2));
+ if (dst0[3])
+ mkMov(dst0[3], fetchSrc(1, 3));
+ break;
+ case TGSI_OPCODE_LRP:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ src2 = fetchSrc(2, c);
+ mkOp3(OP_MAD, TYPE_F32, dst0[c],
+ mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2);
+ }
+ break;
+ case TGSI_OPCODE_LIT:
+ handleLIT(dst0);
+ break;
+ case TGSI_OPCODE_XPD:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ if (c < 3) {
+ val0 = getSSA();
+ src0 = fetchSrc(1, (c + 1) % 3);
+ src1 = fetchSrc(0, (c + 2) % 3);
+ mkOp2(OP_MUL, TYPE_F32, val0, src0, src1);
+ mkOp1(OP_NEG, TYPE_F32, val0, val0);
+
+ src0 = fetchSrc(0, (c + 1) % 3);
+ src1 = fetchSrc(1, (c + 2) % 3);
+ mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0);
+ } else {
+ loadImm(dst0[c], 1.0f);
+ }
+ }
+ break;
+ case TGSI_OPCODE_SSG:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ val0 = getScratch();
+ val1 = getScratch();
+ mkCmp(OP_SET, CC_GT, TYPE_F32, val0, src0, zero);
+ mkCmp(OP_SET, CC_LT, TYPE_F32, val1, src0, zero);
+ mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
+ }
+ break;
+ case TGSI_OPCODE_UCMP:
+ case TGSI_OPCODE_CMP:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ src2 = fetchSrc(2, c);
+ if (src1 == src2)
+ mkMov(dst0[c], src1);
+ else
+ mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
+ srcTy, dst0[c], src1, src2, src0);
+ }
+ break;
+ case TGSI_OPCODE_FRC:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ val0 = getScratch();
+ mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
+ mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
+ }
+ break;
+ case TGSI_OPCODE_ROUND:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
+ ->rnd = ROUND_NI;
+ break;
+ case TGSI_OPCODE_CLAMP:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ src2 = fetchSrc(2, c);
+ val0 = getScratch();
+ mkOp2(OP_MIN, TYPE_F32, val0, src0, src1);
+ mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2);
+ }
+ break;
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SFL:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_STR:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], src0, src1);
+ }
+ break;
+ case TGSI_OPCODE_KIL:
+ val0 = new_LValue(func, FILE_PREDICATE);
+ for (c = 0; c < 4; ++c) {
+ mkCmp(OP_SET, CC_LT, TYPE_F32, val0, fetchSrc(0, c), zero);
+ mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
+ }
+ break;
+ case TGSI_OPCODE_KILP:
+ mkOp(OP_DISCARD, TYPE_NONE, NULL);
+ break;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXP:
+ // R S L C Dx Dy
+ handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
+ break;
+ case TGSI_OPCODE_TXD:
+ handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
+ break;
+ case TGSI_OPCODE_SAMPLE:
+ case TGSI_OPCODE_SAMPLE_B:
+ case TGSI_OPCODE_SAMPLE_D:
+ case TGSI_OPCODE_SAMPLE_L:
+ case TGSI_OPCODE_SAMPLE_C:
+ case TGSI_OPCODE_SAMPLE_C_LZ:
+ handleTEX(dst0, 1, 2, 0x30, 0x31, 0x40, 0x50);
+ break;
+ case TGSI_OPCODE_TXF:
+ case TGSI_OPCODE_LOAD:
+ handleTXF(dst0, 1);
+ break;
+ case TGSI_OPCODE_TXQ:
+ case TGSI_OPCODE_RESINFO:
+ handleTXQ(dst0, TXQ_DIMS);
+ break;
+ case TGSI_OPCODE_F2I:
+ case TGSI_OPCODE_F2U:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;
+ break;
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_U2F:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
+ break;
+ case TGSI_OPCODE_EMIT:
+ case TGSI_OPCODE_ENDPRIM:
+ // get vertex stream if specified (must be immediate)
+ src0 = tgsi.srcCount() ?
+ mkImm(tgsi.getSrc(0).getValueU32(0, info)) : zero;
+ mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
+ break;
+ case TGSI_OPCODE_IF:
+ {
+ BasicBlock *ifBB = new BasicBlock(func);
+
+ bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
+ condBBs.push(bb);
+ joinBBs.push(bb);
+
+ mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0));
+
+ setPosition(ifBB, true);
+ }
+ break;
+ case TGSI_OPCODE_ELSE:
+ {
+ BasicBlock *elseBB = new BasicBlock(func);
+ BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
+
+ forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
+ condBBs.push(bb);
+
+ forkBB->getExit()->asFlow()->target.bb = elseBB;
+ if (!bb->isTerminated())
+ mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
+
+ setPosition(elseBB, true);
+ }
+ break;
+ case TGSI_OPCODE_ENDIF:
+ {
+ BasicBlock *convBB = new BasicBlock(func);
+ BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
+ BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
+
+ if (!bb->isTerminated()) {
+ // we only want join if none of the clauses ended with CONT/BREAK/RET
+ if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
+ insertConvergenceOps(convBB, forkBB);
+ mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);
+ bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
+ }
+
+ if (prevBB->getExit()->op == OP_BRA) {
+ prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
+ prevBB->getExit()->asFlow()->target.bb = convBB;
+ }
+ setPosition(convBB, true);
+ }
+ break;
+ case TGSI_OPCODE_BGNLOOP:
+ {
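+      // mark the break target (the block after the loop) before entering it
+      // and the continue target at the loop header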
+ BasicBlock *lbgnBB = new BasicBlock(func);
+ BasicBlock *lbrkBB = new BasicBlock(func);
+
+ loopBBs.push(lbgnBB);
+ breakBBs.push(lbrkBB);
+ if (loopBBs.getSize() > func->loopNestingBound)
+ func->loopNestingBound++;
+
+ mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);
+
+ bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);
+ setPosition(lbgnBB, true);
+ mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);
+ }
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ {
+ BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
+
+ if (!bb->isTerminated()) {
+ mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
+ bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
+ }
+ setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
+ }
+ break;
+ case TGSI_OPCODE_BRK:
+ {
+ if (bb->isTerminated())
+ break;
+ BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
+ mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);
+ bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);
+ }
+ break;
+ case TGSI_OPCODE_CONT:
+ {
+ if (bb->isTerminated())
+ break;
+ BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
+ mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
+ contBB->explicitCont = true;
+ bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
+ }
+ break;
+ case TGSI_OPCODE_BGNSUB:
+ {
+ if (!retIPs.getSize()) {
+ // end of main function
+ ip = code->scan.num_instructions - 2; // goto END
+ return true;
+ }
+ BasicBlock *entry = new BasicBlock(func);
+ BasicBlock *leave = new BasicBlock(func);
+ entryBBs.push(entry);
+ leaveBBs.push(leave);
+ bb->cfg.attach(&entry->cfg, Graph::Edge::TREE);
+ setPosition(entry, true);
+ }
+ return true;
+ case TGSI_OPCODE_ENDSUB:
+ {
+ BasicBlock *leave = reinterpret_cast<BasicBlock *>(leaveBBs.pop().u.p);
+ entryBBs.pop();
+ bb->cfg.attach(&leave->cfg, Graph::Edge::TREE);
+ setPosition(leave, true);
+ ip = retIPs.pop().u.u;
+ }
+ return true;
+ case TGSI_OPCODE_CAL:
+ // we don't have function declarations, so inline everything
+ retIPs.push(ip);
+ ip = code->subroutines[tgsi.getLabel()].pc - 1; // +1 after return
+ return true;
+ case TGSI_OPCODE_RET:
+ {
+ if (bb->isTerminated())
+ return true;
+ BasicBlock *entry = reinterpret_cast<BasicBlock *>(entryBBs.peek().u.p);
+ BasicBlock *leave = reinterpret_cast<BasicBlock *>(leaveBBs.peek().u.p);
+ if (!isEndOfSubroutine(ip + 1)) {
+ // insert a PRERET at the entry if this is an early return
+ FlowInstruction *preRet = new_FlowInstruction(func, OP_PRERET, leave);
+ preRet->fixed = 1;
+ entry->insertHead(preRet);
+ bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
+ }
+ // everything inlined so RET serves only to wrap up the stack
+ if (entry->getEntry() && entry->getEntry()->op == OP_PRERET)
+ mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
+ }
+ break;
+ case TGSI_OPCODE_END:
+ {
+ // attach and generate epilogue code
+ BasicBlock *epilogue = reinterpret_cast<BasicBlock *>(leaveBBs.pop().u.p);
+ entryBBs.pop();
+ bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
+ setPosition(epilogue, true);
+ if (prog->getType() == Program::TYPE_FRAGMENT)
+ exportOutputs();
+ if (info->io.clipDistanceCount)
+ handleUserClipPlanes();
+ mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
+ }
+ break;
+ case TGSI_OPCODE_SWITCH:
+ case TGSI_OPCODE_CASE:
+ ERROR("switch/case opcode encountered, should have been lowered\n");
+ abort();
+ break;
+ default:
+ ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
+ assert(0);
+ break;
+ }
+
+ if (tgsi.dstCount()) {
+ for (c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+ if (dst0[c] != rDst0[c])
+ mkMov(rDst0[c], dst0[c]);
+ storeDst(0, c, rDst0[c]);
+ }
+ }
+ vtxBaseValid = 0;
+
+ return true;
+}
+
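+// Compute the dot product of the written vertex position with each user clip
+// plane (loaded from constant memory file 15) and write the results to the
+// clip distance system values.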
+void
+Converter::handleUserClipPlanes()
+{
+ Value *res[8];
+ int i, c;
+
+ for (c = 0; c < 4; ++c) {
+ for (i = 0; i < info->io.clipDistanceCount; ++i) {
+ Value *ucp;
+ ucp = mkLoad(TYPE_F32, mkSymbol(FILE_MEMORY_CONST, 15, TYPE_F32,
+ i * 16 + c * 4), NULL);
+ if (c == 0)
+ res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
+ else
+ mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
+ }
+ }
+
+ for (i = 0; i < info->io.clipDistanceCount; ++i)
+ mkOp2(OP_WRSV, TYPE_F32, NULL, mkSysVal(SV_CLIP_DISTANCE, i), res[i]);
+}
+
+void
+Converter::exportOutputs()
+{
+ for (unsigned int i = 0; i < info->numOutputs; ++i) {
+ for (unsigned int c = 0; c < 4; ++c) {
+ if (!oData.exists(i, c))
+ continue;
+ Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
+ info->out[i].slot[c] * 4);
+ Value *val = oData.load(i, c, NULL);
+ if (val)
+ mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
+ }
+ }
+}
+
+Converter::Converter(Program *ir, const tgsi::Source *src)
+ : code(src),
+ tgsi(NULL),
+ tData(this), aData(this), pData(this), oData(this)
+{
+ prog = ir;
+ info = code->info;
+
+ DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR;
+
+ tData.setup(0, code->fileSize(TGSI_FILE_TEMPORARY), 4, 4, tFile);
+ pData.setup(0, code->fileSize(TGSI_FILE_PREDICATE), 4, 4, FILE_PREDICATE);
+ aData.setup(0, code->fileSize(TGSI_FILE_ADDRESS), 4, 4, FILE_ADDRESS);
+ oData.setup(0, code->fileSize(TGSI_FILE_OUTPUT), 4, 4, FILE_GPR);
+
+ lData = NULL;
+ iData = NULL;
+
+ zero = mkImm((uint32_t)0);
+
+ vtxBaseValid = 0;
+}
+
+Converter::~Converter()
+{
+ if (lData)
+ delete[] lData;
+ if (iData)
+ delete[] iData;
+}
+
+bool
+Converter::run()
+{
+ BasicBlock *entry = new BasicBlock(prog->main);
+ BasicBlock *leave = new BasicBlock(prog->main);
+
+ if (code->tempArrayCount && !lData) {
+ uint32_t volume = 0;
+ lData = new DataArray[code->tempArrayCount];
+ if (!lData)
+ return false;
+ for (int i = 0; i < code->tempArrayCount; ++i) {
+ int len = code->tempArrays[i].u32 >> 2;
+ int dim = code->tempArrays[i].u32 & 3;
+ lData[i].setParent(this);
+ lData[i].setup(volume, len, dim, 4, FILE_MEMORY_LOCAL);
+ volume += (len * dim * 4 + 0xf) & ~0xf;
+ }
+ }
+ if (code->immdArrayCount && !iData) {
+ uint32_t volume = 0;
+ iData = new DataArray[code->immdArrayCount];
+ if (!iData)
+ return false;
+ for (int i = 0; i < code->immdArrayCount; ++i) {
+ int len = code->immdArrays[i].u32 >> 2;
+ int dim = code->immdArrays[i].u32 & 3;
+ iData[i].setParent(this);
+ iData[i].setup(volume, len, dim, 4, FILE_MEMORY_CONST, 14);
+ volume += (len * dim * 4 + 0xf) & ~0xf;
+ }
+ }
+
+ prog->main->setEntry(entry);
+ prog->main->setExit(leave);
+
+ setPosition(entry, true);
+ entryBBs.push(entry);
+ leaveBBs.push(leave);
+
+ if (info->io.clipDistanceCount) {
+ for (int c = 0; c < 4; ++c)
+ clipVtx[c] = getScratch();
+ }
+
+ if (prog->getType() == Program::TYPE_FRAGMENT) {
+ Symbol *sv = mkSysVal(SV_POSITION, 3);
+ fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
+ mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+ }
+
+ for (ip = 0; ip < code->scan.num_instructions; ++ip) {
+ if (!handleInstruction(&code->insns[ip]))
+ return false;
+ }
+ return true;
+}
+
+} // unnamed namespace
+
+namespace nv50_ir {
+
+bool
+Program::makeFromTGSI(struct nv50_ir_prog_info *info)
+{
+ tgsi::Source src(info);
+ if (!src.scanSource())
+ return false;
+
+ Converter builder(this, &src);
+ return builder.run();
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp
new file mode 100644
index 00000000000..08075751d14
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp
@@ -0,0 +1,381 @@
+
+#include "nv50_ir_graph.h"
+
+namespace nv50_ir {
+
+Graph::Graph()
+{
+ root = NULL;
+ size = 0;
+ sequence = 0;
+}
+
+Graph::~Graph()
+{
+ Iterator *iter = this->safeIteratorDFS();
+
+ for (; !iter->end(); iter->next())
+ reinterpret_cast<Node *>(iter->get())->cut();
+
+ putIterator(iter);
+}
+
+void Graph::insert(Node *node)
+{
+ if (!root) {
+ root = node;
+ size = 1;
+ node->graph = this;
+ } else {
+ root->attach(node, Edge::TREE);
+ }
+}
+
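+// Remove this edge from the circular outgoing list of its origin and the
+// circular incoming list of its target, updating list heads and edge counts.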
+void Graph::Edge::unlink()
+{
+ if (origin) {
+ prev[0]->next[0] = next[0];
+ next[0]->prev[0] = prev[0];
+ if (origin->out == this)
+ origin->out = (next[0] == this) ? NULL : next[0];
+
+ --origin->outCount;
+ }
+ if (target) {
+ prev[1]->next[1] = next[1];
+ next[1]->prev[1] = prev[1];
+ if (target->in == this)
+ target->in = (next[1] == this) ? NULL : next[1];
+
+ --target->inCount;
+ }
+}
+
+const char *Graph::Edge::typeStr() const
+{
+ switch (type) {
+ case TREE: return "tree";
+ case FORWARD: return "forward";
+ case BACK: return "back";
+ case CROSS: return "cross";
+ case DUMMY: return "dummy";
+ case UNKNOWN:
+ default:
+ return "unk";
+ }
+}
+
+Graph::Node::Node(void *priv) : data(priv),
+ in(0), out(0), graph(0),
+ visited(0),
+ inCount(0), outCount(0)
+{
+ // nothing to do
+}
+
+void Graph::Node::attach(Node *node, Edge::Type kind)
+{
+ Edge *edge = new Edge(this, node, kind);
+
+ // insert head
+ if (this->out) {
+ edge->next[0] = this->out;
+ edge->prev[0] = this->out->prev[0];
+ edge->prev[0]->next[0] = edge;
+ this->out->prev[0] = edge;
+ }
+ this->out = edge;
+
+ if (node->in) {
+ edge->next[1] = node->in;
+ edge->prev[1] = node->in->prev[1];
+ edge->prev[1]->next[1] = edge;
+ node->in->prev[1] = edge;
+ }
+ node->in = edge;
+
+ ++this->outCount;
+ ++node->inCount;
+
+ assert(this->graph);
+ if (!node->graph) {
+ node->graph = this->graph;
+ ++node->graph->size;
+ }
+
+ if (kind == Edge::UNKNOWN)
+ graph->classifyEdges();
+}
+
+bool Graph::Node::detach(Graph::Node *node)
+{
+ EdgeIterator ei = this->outgoing();
+ for (; !ei.end(); ei.next())
+ if (ei.getNode() == node)
+ break;
+ if (ei.end()) {
+ ERROR("no such node attached\n");
+ return false;
+ }
+ delete ei.getEdge();
+ return true;
+}
+
+// Cut a node from the graph, deleting all attached edges.
+void Graph::Node::cut()
+{
+ if (!graph || (!in && !out))
+ return;
+
+ while (out)
+ delete out;
+ while (in)
+ delete in;
+
+ if (graph->root == this)
+ graph->root = NULL;
+}
+
+Graph::Edge::Edge(Node *org, Node *tgt, Type kind)
+{
+ target = tgt;
+ origin = org;
+ type = kind;
+
+ next[0] = next[1] = this;
+ prev[0] = prev[1] = this;
+}
+
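+// Return true if this node can be reached from 'node' without passing through
+// 'term', following only non-BACK, non-DUMMY edges.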
+bool
+Graph::Node::reachableBy(Node *node, Node *term)
+{
+ Stack stack;
+ Node *pos;
+ const int seq = graph->nextSequence();
+
+ stack.push(node);
+
+ while (stack.getSize()) {
+ pos = reinterpret_cast<Node *>(stack.pop().u.p);
+
+ if (pos == this)
+ return true;
+ if (pos == term)
+ continue;
+
+ for (EdgeIterator ei = pos->outgoing(); !ei.end(); ei.next()) {
+ if (ei.getType() == Edge::BACK || ei.getType() == Edge::DUMMY)
+ continue;
+ if (ei.getNode()->visit(seq))
+ stack.push(ei.getNode());
+ }
+ }
+ return pos == this;
+}
+
+class DFSIterator : public Graph::GraphIterator
+{
+public:
+ DFSIterator(Graph *graph, const bool preorder)
+ {
+ unsigned int seq = graph->nextSequence();
+
+ nodes = new Graph::Node * [graph->getSize() + 1];
+ count = 0;
+ pos = 0;
+ nodes[graph->getSize()] = 0;
+
+ if (graph->getRoot()) {
+ graph->getRoot()->visit(seq);
+ search(graph->getRoot(), preorder, seq);
+ }
+ }
+
+ ~DFSIterator()
+ {
+ if (nodes)
+ delete[] nodes;
+ }
+
+ void search(Graph::Node *node, const bool preorder, const int sequence)
+ {
+ if (preorder)
+ nodes[count++] = node;
+
+ for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
+ if (ei.getNode()->visit(sequence))
+ search(ei.getNode(), preorder, sequence);
+
+ if (!preorder)
+ nodes[count++] = node;
+ }
+
+ virtual bool end() const { return pos >= count; }
+ virtual void next() { if (pos < count) ++pos; }
+ virtual void *get() const { return nodes[pos]; }
+
+ void reset() { pos = 0; }
+
+protected:
+ Graph::Node **nodes;
+ int count;
+ int pos;
+};
+
+Graph::GraphIterator *Graph::iteratorDFS(bool preorder)
+{
+ return new DFSIterator(this, preorder);
+}
+
+Graph::GraphIterator *Graph::safeIteratorDFS(bool preorder)
+{
+ return this->iteratorDFS(preorder);
+}
+
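+// Visits the nodes of a CFG in an order in which a node appears only after
+// all of its predecessors along TREE/FORWARD/DUMMY edges; targets of CROSS
+// edges are deferred until the current chain is exhausted, and BACK edges
+// are ignored.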
+class CFGIterator : public Graph::GraphIterator
+{
+public:
+ CFGIterator(Graph *graph)
+ {
+ nodes = new Graph::Node * [graph->getSize() + 1];
+ count = 0;
+ pos = 0;
+ nodes[graph->getSize()] = 0;
+
+ // TODO: argh, use graph->sequence instead of tag and just raise it by > 1
+ Iterator *iter;
+ for (iter = graph->iteratorDFS(); !iter->end(); iter->next())
+ reinterpret_cast<Graph::Node *>(iter->get())->tag = 0;
+ graph->putIterator(iter);
+
+ if (graph->getRoot())
+ search(graph->getRoot(), graph->nextSequence());
+ }
+
+ ~CFGIterator()
+ {
+ if (nodes)
+ delete[] nodes;
+ }
+
+ virtual void *get() const { return nodes[pos]; }
+ virtual bool end() const { return pos >= count; }
+ virtual void next() { if (pos < count) ++pos; }
+
+private:
+ void search(Graph::Node *node, const int sequence)
+ {
+ Stack bb, cross;
+
+ bb.push(node);
+
+ while (bb.getSize()) {
+ node = reinterpret_cast<Graph::Node *>(bb.pop().u.p);
+ assert(node);
+ if (!node->visit(sequence))
+ continue;
+ node->tag = 0;
+
+ for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) {
+ switch (ei.getType()) {
+ case Graph::Edge::TREE:
+ case Graph::Edge::FORWARD:
+ case Graph::Edge::DUMMY:
+ if (++(ei.getNode()->tag) == ei.getNode()->incidentCountFwd())
+ bb.push(ei.getNode());
+ break;
+ case Graph::Edge::BACK:
+ continue;
+ case Graph::Edge::CROSS:
+ if (++(ei.getNode()->tag) == 1)
+ cross.push(ei.getNode());
+ break;
+ default:
+ assert(!"unknown edge kind in CFG");
+ break;
+ }
+ }
+ nodes[count++] = node;
+
+ if (bb.getSize() == 0)
+ cross.moveTo(bb);
+ }
+ }
+
+private:
+ Graph::Node **nodes;
+ int count;
+ int pos;
+};
+
+Graph::GraphIterator *Graph::iteratorCFG()
+{
+ return new CFGIterator(this);
+}
+
+Graph::GraphIterator *Graph::safeIteratorCFG()
+{
+ return this->iteratorCFG();
+}
+
+void Graph::classifyEdges()
+{
+ DFSIterator *iter;
+ int seq;
+
+ for (iter = new DFSIterator(this, true); !iter->end(); iter->next()) {
+ Node *node = reinterpret_cast<Node *>(iter->get());
+ node->visit(0);
+ node->tag = 0;
+ }
+ putIterator(iter);
+
+ classifyDFS(root, (seq = 0));
+
+ sequence = seq;
+}
+
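+// Classify an edge by comparing DFS sequence numbers: an unvisited target
+// yields TREE, a larger sequence FORWARD, a target still on the current DFS
+// path (tag set) BACK, and anything else CROSS.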
+void Graph::classifyDFS(Node *curr, int& seq)
+{
+ Graph::Edge *edge;
+ Graph::Node *node;
+
+ curr->visit(++seq);
+ curr->tag = 1;
+
+ for (edge = curr->out; edge; edge = edge->next[0]) {
+ node = edge->target;
+ if (edge->type == Edge::DUMMY)
+ continue;
+
+ if (node->getSequence() == 0) {
+ edge->type = Edge::TREE;
+ classifyDFS(node, seq);
+ } else
+ if (node->getSequence() > curr->getSequence()) {
+ edge->type = Edge::FORWARD;
+ } else {
+ edge->type = node->tag ? Edge::BACK : Edge::CROSS;
+ }
+ }
+
+ for (edge = curr->in; edge; edge = edge->next[1]) {
+ node = edge->origin;
+ if (edge->type == Edge::DUMMY)
+ continue;
+
+ if (node->getSequence() == 0) {
+ edge->type = Edge::TREE;
+ classifyDFS(node, seq);
+ } else
+ if (node->getSequence() > curr->getSequence()) {
+ edge->type = Edge::FORWARD;
+ } else {
+ edge->type = node->tag ? Edge::BACK : Edge::CROSS;
+ }
+ }
+
+ curr->tag = 0;
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h
new file mode 100644
index 00000000000..6407ff98ab5
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h
@@ -0,0 +1,207 @@
+
+#ifndef __NV50_IR_GRAPH_H__
+#define __NV50_IR_GRAPH_H__
+
+#include "nv50_ir_util.h"
+
+namespace nv50_ir {
+
+#define ITER_NODE(x) reinterpret_cast<Graph::Node *>((x).get())
+#define ITER_EDGE(x) reinterpret_cast<Graph::Edge *>((x).get())
+
+// A connected graph.
+class Graph
+{
+public:
+ class Node;
+
+ class GraphIterator : public Iterator
+ {
+ public:
+ virtual ~GraphIterator() { };
+ };
+
+ class Edge
+ {
+ public:
+ enum Type
+ {
+ UNKNOWN,
+ TREE,
+ FORWARD,
+ BACK,
+ CROSS, // e.g. loop break
+ DUMMY
+ };
+
+ Edge(Node *org, Node *tgt, Type kind);
+ ~Edge() { unlink(); }
+
+ inline Node *getOrigin() const { return origin; }
+ inline Node *getTarget() const { return target; }
+
+ inline Type getType() const { return type; }
+ const char *typeStr() const;
+
+ private:
+ Node *origin;
+ Node *target;
+
+ Type type;
+ Edge *next[2]; // next edge outgoing/incident from/to origin/target
+ Edge *prev[2];
+
+ void unlink();
+
+ friend class Graph;
+ };
+
+ class EdgeIterator : public Iterator
+ {
+ public:
+ EdgeIterator() : e(0), t(0), d(0) { }
+ EdgeIterator(Graph::Edge *first, int dir) : e(first), t(first), d(dir) { }
+
+ virtual void next() { e = (e->next[d] == t) ? 0 : e->next[d]; }
+ virtual bool end() const { return !e; }
+ virtual void *get() const { return e; }
+
+ inline Node *getNode() const { assert(e); return d ?
+ e->origin : e->target; }
+ inline Edge *getEdge() const { return e; }
+ inline Edge::Type getType() { return e ? e->getType() : Edge::UNKNOWN; }
+
+ private:
+ Graph::Edge *e;
+ Graph::Edge *t;
+ int d;
+ };
+
+ class Node
+ {
+ public:
+ Node(void *);
+ ~Node() { cut(); }
+
+ void attach(Node *, Edge::Type);
+ bool detach(Node *);
+ void cut();
+
+ inline EdgeIterator outgoing() const;
+ inline EdgeIterator incident() const;
+
+ inline Node *parent() const; // returns NULL if count(incident edges) != 1
+
+ bool reachableBy(Node *node, Node *term);
+
+ inline bool visit(int);
+ inline int getSequence() const;
+
+ inline int incidentCountFwd() const; // count of incident non-back edges
+ inline int incidentCount() const { return inCount; }
+ inline int outgoingCount() const { return outCount; }
+
+ Graph *getGraph() const { return graph; }
+
+ void *data;
+
+ private:
+ Edge *in;
+ Edge *out;
+ Graph *graph;
+
+ int visited;
+
+ int16_t inCount;
+ int16_t outCount;
+ public:
+ int tag; // for temporary use
+
+ friend class Graph;
+ };
+
+public:
+ Graph();
+ ~Graph(); // does *not* free the nodes (make it an option ?)
+
+ inline Node *getRoot() const { return root; }
+
+ inline unsigned int getSize() const { return size; }
+
+ inline int nextSequence();
+
+ void insert(Node *node); // attach to or set as root
+
+ GraphIterator *iteratorDFS(bool preorder = true);
+ GraphIterator *iteratorCFG();
+
+ // safe iterators are unaffected by changes to the *edges* of the graph
+ GraphIterator *safeIteratorDFS(bool preorder = true);
+ GraphIterator *safeIteratorCFG();
+
+ inline void putIterator(Iterator *); // should be GraphIterator *
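+ // Illustrative usage:
+ //   GraphIterator *it = g.iteratorCFG();
+ //   for (; !it->end(); it->next()) { /* use it->get() */ }
+ //   g.putIterator(it);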
+
+ void classifyEdges();
+
+private:
+ void classifyDFS(Node *, int&);
+
+private:
+ Node *root;
+ unsigned int size;
+ int sequence;
+};
+
+int Graph::nextSequence()
+{
+ return ++sequence;
+}
+
+Graph::Node *Graph::Node::parent() const
+{
+ if (inCount != 1)
+ return NULL;
+ assert(in);
+ return in->origin;
+}
+
+bool Graph::Node::visit(int v)
+{
+ if (visited == v)
+ return false;
+ visited = v;
+ return true;
+}
+
+int Graph::Node::getSequence() const
+{
+ return visited;
+}
+
+void Graph::putIterator(Iterator *iter)
+{
+ delete reinterpret_cast<GraphIterator *>(iter);
+}
+
+Graph::EdgeIterator Graph::Node::outgoing() const
+{
+ return EdgeIterator(out, 0);
+}
+
+Graph::EdgeIterator Graph::Node::incident() const
+{
+ return EdgeIterator(in, 1);
+}
+
+int Graph::Node::incidentCountFwd() const
+{
+ int n = 0;
+ for (EdgeIterator ei = incident(); !ei.end(); ei.next())
+ if (ei.getType() != Edge::BACK)
+ ++n;
+ return n;
+}
+
+} // namespace nv50_ir
+
+#endif // __NV50_IR_GRAPH_H__
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
new file mode 100644
index 00000000000..8730e953482
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
@@ -0,0 +1,328 @@
+
+#ifndef __NV50_IR_INLINES_H__
+#define __NV50_IR_INLINES_H__
+
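+// Reverse a comparison for swapped operands, e.g. LT <-> GT and LE <-> GE
+// (a < b is equivalent to b > a).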
+static inline CondCode reverseCondCode(CondCode cc)
+{
+ static const uint8_t ccRev[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+
+ return static_cast<CondCode>(ccRev[cc & 7] | (cc & ~7));
+}
+
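+// Logical negation of a comparison, e.g. LT <-> GE and EQ <-> NE.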
+static inline CondCode inverseCondCode(CondCode cc)
+{
+ return static_cast<CondCode>(cc ^ 7);
+}
+
+static inline bool isMemoryFile(DataFile f)
+{
+ return (f >= FILE_MEMORY_CONST && f <= FILE_MEMORY_LOCAL);
+}
+
+static inline bool isTextureOp(operation op)
+{
+ return (op >= OP_TEX && op <= OP_TEXCSAA);
+}
+
+static inline unsigned int typeSizeof(DataType ty)
+{
+ switch (ty) {
+ case TYPE_U8:
+ case TYPE_S8:
+ return 1;
+ case TYPE_F16:
+ case TYPE_U16:
+ case TYPE_S16:
+ return 2;
+ case TYPE_F32:
+ case TYPE_U32:
+ case TYPE_S32:
+ return 4;
+ case TYPE_F64:
+ case TYPE_U64:
+ case TYPE_S64:
+ return 8;
+ case TYPE_B96:
+ return 12;
+ case TYPE_B128:
+ return 16;
+ default:
+ return 0;
+ }
+}
+
+static inline DataType typeOfSize(unsigned int size,
+ bool flt = false, bool sgn = false)
+{
+ switch (size) {
+ case 1: return sgn ? TYPE_S8 : TYPE_U8;
+ case 2: return flt ? TYPE_F16 : (sgn ? TYPE_S16 : TYPE_U16);
+ case 8: return flt ? TYPE_F64 : (sgn ? TYPE_S64 : TYPE_U64);
+ case 12: return TYPE_B96;
+ case 16: return TYPE_B128;
+ case 4:
+ default:
+ return flt ? TYPE_F32 : (sgn ? TYPE_S32 : TYPE_U32);
+ }
+}
+
+static inline bool isFloatType(DataType ty)
+{
+ return (ty >= TYPE_F16 && ty <= TYPE_F64);
+}
+
+static inline bool isSignedIntType(DataType ty)
+{
+ return (ty == TYPE_S8 || ty == TYPE_S16 || ty == TYPE_S32);
+}
+
+static inline bool isSignedType(DataType ty)
+{
+ switch (ty) {
+ case TYPE_NONE:
+ case TYPE_U8:
+ case TYPE_U16:
+ case TYPE_U32:
+ case TYPE_B96:
+ case TYPE_B128:
+ return false;
+ default:
+ return true;
+ }
+}
+
+const ValueRef *ValueRef::getIndirect(int dim) const
+{
+ return isIndirect(dim) ? &insn->src[indirect[dim]] : NULL;
+}
+
+DataFile ValueRef::getFile() const
+{
+ return value ? value->reg.file : FILE_NULL;
+}
+
+unsigned int ValueRef::getSize() const
+{
+ return value ? value->reg.size : 0;
+}
+
+Value *ValueRef::rep() const
+{
+ assert(value);
+ return value->join;
+}
+
+Value *ValueDef::rep() const
+{
+ assert(value);
+ return value->join;
+}
+
+DataFile ValueDef::getFile() const
+{
+ return value ? value->reg.file : FILE_NULL;
+}
+
+unsigned int ValueDef::getSize() const
+{
+ return value ? value->reg.size : 0;
+}
+
+void ValueDef::setSSA(LValue *lval)
+{
+ Value *save = value;
+
+ this->set(NULL);
+ prev = reinterpret_cast<ValueDef *>(save);
+ value = lval;
+ lval->defs = this;
+}
+
+void ValueDef::restoreDefList()
+{
+ if (next == this)
+ prev = this;
+}
+
+const LValue *ValueDef::preSSA() const
+{
+ return reinterpret_cast<LValue *>(prev);
+}
+
+Instruction *Value::getInsn() const
+{
+ assert(!defs || getUniqueInsn());
+ return defs ? defs->getInsn() : NULL;
+}
+
+Instruction *Value::getUniqueInsn() const
+{
+ if (defs) {
+ if (join != this) {
+ ValueDef::Iterator it = defs->iterator();
+ while (!it.end() && it.get()->get() != this)
+ it.next();
+ assert(it.get()->get() == this);
+ return it.get()->getInsn();
+ }
+
+ // after regalloc, the definitions of coalesced values are linked
+ if (reg.data.id < 0) {
+ ValueDef::Iterator it = defs->iterator();
+ int nDef;
+ for (nDef = 0; !it.end() && nDef < 2; it.next())
+ if (it.get()->get() == this) // don't count joined values
+ ++nDef;
+ if (nDef > 1)
+ WARN("value %%%i not uniquely defined\n", id); // return NULL ?
+ }
+
+ assert(defs->get() == this);
+ return defs->getInsn();
+ }
+ return NULL;
+}
+
+Value *Instruction::getIndirect(int s, int dim) const
+{
+ return src[s].isIndirect(dim) ? getSrc(src[s].indirect[dim]) : NULL;
+}
+
+Value *Instruction::getPredicate() const
+{
+ return (predSrc >= 0) ? getSrc(predSrc) : NULL;
+}
+
+Value *TexInstruction::getIndirectR() const
+{
+ return tex.rIndirectSrc >= 0 ? getSrc(tex.rIndirectSrc) : NULL;
+}
+
+Value *TexInstruction::getIndirectS() const
+{
+ return tex.sIndirectSrc >= 0 ? getSrc(tex.sIndirectSrc) : NULL;
+}
+
+CmpInstruction *Instruction::asCmp()
+{
+ if (op >= OP_SET_AND && op <= OP_SLCT && op != OP_SELP)
+ return static_cast<CmpInstruction *>(this);
+ return NULL;
+}
+
+const CmpInstruction *Instruction::asCmp() const
+{
+ if (op >= OP_SET_AND && op <= OP_SLCT && op != OP_SELP)
+ return static_cast<const CmpInstruction *>(this);
+ return NULL;
+}
+
+FlowInstruction *Instruction::asFlow()
+{
+ if (op >= OP_BRA && op <= OP_JOIN)
+ return static_cast<FlowInstruction *>(this);
+ return NULL;
+}
+
+const FlowInstruction *Instruction::asFlow() const
+{
+ if (op >= OP_BRA && op <= OP_JOIN)
+ return static_cast<const FlowInstruction *>(this);
+ return NULL;
+}
+
+TexInstruction *Instruction::asTex()
+{
+ if (op >= OP_TEX && op <= OP_TEXCSAA)
+ return static_cast<TexInstruction *>(this);
+ return NULL;
+}
+
+const TexInstruction *Instruction::asTex() const
+{
+ if (op >= OP_TEX && op <= OP_TEXCSAA)
+ return static_cast<const TexInstruction *>(this);
+ return NULL;
+}
+
+// XXX: use a virtual function so we're really really safe ?
+LValue *Value::asLValue()
+{
+ if (reg.file >= FILE_GPR && reg.file <= FILE_ADDRESS)
+ return static_cast<LValue *>(this);
+ return NULL;
+}
+
+Symbol *Value::asSym()
+{
+ if (reg.file >= FILE_MEMORY_CONST)
+ return static_cast<Symbol *>(this);
+ return NULL;
+}
+
+const Symbol *Value::asSym() const
+{
+ if (reg.file >= FILE_MEMORY_CONST)
+ return static_cast<const Symbol *>(this);
+ return NULL;
+}
+
+void Symbol::setOffset(int32_t offset)
+{
+ reg.data.offset = offset;
+}
+
+void Symbol::setAddress(Symbol *base, int32_t offset)
+{
+ baseSym = base;
+ reg.data.offset = offset;
+}
+
+void Symbol::setSV(SVSemantic sv, uint32_t index)
+{
+ reg.data.sv.sv = sv;
+ reg.data.sv.index = index;
+}
+
+ImmediateValue *Value::asImm()
+{
+ if (reg.file == FILE_IMMEDIATE)
+ return static_cast<ImmediateValue *>(this);
+ return NULL;
+}
+
+const ImmediateValue *Value::asImm() const
+{
+ if (reg.file == FILE_IMMEDIATE)
+ return static_cast<const ImmediateValue *>(this);
+ return NULL;
+}
+
+Value *Value::get(Iterator &it)
+{
+ return reinterpret_cast<Value *>(it.get());
+}
+
+bool BasicBlock::reachableBy(BasicBlock *by, BasicBlock *term)
+{
+ return cfg.reachableBy(&by->cfg, &term->cfg);
+}
+
+BasicBlock *BasicBlock::get(Iterator &iter)
+{
+ return reinterpret_cast<BasicBlock *>(iter.get());
+}
+
+BasicBlock *BasicBlock::get(Graph::Node *node)
+{
+ assert(node);
+ return reinterpret_cast<BasicBlock *>(node->data);
+}
+
+LValue *Function::getLValue(int id)
+{
+ assert((unsigned int)id < (unsigned int)allLValues.getSize());
+ return reinterpret_cast<LValue *>(allLValues.get(id));
+}
+
+#endif // __NV50_IR_INLINES_H__
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
new file mode 100644
index 00000000000..bd331ea8f03
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
@@ -0,0 +1,2192 @@
+
+#include "nv50_ir.h"
+#include "nv50_ir_target.h"
+#include "nv50_ir_build_util.h"
+
+extern "C" {
+#include "util/u_math.h"
+}
+
+namespace nv50_ir {
+
+bool
+Instruction::isNop() const
+{
+ if (op == OP_CONSTRAINT || op == OP_PHI)
+ return true;
+ if (terminator || join) // XXX: should terminator imply flow ?
+ return false;
+ if (!fixed && op == OP_NOP)
+ return true;
+
+ if (def[0].exists() && def[0].rep()->reg.data.id < 0) {
+ for (int d = 1; defExists(d); ++d)
+ if (def[d].rep()->reg.data.id >= 0)
+ WARN("part of vector result is unused !\n");
+ return true;
+ }
+
+ if (op == OP_MOV || op == OP_UNION) {
+ if (!def[0].rep()->equals(getSrc(0)))
+ return false;
+ if (op == OP_UNION)
+ if (!def[0].rep()->equals(getSrc(1)))
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+bool Instruction::isDead() const
+{
+ if (op == OP_STORE ||
+ op == OP_EXPORT)
+ return false;
+
+ for (int d = 0; defExists(d); ++d)
+ if (getDef(d)->refCount() || getDef(d)->reg.data.id >= 0)
+ return false;
+
+ if (terminator || asFlow())
+ return false;
+ if (fixed)
+ return false;
+
+ return true;
+}
+
+// =============================================================================
+
+class CopyPropagation : public Pass
+{
+private:
+ virtual bool visit(BasicBlock *);
+};
+
+// Propagate all MOVs forward to make subsequent optimization easier, except if
+// the sources stem from a phi, in which case we don't want to mess up potential
+// swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def.
+bool
+CopyPropagation::visit(BasicBlock *bb)
+{
+ Instruction *mov, *si, *next;
+
+ for (mov = bb->getEntry(); mov; mov = next) {
+ next = mov->next;
+ if (mov->op != OP_MOV || mov->fixed || !mov->getSrc(0)->asLValue())
+ continue;
+ si = mov->getSrc(0)->getInsn();
+ if (mov->getDef(0)->reg.data.id < 0 && si && si->op != OP_PHI) {
+ // propagate
+ mov->def[0].replace(mov->getSrc(0), false);
+ delete_Instruction(prog, mov);
+ }
+ }
+ return true;
+}
+
+// =============================================================================
+
+class LoadPropagation : public Pass
+{
+private:
+ virtual bool visit(BasicBlock *);
+
+ void checkSwapSrc01(Instruction *);
+
+ bool isCSpaceLoad(Instruction *);
+ bool isImmd32Load(Instruction *);
+};
+
+bool
+LoadPropagation::isCSpaceLoad(Instruction *ld)
+{
+ return ld && ld->op == OP_LOAD && ld->src[0].getFile() == FILE_MEMORY_CONST;
+}
+
+bool
+LoadPropagation::isImmd32Load(Instruction *ld)
+{
+ if (!ld || (ld->op != OP_MOV) || (typeSizeof(ld->dType) != 4))
+ return false;
+ return ld->src[0].getFile() == FILE_IMMEDIATE;
+}
+
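+// For commutative ops (and for SET/SLCT, with the condition code adjusted),
+// move a constant buffer load or 32-bit immediate from src(0) to src(1),
+// presumably so the target can fold it directly into the instruction.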
+void
+LoadPropagation::checkSwapSrc01(Instruction *insn)
+{
+ if (!prog->getTarget()->getOpInfo(insn).commutative)
+ if (insn->op != OP_SET && insn->op != OP_SLCT)
+ return;
+ if (insn->src[1].getFile() != FILE_GPR)
+ return;
+
+ Instruction *i0 = insn->getSrc(0)->getInsn();
+ Instruction *i1 = insn->getSrc(1)->getInsn();
+
+ if (isCSpaceLoad(i0)) {
+ if (!isCSpaceLoad(i1))
+ insn->swapSources(0, 1);
+ else
+ return;
+ } else
+ if (isImmd32Load(i0)) {
+ if (!isCSpaceLoad(i1) && !isImmd32Load(i1))
+ insn->swapSources(0, 1);
+ else
+ return;
+ } else {
+ return;
+ }
+
+ if (insn->op == OP_SET)
+ insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond);
+ else
+ if (insn->op == OP_SLCT)
+ insn->asCmp()->setCond = inverseCondCode(insn->asCmp()->setCond);
+}
+
+bool
+LoadPropagation::visit(BasicBlock *bb)
+{
+ const Target *targ = prog->getTarget();
+ Instruction *next;
+
+ for (Instruction *i = bb->getEntry(); i; i = next) {
+ next = i->next;
+
+ if (i->srcExists(1))
+ checkSwapSrc01(i);
+
+ for (int s = 0; i->srcExists(s); ++s) {
+ Instruction *ld = i->getSrc(s)->getInsn();
+
+ if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV))
+ continue;
+ if (!targ->insnCanLoad(i, s, ld))
+ continue;
+
+ // propagate !
+ i->setSrc(s, ld->getSrc(0));
+ if (ld->src[0].isIndirect(0))
+ i->setIndirect(s, 0, ld->getIndirect(0, 0));
+
+ if (ld->getDef(0)->refCount() == 0)
+ delete_Instruction(prog, ld);
+ }
+ }
+ return true;
+}
+
+// =============================================================================
+
+// Evaluate constant expressions.
+class ConstantFolding : public Pass
+{
+public:
+ bool foldAll(Program *);
+
+private:
+ virtual bool visit(BasicBlock *);
+
+ void expr(Instruction *, ImmediateValue *, ImmediateValue *);
+ void opnd(Instruction *, ImmediateValue *, int s);
+
+ void unary(Instruction *, const ImmediateValue&);
+
+ // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
+ CmpInstruction *findOriginForTestWithZero(Value *);
+
+ unsigned int foldCount;
+
+ BuildUtil bld;
+};
+
+// TODO: remember generated immediates and only revisit these
+bool
+ConstantFolding::foldAll(Program *prog)
+{
+ unsigned int iterCount = 0;
+ do {
+ foldCount = 0;
+ if (!run(prog))
+ return false;
+ } while (foldCount && ++iterCount < 2);
+ return true;
+}
+
+bool
+ConstantFolding::visit(BasicBlock *bb)
+{
+ Instruction *i, *next;
+
+ for (i = bb->getEntry(); i; i = next) {
+ next = i->next;
+ if (i->op == OP_MOV) // continue early, MOV appears frequently
+ continue;
+
+ ImmediateValue *src0 = i->src[0].getImmediate();
+ ImmediateValue *src1 = i->src[1].getImmediate();
+
+ if (src0 && src1)
+ expr(i, src0, src1);
+ else
+ if (src0)
+ opnd(i, src0, 0);
+ else
+ if (src1)
+ opnd(i, src1, 1);
+ }
+ return true;
+}
+
+CmpInstruction *
+ConstantFolding::findOriginForTestWithZero(Value *value)
+{
+ if (!value)
+ return NULL;
+ Instruction *insn = value->getInsn();
+
+ while (insn && insn->op != OP_SET) {
+ Instruction *next = NULL;
+ switch (insn->op) {
+ case OP_NEG:
+ case OP_ABS:
+ case OP_CVT:
+ next = insn->getSrc(0)->getInsn();
+ if (insn->sType != next->dType)
+ return NULL;
+ break;
+ case OP_MOV:
+ next = insn->getSrc(0)->getInsn();
+ break;
+ default:
+ return NULL;
+ }
+ insn = next;
+ }
+ return insn ? insn->asCmp() : NULL;
+}
+
+void
+Modifier::applyTo(ImmediateValue& imm) const
+{
+ switch (imm.reg.type) {
+ case TYPE_F32:
+ if (bits & NV50_IR_MOD_ABS)
+ imm.reg.data.f32 = fabsf(imm.reg.data.f32);
+ if (bits & NV50_IR_MOD_NEG)
+ imm.reg.data.f32 = -imm.reg.data.f32;
+ if (bits & NV50_IR_MOD_SAT) {
+ if (imm.reg.data.f32 < 0.0f)
+ imm.reg.data.f32 = 0.0f;
+ else
+ if (imm.reg.data.f32 > 1.0f)
+ imm.reg.data.f32 = 1.0f;
+ }
+ assert(!(bits & NV50_IR_MOD_NOT));
+ break;
+
+ case TYPE_S8: // NOTE: will be extended
+ case TYPE_S16:
+ case TYPE_S32:
+ case TYPE_U8: // NOTE: treated as signed
+ case TYPE_U16:
+ case TYPE_U32:
+ if (bits & NV50_IR_MOD_ABS)
+ imm.reg.data.s32 = (imm.reg.data.s32 >= 0) ?
+ imm.reg.data.s32 : -imm.reg.data.s32;
+ if (bits & NV50_IR_MOD_NEG)
+ imm.reg.data.s32 = -imm.reg.data.s32;
+ if (bits & NV50_IR_MOD_NOT)
+ imm.reg.data.s32 = ~imm.reg.data.s32;
+ break;
+
+ case TYPE_F64:
+ if (bits & NV50_IR_MOD_ABS)
+ imm.reg.data.f64 = fabs(imm.reg.data.f64);
+ if (bits & NV50_IR_MOD_NEG)
+ imm.reg.data.f64 = -imm.reg.data.f64;
+ if (bits & NV50_IR_MOD_SAT) {
+ if (imm.reg.data.f64 < 0.0)
+ imm.reg.data.f64 = 0.0;
+ else
+ if (imm.reg.data.f64 > 1.0)
+ imm.reg.data.f64 = 1.0;
+ }
+ assert(!(bits & NV50_IR_MOD_NOT));
+ break;
+
+ default:
+ assert(!"invalid/unhandled type");
+ imm.reg.data.u64 = 0;
+ break;
+ }
+}
+
+operation
+Modifier::getOp() const
+{
+ switch (bits) {
+ case NV50_IR_MOD_ABS: return OP_ABS;
+ case NV50_IR_MOD_NEG: return OP_NEG;
+ case NV50_IR_MOD_SAT: return OP_SAT;
+ case NV50_IR_MOD_NOT: return OP_NOT;
+ case 0:
+ return OP_MOV;
+ default:
+ return OP_CVT;
+ }
+}
+
+void
+ConstantFolding::expr(Instruction *i,
+ ImmediateValue *src0, ImmediateValue *src1)
+{
+ ImmediateValue imm0(src0, i->sType);
+ ImmediateValue imm1(src1, i->sType);
+ struct Storage res;
+ struct Storage *const a = &imm0.reg, *const b = &imm1.reg;
+
+ i->src[0].mod.applyTo(imm0);
+ i->src[1].mod.applyTo(imm1);
+
+ switch (i->op) {
+ case OP_MAD:
+ case OP_FMA:
+ case OP_MUL:
+ if (i->dnz && i->dType == TYPE_F32) {
+ if (!isfinite(a->data.f32))
+ a->data.f32 = 0.0f;
+ if (!isfinite(b->data.f32))
+ b->data.f32 = 0.0f;
+ }
+ switch (i->dType) {
+ case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break;
+ case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
+ case TYPE_S32:
+ case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break;
+ default:
+ return;
+ }
+ break;
+ case OP_DIV:
+ if (b->data.u32 == 0)
+ break;
+ switch (i->dType) {
+ case TYPE_F32: res.data.f32 = a->data.f32 / b->data.f32; break;
+ case TYPE_F64: res.data.f64 = a->data.f64 / b->data.f64; break;
+ case TYPE_S32: res.data.s32 = a->data.s32 / b->data.s32; break;
+ case TYPE_U32: res.data.u32 = a->data.u32 / b->data.u32; break;
+ default:
+ return;
+ }
+ break;
+ case OP_ADD:
+ switch (i->dType) {
+ case TYPE_F32: res.data.f32 = a->data.f32 + b->data.f32; break;
+ case TYPE_F64: res.data.f64 = a->data.f64 + b->data.f64; break;
+ case TYPE_S32:
+ case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break;
+ default:
+ return;
+ }
+ break;
+ case OP_POW:
+ switch (i->dType) {
+ case TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break;
+ case TYPE_F64: res.data.f64 = pow(a->data.f64, b->data.f64); break;
+ default:
+ return;
+ }
+ break;
+ case OP_MAX:
+ switch (i->dType) {
+ case TYPE_F32: res.data.f32 = MAX2(a->data.f32, b->data.f32); break;
+ case TYPE_F64: res.data.f64 = MAX2(a->data.f64, b->data.f64); break;
+ case TYPE_S32: res.data.s32 = MAX2(a->data.s32, b->data.s32); break;
+ case TYPE_U32: res.data.u32 = MAX2(a->data.u32, b->data.u32); break;
+ default:
+ return;
+ }
+ break;
+ case OP_MIN:
+ switch (i->dType) {
+ case TYPE_F32: res.data.f32 = MIN2(a->data.f32, b->data.f32); break;
+ case TYPE_F64: res.data.f64 = MIN2(a->data.f64, b->data.f64); break;
+ case TYPE_S32: res.data.s32 = MIN2(a->data.s32, b->data.s32); break;
+ case TYPE_U32: res.data.u32 = MIN2(a->data.u32, b->data.u32); break;
+ default:
+ return;
+ }
+ break;
+ case OP_AND:
+ res.data.u64 = a->data.u64 & b->data.u64;
+ break;
+ case OP_OR:
+ res.data.u64 = a->data.u64 | b->data.u64;
+ break;
+ case OP_XOR:
+ res.data.u64 = a->data.u64 ^ b->data.u64;
+ break;
+ case OP_SHL:
+ res.data.u32 = a->data.u32 << b->data.u32;
+ break;
+ case OP_SHR:
+ switch (i->dType) {
+ case TYPE_S32: res.data.s32 = a->data.s32 >> b->data.u32; break;
+ case TYPE_U32: res.data.u32 = a->data.u32 >> b->data.u32; break;
+ default:
+ return;
+ }
+ break;
+ case OP_SLCT:
+ if (a->data.u32 != b->data.u32)
+ return;
+ res.data.u32 = a->data.u32;
+ break;
+ default:
+ return;
+ }
+ ++foldCount;
+
+ i->src[0].mod = Modifier(0);
+ i->src[1].mod = Modifier(0);
+
+ i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32));
+ i->setSrc(1, NULL);
+
+ i->getSrc(0)->reg.data = res.data;
+
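+ // For MAD/FMA the folded product becomes one addend: turn the instruction
+ // into an ADD of src(2) and the new immediate, then try folding again if
+ // src(2) is an immediate as well.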
+ if (i->op == OP_MAD || i->op == OP_FMA) {
+ i->op = OP_ADD;
+
+ i->setSrc(1, i->getSrc(0));
+ i->setSrc(0, i->getSrc(2));
+ i->setSrc(2, NULL);
+
+ i->src[1].mod = i->src[2].mod;
+
+ src0 = i->src[0].getImmediate();
+ if (src0)
+ expr(i, src0, i->getSrc(1)->asImm());
+ } else {
+ i->op = OP_MOV;
+ }
+}
+
+void
+ConstantFolding::unary(Instruction *i, const ImmediateValue &imm)
+{
+ Storage res;
+
+ if (i->dType != TYPE_F32)
+ return;
+ switch (i->op) {
+ case OP_NEG: res.data.f32 = -imm.reg.data.f32; break;
+ case OP_ABS: res.data.f32 = fabsf(imm.reg.data.f32); break;
+ case OP_RCP: res.data.f32 = 1.0f / imm.reg.data.f32; break;
+ case OP_RSQ: res.data.f32 = 1.0f / sqrtf(imm.reg.data.f32); break;
+ case OP_LG2: res.data.f32 = log2f(imm.reg.data.f32); break;
+ case OP_EX2: res.data.f32 = exp2f(imm.reg.data.f32); break;
+ case OP_SIN: res.data.f32 = sinf(imm.reg.data.f32); break;
+ case OP_COS: res.data.f32 = cosf(imm.reg.data.f32); break;
+ case OP_SQRT: res.data.f32 = sqrtf(imm.reg.data.f32); break;
+ case OP_PRESIN:
+ case OP_PREEX2:
+ // these should be handled in subsequent OP_SIN/COS/EX2
+ res.data.f32 = imm.reg.data.f32;
+ break;
+ default:
+ return;
+ }
+ i->op = OP_MOV;
+ i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.f32));
+ i->src[0].mod = Modifier(0);
+}
+
+void
+ConstantFolding::opnd(Instruction *i, ImmediateValue *src, int s)
+{
+ const int t = !s;
+ const operation op = i->op;
+
+ ImmediateValue imm(src, i->sType);
+
+ i->src[s].mod.applyTo(imm);
+
+ switch (i->op) {
+ case OP_MUL:
+ if (i->dType == TYPE_F32 && i->getSrc(t)->refCount() == 1) {
+ Instruction *si = i->getSrc(t)->getUniqueInsn();
+
+ if (si && si->op == OP_MUL) {
+ float f = imm.reg.data.f32;
+
+ if (si->src[1].getImmediate()) {
+ f *= si->src[1].getImmediate()->reg.data.f32;
+ si->setSrc(1, new_ImmediateValue(prog, f));
+ i->def[0].replace(i->getSrc(t), false);
+ break;
+ } else {
+ int fac;
+ if (f == 0.125f) fac = -3;
+ else
+ if (f == 0.250f) fac = -2;
+ else
+ if (f == 0.500f) fac = -1;
+ else
+ if (f == 2.000f) fac = +1;
+ else
+ if (f == 4.000f) fac = +2;
+ else
+ if (f == 8.000f) fac = +3;
+ else
+ fac = 0;
+ if (fac) {
+ // FIXME: allowed & modifier
+ si->postFactor = fac;
+ i->def[0].replace(i->getSrc(t), false);
+ break;
+ }
+ }
+ }
+ }
+ if (imm.isInteger(0)) {
+ i->op = OP_MOV;
+ i->setSrc(0, i->getSrc(s));
+ i->setSrc(1, NULL);
+ } else
+ if (imm.isInteger(1) || imm.isInteger(-1)) {
+ if (imm.isNegative())
+ i->src[t].mod = i->src[t].mod ^ Modifier(NV50_IR_MOD_NEG);
+ i->op = i->src[t].mod.getOp();
+ if (s == 0) {
+ i->setSrc(0, i->getSrc(1));
+ i->src[0].mod = i->src[1].mod;
+ i->src[1].mod = 0;
+ }
+ if (i->op != OP_CVT)
+ i->src[0].mod = 0;
+ i->setSrc(1, NULL);
+ } else
+ if (imm.isInteger(2) || imm.isInteger(-2)) {
+ if (imm.isNegative())
+ i->src[t].mod = i->src[t].mod ^ Modifier(NV50_IR_MOD_NEG);
+ i->op = OP_ADD;
+ i->setSrc(s, i->getSrc(t));
+ i->src[s].mod = i->src[t].mod;
+ } else
+ if (!isFloatType(i->sType) && !imm.isNegative() && imm.isPow2()) {
+ i->op = OP_SHL;
+ imm.applyLog2();
+ i->setSrc(1, new_ImmediateValue(prog, imm.reg.data.u32));
+ }
+ break;
+ case OP_ADD:
+ if (imm.isInteger(0)) {
+ if (s == 0) {
+ i->setSrc(0, i->getSrc(1));
+ i->src[0].mod = i->src[1].mod;
+ }
+ i->setSrc(1, NULL);
+ i->op = i->src[0].mod.getOp();
+ if (i->op != OP_CVT)
+ i->src[0].mod = Modifier(0);
+ }
+ break;
+
+ case OP_DIV:
+ if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32))
+ break;
+ bld.setPosition(i, false);
+ if (imm.reg.data.u32 == 0) {
+ break;
+ } else
+ if (imm.reg.data.u32 == 1) {
+ i->op = OP_MOV;
+ i->setSrc(1, NULL);
+ } else
+ if (i->dType == TYPE_U32 && imm.isPow2()) {
+ i->op = OP_SHL;
+ i->setSrc(1, bld.mkImm(util_logbase2(imm.reg.data.u32)));
+ } else
+ if (i->dType == TYPE_U32) {
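+ // Unsigned division by a constant using the multiply-high (magic number)
+ // method: l = ceil(log2(d)), m = floor(2^32 * (2^l - d) / d) + 1, and
+ // q = (((x - hi) >> 1) + hi) >> (l - 1) with hi = mulhi(x, m).
+ // Worked example (illustrative), d = 7: l = 3, m = 613566757; for x = 100,
+ // hi = 14 and (((100 - 14) >> 1) + 14) >> 2 = 14 = 100 / 7.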
+ Instruction *mul;
+ Value *tA, *tB;
+ const uint32_t d = imm.reg.data.u32;
+ uint32_t m;
+ int r, s;
+ uint32_t l = util_logbase2(d);
+ if (((uint32_t)1 << l) < d)
+ ++l;
+ m = (((uint64_t)1 << 32) * (((uint64_t)1 << l) - d)) / d + 1;
+ r = l ? 1 : 0;
+ s = l ? (l - 1) : 0;
+
+ tA = bld.getSSA();
+ tB = bld.getSSA();
+ mul = bld.mkOp2(OP_MUL, TYPE_U32, tA, i->getSrc(0),
+ bld.loadImm(NULL, m));
+ mul->subOp = NV50_IR_SUBOP_MUL_HIGH;
+ bld.mkOp2(OP_SUB, TYPE_U32, tB, i->getSrc(0), tA);
+ tA = bld.getSSA();
+ if (r)
+ bld.mkOp2(OP_SHR, TYPE_U32, tA, tB, bld.mkImm(r));
+ else
+ tA = tB;
+ tB = s ? bld.getSSA() : i->getDef(0);
+ bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
+ if (s)
+ bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
+
+ delete_Instruction(prog, i);
+ } else
+ if (imm.reg.data.s32 == -1) {
+ i->op = OP_NEG;
+ i->setSrc(1, NULL);
+ } else {
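+ // Signed division by a constant, magic-number style: tA = mulhi(x, m) + x,
+ // shifted right arithmetically by l - 1, then corrected for negative x by
+ // subtracting the SET result below (0 or all ones), and negated if d < 0.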
+ LValue *tA, *tB;
+ LValue *tD;
+ const int32_t d = imm.reg.data.s32;
+ int32_t m;
+ int32_t l = util_logbase2(static_cast<unsigned>(abs(d)));
+ if ((1 << l) < abs(d))
+ ++l;
+ if (!l)
+ l = 1;
+ m = ((uint64_t)1 << (32 + l - 1)) / abs(d) + 1 - ((uint64_t)1 << 32);
+
+ tA = bld.getSSA();
+ tB = bld.getSSA();
+ bld.mkOp3(OP_MAD, TYPE_S32, tA, i->getSrc(0), bld.loadImm(NULL, m),
+ i->getSrc(0))->subOp = NV50_IR_SUBOP_MUL_HIGH;
+ if (l > 1)
+ bld.mkOp2(OP_SHR, TYPE_S32, tB, tA, bld.mkImm(l - 1));
+ else
+ tB = tA;
+ tA = bld.getSSA();
+ bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, i->getSrc(0), bld.mkImm(0));
+ tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue();
+ bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
+ if (d < 0)
+ bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
+
+ delete_Instruction(prog, i);
+ }
+ break;
+
+ case OP_SET: // TODO: SET_AND,OR,XOR
+ {
+ CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
+ CondCode cc, ccZ;
+ if (i->src[t].mod != Modifier(0))
+ return;
+ if (imm.reg.data.u32 != 0 || !si || si->op != OP_SET)
+ return;
+ cc = si->setCond;
+ ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
+ if (s == 0)
+ ccZ = reverseCondCode(ccZ);
+ switch (ccZ) {
+ case CC_LT: cc = CC_FL; break;
+ case CC_GE: cc = CC_TR; break;
+ case CC_EQ: cc = inverseCondCode(cc); break;
+ case CC_LE: cc = inverseCondCode(cc); break;
+ case CC_GT: break;
+ case CC_NE: break;
+ default:
+ return;
+ }
+ i->asCmp()->setCond = cc;
+ i->setSrc(0, si->src[0]);
+ i->setSrc(1, si->src[1]);
+ i->sType = si->sType;
+ }
+ break;
+
+ case OP_SHL:
+ {
+ if (s != 1 || i->src[0].mod != Modifier(0))
+ break;
+ // try to concatenate shifts
+ Instruction *si = i->getSrc(0)->getInsn();
+ if (!si ||
+ si->op != OP_SHL || si->src[1].mod != Modifier(0))
+ break;
+ ImmediateValue *siImm = si->src[1].getImmediate();
+ if (siImm) {
+ bld.setPosition(i, false);
+ i->setSrc(0, si->getSrc(0));
+ i->setSrc(1, bld.loadImm(NULL,
+ imm.reg.data.u32 + siImm->reg.data.u32));
+ }
+ }
+ break;
+
+ case OP_ABS:
+ case OP_NEG:
+ case OP_LG2:
+ case OP_RCP:
+ case OP_SQRT:
+ case OP_RSQ:
+ case OP_PRESIN:
+ case OP_SIN:
+ case OP_COS:
+ case OP_PREEX2:
+ case OP_EX2:
+ unary(i, imm);
+ break;
+ default:
+ return;
+ }
+ if (i->op != op)
+ foldCount++;
+}
+
+// =============================================================================
+
+// Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed.
+class ModifierFolding : public Pass
+{
+private:
+ virtual bool visit(BasicBlock *);
+};
+
+bool
+ModifierFolding::visit(BasicBlock *bb)
+{
+ const Target *target = prog->getTarget();
+
+ Instruction *i, *next, *mi;
+ Modifier mod;
+
+ for (i = bb->getEntry(); i; i = next) {
+ next = i->next;
+
+ if (0 && i->op == OP_SUB) {
+ // turn "sub" into "add neg" (do we really want this ?)
+ i->op = OP_ADD;
+ i->src[0].mod = i->src[0].mod ^ Modifier(NV50_IR_MOD_NEG);
+ }
+
+ for (int s = 0; s < 3 && i->srcExists(s); ++s) {
+ mi = i->getSrc(s)->getInsn();
+ if (!mi ||
+ mi->predSrc >= 0 || mi->getDef(0)->refCount() > 8)
+ continue;
+ if (i->sType == TYPE_U32 && mi->dType == TYPE_S32) {
+ if ((i->op != OP_ADD &&
+ i->op != OP_MUL) ||
+ (mi->op != OP_ABS &&
+ mi->op != OP_NEG))
+ continue;
+ } else
+ if (i->sType != mi->dType) {
+ continue;
+ }
+ if ((mod = Modifier(mi->op)) == Modifier(0))
+ continue;
+ mod = mod * mi->src[0].mod;
+
+ if ((i->op == OP_ABS) || i->src[s].mod.abs()) {
+ // abs neg [abs] = abs
+ mod = mod & Modifier(~(NV50_IR_MOD_NEG | NV50_IR_MOD_ABS));
+ } else
+ if ((i->op == OP_NEG) && mod.neg()) {
+ assert(s == 0);
+ // neg as both opcode and modifier on same insn is prohibited
+ // neg neg abs = abs, neg neg = identity
+ mod = mod & Modifier(~NV50_IR_MOD_NEG);
+ i->op = mod.getOp();
+ mod = mod & Modifier(~NV50_IR_MOD_ABS);
+ if (mod == Modifier(0))
+ i->op = OP_MOV;
+ }
+
+ if (target->isModSupported(i, s, mod)) {
+ i->setSrc(s, mi->getSrc(0));
+ i->src[s].mod = i->src[s].mod * mod;
+ }
+ }
+
+ if (i->op == OP_SAT) {
+ mi = i->getSrc(0)->getInsn();
+ if (mi &&
+ mi->getDef(0)->refCount() <= 1 && target->isSatSupported(mi)) {
+ mi->saturate = 1;
+ mi->setDef(0, i->getDef(0));
+ delete_Instruction(prog, i);
+ }
+ }
+ }
+
+ return true;
+}
+
+// =============================================================================
+
+// MUL + ADD -> MAD/FMA
+// MIN/MAX(a, a) -> a, etc.
+// SLCT(a, b, const) -> cc(const) ? a : b
+// RCP(RCP(a)) -> a
+// MUL(MUL(a, b), const) -> MUL_Xconst(a, b)
+class AlgebraicOpt : public Pass
+{
+private:
+ virtual bool visit(BasicBlock *);
+
+ void handleADD(Instruction *);
+ void handleMINMAX(Instruction *);
+ void handleRCP(Instruction *);
+ void handleSLCT(Instruction *);
+ void handleLOGOP(Instruction *);
+ void handleCVT(Instruction *);
+};
+
+void
+AlgebraicOpt::handleADD(Instruction *add)
+{
+ Value *src0 = add->getSrc(0);
+ Value *src1 = add->getSrc(1);
+ Value *src;
+ int s;
+ Modifier mod[4];
+
+ if (!prog->getTarget()->isOpSupported(OP_MAD, add->dType))
+ return;
+
+ if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
+ return;
+
+ if (src0->refCount() == 1 &&
+ src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_MUL)
+ s = 0;
+ else
+ if (src1->refCount() == 1 &&
+ src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_MUL)
+ s = 1;
+ else
+ return;
+
+ if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) ||
+ (src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb))
+ return;
+
+ src = add->getSrc(s);
+
+ mod[0] = add->src[0].mod;
+ mod[1] = add->src[1].mod;
+ mod[2] = src->getUniqueInsn()->src[0].mod;
+ mod[3] = src->getUniqueInsn()->src[1].mod;
+
+ if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & Modifier(~NV50_IR_MOD_NEG))
+ return;
+
+ add->op = OP_MAD;
+ add->subOp = src->getInsn()->subOp; // potentially mul-high
+
+ add->setSrc(2, add->src[s ? 0 : 1]);
+
+ add->setSrc(0, src->getInsn()->getSrc(0));
+ add->src[0].mod = mod[2] ^ mod[s];
+ add->setSrc(1, src->getInsn()->getSrc(1));
+ add->src[1].mod = mod[3];
+}
+
+void
+AlgebraicOpt::handleMINMAX(Instruction *minmax)
+{
+ Value *src0 = minmax->getSrc(0);
+ Value *src1 = minmax->getSrc(1);
+
+ if (src0 != src1 || src0->reg.file != FILE_GPR)
+ return;
+ if (minmax->src[0].mod == minmax->src[1].mod) {
+ if (minmax->src[0].mod) {
+ minmax->op = OP_CVT;
+ minmax->setSrc(1, NULL);
+ } else {
+ minmax->def[0].replace(minmax->getSrc(0), false);
+ minmax->bb->remove(minmax);
+ }
+ } else {
+ // TODO:
+ // min(x, -x) = -abs(x)
+ // min(x, -abs(x)) = -abs(x)
+ // min(x, abs(x)) = x
+ // max(x, -abs(x)) = x
+ // max(x, abs(x)) = abs(x)
+ // max(x, -x) = abs(x)
+ }
+}
+
+void
+AlgebraicOpt::handleRCP(Instruction *rcp)
+{
+ Instruction *si = rcp->getSrc(0)->getUniqueInsn();
+
+ if (si && si->op == OP_RCP) {
+ Modifier mod = rcp->src[0].mod * si->src[0].mod;
+ rcp->op = mod.getOp();
+ rcp->setSrc(0, si->getSrc(0));
+ }
+}
+
+void
+AlgebraicOpt::handleSLCT(Instruction *slct)
+{
+ if (slct->getSrc(2)->reg.file == FILE_IMMEDIATE) {
+ if (slct->getSrc(2)->asImm()->compare(slct->asCmp()->setCond, 0.0f))
+ slct->setSrc(0, slct->getSrc(1));
+ } else
+ if (slct->getSrc(0) != slct->getSrc(1)) {
+ return;
+ }
+ slct->op = OP_MOV;
+ slct->setSrc(1, NULL);
+ slct->setSrc(2, NULL);
+}
+
+void
+AlgebraicOpt::handleLOGOP(Instruction *logop)
+{
+ Value *src0 = logop->getSrc(0);
+ Value *src1 = logop->getSrc(1);
+
+ if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
+ return;
+
+ if (src0 == src1) {
+ if (logop->src[0].mod != Modifier(0) ||
+ logop->src[1].mod != Modifier(0))
+ return;
+ if (logop->op == OP_AND || logop->op == OP_OR) {
+ logop->def[0].replace(logop->getSrc(0), false);
+ delete_Instruction(prog, logop);
+ }
+ } else {
+ // try AND(SET, SET) -> SET_AND(SET)
+ Instruction *set0 = src0->getInsn();
+ Instruction *set1 = src1->getInsn();
+
+ if (!set0 || set0->fixed || !set1 || set1->fixed)
+ return;
+ if (set1->op != OP_SET) {
+ Instruction *xchg = set0;
+ set0 = set1;
+ set1 = xchg;
+ if (set1->op != OP_SET)
+ return;
+ }
+ if (set0->op != OP_SET &&
+ set0->op != OP_SET_AND &&
+ set0->op != OP_SET_OR &&
+ set0->op != OP_SET_XOR)
+ return;
+ if (set0->getDef(0)->refCount() > 1 &&
+ set1->getDef(0)->refCount() > 1)
+ return;
+ if (set0->getPredicate() || set1->getPredicate())
+ return;
+ // check that they don't source each other
+ for (int s = 0; s < 2; ++s)
+ if (set0->getSrc(s) == set1->getDef(0) ||
+ set1->getSrc(s) == set0->getDef(0))
+ return;
+
+ set0 = set0->clone(true);
+ set1 = set1->clone(false);
+ logop->bb->insertAfter(logop, set1);
+ logop->bb->insertAfter(logop, set0);
+
+ set0->dType = TYPE_U8;
+ set0->getDef(0)->reg.file = FILE_PREDICATE;
+ set0->getDef(0)->reg.size = 1;
+ set1->setSrc(2, set0->getDef(0));
+ switch (logop->op) {
+ case OP_AND: set1->op = OP_SET_AND; break;
+ case OP_OR: set1->op = OP_SET_OR; break;
+ case OP_XOR: set1->op = OP_SET_XOR; break;
+ default:
+ assert(0);
+ break;
+ }
+ set1->setDef(0, logop->getDef(0));
+ delete_Instruction(prog, logop);
+ }
+}
+
+// F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0
+void
+AlgebraicOpt::handleCVT(Instruction *cvt)
+{
+ if (cvt->sType != TYPE_F32 ||
+ cvt->dType != TYPE_S32 || cvt->src[0].mod != Modifier(0))
+ return;
+ Instruction *insn = cvt->getSrc(0)->getInsn();
+ if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32)
+ return;
+ if (insn->src[0].mod != Modifier(0))
+ return;
+ insn = insn->getSrc(0)->getInsn();
+ if (!insn || insn->op != OP_SET || insn->dType != TYPE_F32)
+ return;
+
+ Instruction *bset = insn->clone(false);
+ bset->dType = TYPE_U32;
+ bset->setDef(0, cvt->getDef(0));
+ cvt->bb->insertAfter(cvt, bset);
+ delete_Instruction(prog, cvt);
+}
+
+bool
+AlgebraicOpt::visit(BasicBlock *bb)
+{
+ Instruction *next;
+ for (Instruction *i = bb->getEntry(); i; i = next) {
+ next = i->next;
+ switch (i->op) {
+ case OP_ADD:
+ handleADD(i);
+ break;
+ case OP_RCP:
+ handleRCP(i);
+ break;
+ case OP_MIN:
+ case OP_MAX:
+ handleMINMAX(i);
+ break;
+ case OP_SLCT:
+ handleSLCT(i);
+ break;
+ case OP_AND:
+ case OP_OR:
+ case OP_XOR:
+ handleLOGOP(i);
+ break;
+ case OP_CVT:
+ handleCVT(i);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return true;
+}
+
+// =============================================================================
+
+static inline void
+updateLdStOffset(Instruction *ldst, int32_t offset, Function *fn)
+{
+ if (offset != ldst->getSrc(0)->reg.data.offset) {
+ if (ldst->getSrc(0)->refCount() > 1)
+ ldst->setSrc(0, ldst->getSrc(0)->clone(fn));
+ ldst->getSrc(0)->reg.data.offset = offset;
+ }
+}
+
+// Combine loads and stores, forward stores to loads where possible.
+class MemoryOpt : public Pass
+{
+private:
+ class Record
+ {
+ public:
+ Record *next;
+ Instruction *insn;
+ const Value *rel[2];
+ const Value *base;
+ int32_t offset;
+ int8_t fileIndex;
+ uint8_t size;
+ bool locked;
+ Record *prev;
+
+ bool overlaps(const Instruction *ldst) const;
+
+ inline void link(Record **);
+ inline void unlink(Record **);
+ inline void set(const Instruction *ldst);
+ };
+
+public:
+ MemoryOpt();
+
+ Record *loads[DATA_FILE_COUNT];
+ Record *stores[DATA_FILE_COUNT];
+
+ MemoryPool recordPool;
+
+private:
+ virtual bool visit(BasicBlock *);
+ bool runOpt(BasicBlock *);
+
+ Record **getList(const Instruction *);
+
+ Record *findRecord(const Instruction *, bool load, bool& isAdjacent) const;
+
+ // merge @insn into load/store instruction from @rec
+ bool combineLd(Record *rec, Instruction *ld);
+ bool combineSt(Record *rec, Instruction *st);
+
+ bool replaceLdFromLd(Instruction *ld, Record *ldRec);
+ bool replaceLdFromSt(Instruction *ld, Record *stRec);
+ bool replaceStFromSt(Instruction *restrict st, Record *stRec);
+
+ void addRecord(Instruction *ldst);
+ void purgeRecords(Instruction *const st, DataFile);
+ void lockStores(Instruction *const ld);
+ void reset();
+
+private:
+ Record *prevRecord;
+};
+
+MemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record), 6)
+{
+ for (int i = 0; i < DATA_FILE_COUNT; ++i) {
+ loads[i] = NULL;
+ stores[i] = NULL;
+ }
+ prevRecord = NULL;
+}
+
+void
+MemoryOpt::reset()
+{
+ for (unsigned int i = 0; i < DATA_FILE_COUNT; ++i) {
+ Record *it, *next;
+ for (it = loads[i]; it; it = next) {
+ next = it->next;
+ recordPool.release(it);
+ }
+ loads[i] = NULL;
+ for (it = stores[i]; it; it = next) {
+ next = it->next;
+ recordPool.release(it);
+ }
+ stores[i] = NULL;
+ }
+}
+
+bool
+MemoryOpt::combineLd(Record *rec, Instruction *ld)
+{
+ int32_t offRc = rec->offset;
+ int32_t offLd = ld->getSrc(0)->reg.data.offset;
+ int sizeRc = rec->size;
+ int sizeLd = typeSizeof(ld->dType);
+ int size = sizeRc + sizeLd;
+ int d, j;
+
+ // only VFETCH can do a 96 byte load
+ if (ld->op != OP_VFETCH && size == 12)
+ return false;
+ // no unaligned loads
+ if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) ||
+ ((size == 0xc) && (MIN2(offLd, offRc) & 0xf)))
+ return false;
+
+ assert(sizeRc + sizeLd <= 16 && offRc != offLd);
+
+ for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j);
+
+ if (offLd < offRc) {
+ int sz;
+ for (sz = 0, d = 0; sz < sizeLd; sz += ld->getDef(d)->reg.size, ++d);
+ // d: nr of definitions in ld
+ // j: nr of definitions in rec->insn, move:
+ for (d = d + j - 1; j > 0; --j, --d)
+ rec->insn->setDef(d, rec->insn->getDef(j - 1));
+
+ if (rec->insn->getSrc(0)->refCount() > 1)
+ rec->insn->setSrc(0, rec->insn->getSrc(0)->clone(func));
+ rec->offset = rec->insn->getSrc(0)->reg.data.offset = offLd;
+
+ d = 0;
+ } else {
+ d = j;
+ }
+ // move definitions of @ld to @rec->insn
+ for (j = 0; sizeLd; ++j, ++d) {
+ sizeLd -= ld->getDef(j)->reg.size;
+ rec->insn->setDef(d, ld->getDef(j));
+ }
+
+ rec->size = size;
+ rec->insn->setType(typeOfSize(size));
+
+ delete_Instruction(prog, ld);
+
+ return true;
+}
+
+bool
+MemoryOpt::combineSt(Record *rec, Instruction *st)
+{
+ int32_t offRc = rec->offset;
+ int32_t offSt = st->getSrc(0)->reg.data.offset;
+ int sizeRc = rec->size;
+ int sizeSt = typeSizeof(st->dType);
+ int s = sizeSt / 4;
+ int size = sizeRc + sizeSt;
+ int j, k;
+ Value *src[4]; // no modifiers in ValueRef allowed for st
+ Value *extra[3];
+
+ if (size == 12) // XXX: check if EXPORT a[] can do this after all
+ return false;
+ if (size == 8 && MIN2(offRc, offSt) & 0x7)
+ return false;
+
+ st->takeExtraSources(0, extra); // save predicate and indirect address
+
+ if (offRc < offSt) {
+ // save values from @st
+ for (s = 0; sizeSt; ++s) {
+ sizeSt -= st->getSrc(s + 1)->reg.size;
+ src[s] = st->getSrc(s + 1);
+ }
+ // set record's values as low sources of @st
+ for (j = 1; sizeRc; ++j) {
+ sizeRc -= rec->insn->getSrc(j)->reg.size;
+ st->setSrc(j, rec->insn->getSrc(j));
+ }
+ // set saved values as high sources of @st
+ for (k = j, j = 0; j < s; ++j)
+ st->setSrc(k++, src[j]);
+
+ updateLdStOffset(st, offRc, func);
+ } else {
+ for (j = 1; sizeSt; ++j)
+ sizeSt -= st->getSrc(j)->reg.size;
+ for (s = 1; sizeRc; ++j, ++s) {
+ sizeRc -= rec->insn->getSrc(s)->reg.size;
+ st->setSrc(j, rec->insn->getSrc(s));
+ }
+ rec->offset = offSt;
+ }
+ st->putExtraSources(0, extra); // restore pointer and predicate
+
+ delete_Instruction(prog, rec->insn);
+ rec->insn = st;
+ rec->size = size;
+ rec->insn->setType(typeOfSize(size));
+ return true;
+}
+
+void
+MemoryOpt::Record::set(const Instruction *ldst)
+{
+ const Symbol *mem = ldst->getSrc(0)->asSym();
+ fileIndex = mem->reg.fileIndex;
+ rel[0] = ldst->getIndirect(0, 0);
+ rel[1] = ldst->getIndirect(0, 1);
+ offset = mem->reg.data.offset;
+ base = mem->getBase();
+ size = typeSizeof(ldst->sType);
+}
+
+void
+MemoryOpt::Record::link(Record **list)
+{
+ next = *list;
+ if (next)
+ next->prev = this;
+ prev = NULL;
+ *list = this;
+}
+
+void
+MemoryOpt::Record::unlink(Record **list)
+{
+ if (next)
+ next->prev = prev;
+ if (prev)
+ prev->next = next;
+ else
+ *list = next;
+}
+
+MemoryOpt::Record **
+MemoryOpt::getList(const Instruction *insn)
+{
+ if (insn->op == OP_LOAD || insn->op == OP_VFETCH)
+ return &loads[insn->src[0].getFile()];
+ return &stores[insn->src[0].getFile()];
+}
+
+void
+MemoryOpt::addRecord(Instruction *i)
+{
+ Record **list = getList(i);
+ Record *it = reinterpret_cast<Record *>(recordPool.allocate());
+
+ it->link(list);
+ it->set(i);
+ it->insn = i;
+ it->locked = false;
+}
+
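+// Look for a previous load/store record compatible with @insn: same file index
+// and indirect addressing, within the same 16-byte segment. An overlapping or
+// containing record is returned directly; otherwise a suitably aligned
+// adjacent record may be returned, with @isAdj indicating adjacency.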
+MemoryOpt::Record *
+MemoryOpt::findRecord(const Instruction *insn, bool load, bool& isAdj) const
+{
+ const Symbol *sym = insn->getSrc(0)->asSym();
+ const int size = typeSizeof(insn->sType);
+ Record *rec = NULL;
+ Record *it = load ? loads[sym->reg.file] : stores[sym->reg.file];
+
+ for (; it; it = it->next) {
+ if (it->locked && insn->op != OP_LOAD)
+ continue;
+ if ((it->offset >> 4) != (sym->reg.data.offset >> 4) ||
+ it->rel[0] != insn->getIndirect(0, 0) ||
+ it->fileIndex != sym->reg.fileIndex ||
+ it->rel[1] != insn->getIndirect(0, 1))
+ continue;
+
+ if (it->offset < sym->reg.data.offset) {
+ if (it->offset + it->size >= sym->reg.data.offset) {
+ isAdj = (it->offset + it->size == sym->reg.data.offset);
+ if (!isAdj)
+ return it;
+ if (!(it->offset & 0x7))
+ rec = it;
+ }
+ } else {
+ isAdj = it->offset != sym->reg.data.offset;
+ if (size <= it->size && !isAdj)
+ return it;
+ else
+ if (!(sym->reg.data.offset & 0x7))
+ if (it->offset - size <= sym->reg.data.offset)
+ rec = it;
+ }
+ }
+ return rec;
+}
+
+bool
+MemoryOpt::replaceLdFromSt(Instruction *ld, Record *rec)
+{
+ Instruction *st = rec->insn;
+ int32_t offSt = rec->offset;
+ int32_t offLd = ld->getSrc(0)->reg.data.offset;
+ int d, s;
+
+ for (s = 1; offSt != offLd && st->srcExists(s); ++s)
+ offSt += st->getSrc(s)->reg.size;
+ if (offSt != offLd)
+ return false;
+
+ for (d = 0; ld->defExists(d) && st->srcExists(s); ++d, ++s) {
+ if (ld->getDef(d)->reg.size != st->getSrc(s)->reg.size)
+ return false;
+ if (st->getSrc(s)->reg.file != FILE_GPR)
+ return false;
+ ld->def[d].replace(st->getSrc(s), false);
+ }
+ ld->bb->remove(ld);
+ return true;
+}
+
+bool
+MemoryOpt::replaceLdFromLd(Instruction *ldE, Record *rec)
+{
+ Instruction *ldR = rec->insn;
+ int32_t offR = rec->offset;
+ int32_t offE = ldE->getSrc(0)->reg.data.offset;
+ int dR, dE;
+
+ assert(offR <= offE);
+ for (dR = 0; offR < offE && ldR->defExists(dR); ++dR)
+ offR += ldR->getDef(dR)->reg.size;
+ if (offR != offE)
+ return false;
+
+ for (dE = 0; ldE->defExists(dE) && ldR->defExists(dR); ++dE, ++dR) {
+ if (ldE->getDef(dE)->reg.size != ldR->getDef(dR)->reg.size)
+ return false;
+ ldE->def[dE].replace(ldR->getDef(dR), false);
+ }
+
+ delete_Instruction(prog, ldE);
+ return true;
+}
+
+bool
+MemoryOpt::replaceStFromSt(Instruction *restrict st, Record *rec)
+{
+ const Instruction *const ri = rec->insn;
+ Value *extra[3];
+
+ int32_t offS = st->getSrc(0)->reg.data.offset;
+ int32_t offR = rec->offset;
+ int32_t endS = offS + typeSizeof(st->dType);
+ int32_t endR = offR + typeSizeof(ri->dType);
+
+ rec->size = MAX2(endS, endR) - MIN2(offS, offR);
+
+ st->takeExtraSources(0, extra);
+
+ if (offR < offS) {
+ Value *vals[4];
+ int s, n;
+ int k = 0;
+ // get non-replaced sources of ri
+ for (s = 1; offR < offS; offR += ri->getSrc(s)->reg.size, ++s)
+ vals[k++] = ri->getSrc(s);
+ n = s;
+ // get replaced sources of st
+ for (s = 1; st->srcExists(s); offS += st->getSrc(s)->reg.size, ++s)
+ vals[k++] = st->getSrc(s);
+ // skip replaced sources of ri
+ for (s = n; offR < endS; offR += ri->getSrc(s)->reg.size, ++s);
+ // get non-replaced sources after values covered by st
+ for (; offR < endR; offR += ri->getSrc(s)->reg.size, ++s)
+ vals[k++] = ri->getSrc(s);
+ for (s = 0; s < k; ++s)
+ st->setSrc(s + 1, vals[s]);
+ st->setSrc(0, ri->getSrc(0));
+ } else
+ if (endR > endS) {
+ int j, s;
+ for (j = 1; offR < endS; offR += ri->getSrc(j++)->reg.size);
+ for (s = 1; offS < endS; offS += st->getSrc(s++)->reg.size);
+ for (; offR < endR; offR += ri->getSrc(j++)->reg.size)
+ st->setSrc(s++, ri->getSrc(j));
+ }
+ st->putExtraSources(0, extra);
+
+ delete_Instruction(prog, rec->insn);
+
+ rec->insn = st;
+ rec->offset = st->getSrc(0)->reg.data.offset;
+
+ st->setType(typeOfSize(rec->size));
+
+ return true;
+}
+
+bool
+MemoryOpt::Record::overlaps(const Instruction *ldst) const
+{
+ Record that;
+ that.set(ldst);
+
+ if (this->fileIndex != that.fileIndex)
+ return false;
+
+ if (this->rel[0] || that.rel[0])
+ return this->base == that.base;
+ return
+ (this->offset < that.offset + that.size) &&
+ (this->offset + this->size > that.offset);
+}
+
+// Stores that affect the result of @ld must not be eliminated once we find
+// later stores to the same location, and they may no longer be merged with
+// such later stores.
+// The stored value can, however, still be used to determine the value
+// returned by future loads.
+void
+MemoryOpt::lockStores(Instruction *const ld)
+{
+ for (Record *r = stores[ld->src[0].getFile()]; r; r = r->next)
+ if (!r->locked && r->overlaps(ld))
+ r->locked = true;
+}
+
+// Prior loads from the location of @st are no longer valid.
+// Stores to the location of @st may no longer be used to derive
+// the value at it nor be coalesced into later stores.
+void
+MemoryOpt::purgeRecords(Instruction *const st, DataFile f)
+{
+ if (st)
+ f = st->src[0].getFile();
+
+ for (Record *r = loads[f]; r; r = r->next)
+ if (!st || r->overlaps(st))
+ r->unlink(&loads[f]);
+
+ for (Record *r = stores[f]; r; r = r->next)
+ if (!st || r->overlaps(st))
+ r->unlink(&stores[f]);
+}
+
+bool
+MemoryOpt::visit(BasicBlock *bb)
+{
+ bool ret = runOpt(bb);
+ // Run a second pass: one pass cannot combine four 32-bit ld/st into a single
+ // 128-bit ld/st on targets where 96-bit memory operations are forbidden.
+ if (ret)
+ ret = runOpt(bb);
+ return ret;
+}
+
+bool
+MemoryOpt::runOpt(BasicBlock *bb)
+{
+ Instruction *ldst, *next;
+ Record *rec;
+ bool isAdjacent = true;
+
+ for (ldst = bb->getEntry(); ldst; ldst = next) {
+ bool keep = true;
+ bool isLoad = true;
+ next = ldst->next;
+
+ if (ldst->op == OP_LOAD || ldst->op == OP_VFETCH) {
+ if (ldst->isDead()) {
+ // might have been produced by earlier optimization
+ delete_Instruction(prog, ldst);
+ continue;
+ }
+ } else
+ if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) {
+ isLoad = false;
+ } else {
+ // TODO: maybe have all fixed ops act as barrier ?
+ if (ldst->op == OP_CALL) {
+ purgeRecords(NULL, FILE_MEMORY_LOCAL);
+ purgeRecords(NULL, FILE_MEMORY_GLOBAL);
+ purgeRecords(NULL, FILE_MEMORY_SHARED);
+ purgeRecords(NULL, FILE_SHADER_OUTPUT);
+ } else
+ if (ldst->op == OP_EMIT || ldst->op == OP_RESTART) {
+ purgeRecords(NULL, FILE_SHADER_OUTPUT);
+ }
+ continue;
+ }
+ if (ldst->getPredicate()) // TODO: handle predicated ld/st
+ continue;
+
+ if (isLoad) {
+ DataFile file = ldst->src[0].getFile();
+
+ // if ld l[]/g[] look for previous store to eliminate the reload
+ if (file == FILE_MEMORY_GLOBAL || file == FILE_MEMORY_LOCAL) {
+ // TODO: shared memory ?
+ rec = findRecord(ldst, false, isAdjacent);
+ if (rec && !isAdjacent)
+ keep = !replaceLdFromSt(ldst, rec);
+ }
+
+ // or look for ld from the same location and replace this one
+ rec = keep ? findRecord(ldst, true, isAdjacent) : NULL;
+ if (rec) {
+ if (!isAdjacent)
+ keep = !replaceLdFromLd(ldst, rec);
+ else
+ // or combine a previous load with this one
+ keep = !combineLd(rec, ldst);
+ }
+ if (keep)
+ lockStores(ldst);
+ } else {
+ rec = findRecord(ldst, false, isAdjacent);
+ if (rec) {
+ if (!isAdjacent)
+ keep = !replaceStFromSt(ldst, rec);
+ else
+ keep = !combineSt(rec, ldst);
+ }
+ if (keep)
+ purgeRecords(ldst, DATA_FILE_COUNT);
+ }
+ if (keep)
+ addRecord(ldst);
+ }
+ reset();
+
+ return true;
+}
+
+// =============================================================================
+
+// Turn control flow into predicated instructions (after register allocation !).
+// TODO:
+// Could move this to before register allocation on NVC0 and also handle nested
+// constructs.
+class FlatteningPass : public Pass
+{
+private:
+ virtual bool visit(BasicBlock *);
+
+ bool tryPredicateConditional(BasicBlock *);
+ void predicateInstructions(BasicBlock *, Value *pred, CondCode cc);
+ void tryPropagateBranch(BasicBlock *);
+ inline bool isConstantCondition(Value *pred);
+ inline bool mayPredicate(const Instruction *, const Value *pred) const;
+ inline void removeFlow(Instruction *);
+};
+
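+// A condition counts as "constant" if it is a SET whose operands are all
+// immediates or constant buffer loads; such a branch is presumably uniform and
+// cheap, so only very short blocks (limit 4) are worth predicating instead.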
+bool
+FlatteningPass::isConstantCondition(Value *pred)
+{
+ Instruction *insn = pred->getUniqueInsn();
+ assert(insn);
+ if (insn->op != OP_SET || insn->srcExists(2))
+ return false;
+
+ for (int s = 0; s < 2 && insn->srcExists(s); ++s) {
+ Instruction *ld = insn->getSrc(s)->getUniqueInsn();
+ DataFile file;
+ if (ld) {
+ if (ld->op != OP_MOV && ld->op != OP_LOAD)
+ return false;
+ if (ld->src[0].isIndirect(0))
+ return false;
+ file = ld->src[0].getFile();
+ } else {
+ file = insn->src[s].getFile();
+ // catch $r63 on NVC0
+ if (file == FILE_GPR && insn->getSrc(s)->reg.data.id > prog->maxGPR)
+ file = FILE_IMMEDIATE;
+ }
+ if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST)
+ return false;
+ }
+ return true;
+}
+
+void
+FlatteningPass::removeFlow(Instruction *insn)
+{
+ FlowInstruction *term = insn ? insn->asFlow() : NULL;
+ if (!term)
+ return;
+ Graph::Edge::Type ty = term->bb->cfg.outgoing().getType();
+
+ if (term->op == OP_BRA) {
+ // TODO: this might get more difficult when we get arbitrary BRAs
+ if (ty == Graph::Edge::CROSS || ty == Graph::Edge::BACK)
+ return;
+ } else
+ if (term->op != OP_JOIN)
+ return;
+
+ Value *pred = term->getPredicate();
+
+ delete_Instruction(prog, term);
+
+ if (pred && pred->refCount() == 0) {
+ Instruction *pSet = pred->getUniqueInsn();
+ pred->join->reg.data.id = -1; // deallocate
+ if (pSet->isDead())
+ delete_Instruction(prog, pSet);
+ }
+}
+
+void
+FlatteningPass::predicateInstructions(BasicBlock *bb, Value *pred, CondCode cc)
+{
+ for (Instruction *i = bb->getEntry(); i; i = i->next) {
+ if (i->isNop())
+ continue;
+ assert(!i->getPredicate());
+ i->setPredicate(cc, pred);
+ }
+ removeFlow(bb->getExit());
+}
+
+bool
+FlatteningPass::mayPredicate(const Instruction *insn, const Value *pred) const
+{
+ if (insn->isPseudo())
+ return true;
+ // TODO: calls where we don't know which registers are modified
+
+ if (!prog->getTarget()->mayPredicate(insn, pred))
+ return false;
+ for (int d = 0; insn->defExists(d); ++d)
+ if (insn->getDef(d)->equals(pred))
+ return false;
+ return true;
+}
+
+// If we conditionally skip over or to a branch instruction, replace it.
+// NOTE: We do not update the CFG anymore here !
+void
+FlatteningPass::tryPropagateBranch(BasicBlock *bb)
+{
+ BasicBlock *bf = NULL;
+ unsigned int i;
+
+ if (bb->cfg.outgoingCount() != 2)
+ return;
+ if (!bb->getExit() || bb->getExit()->op != OP_BRA)
+ return;
+ Graph::EdgeIterator ei = bb->cfg.outgoing();
+
+ for (i = 0; !ei.end(); ++i, ei.next()) {
+ bf = BasicBlock::get(ei.getNode());
+ if (bf->getInsnCount() == 1)
+ break;
+ }
+ if (ei.end() || !bf->getExit())
+ return;
+ FlowInstruction *bra = bb->getExit()->asFlow();
+ FlowInstruction *rep = bf->getExit()->asFlow();
+
+ if (rep->getPredicate())
+ return;
+ if (rep->op != OP_BRA &&
+ rep->op != OP_JOIN &&
+ rep->op != OP_EXIT)
+ return;
+
+ bra->op = rep->op;
+ bra->target.bb = rep->target.bb;
+ if (i) // 2nd out block means branch not taken
+ bra->cc = inverseCondCode(bra->cc);
+ bf->remove(rep);
+}
+
+bool
+FlatteningPass::visit(BasicBlock *bb)
+{
+ if (tryPredicateConditional(bb))
+ return true;
+
+ // try to attach join to previous instruction
+ Instruction *insn = bb->getExit();
+ if (insn && insn->op == OP_JOIN && !insn->getPredicate()) {
+ insn = insn->prev;
+ if (insn && !insn->getPredicate() && !insn->asFlow() && !insn->isNop()) {
+ insn->join = 1;
+ bb->remove(bb->getExit());
+ return true;
+ }
+ }
+
+ tryPropagateBranch(bb);
+
+ return true;
+}
+
+bool
+FlatteningPass::tryPredicateConditional(BasicBlock *bb)
+{
+ BasicBlock *bL = NULL, *bR = NULL;
+ unsigned int nL = 0, nR = 0, limit = 12;
+ Instruction *insn;
+ unsigned int mask;
+
+ mask = bb->initiatesSimpleConditional();
+ if (!mask)
+ return false;
+
+ assert(bb->getExit());
+ Value *pred = bb->getExit()->getPredicate();
+ assert(pred);
+
+ if (isConstantCondition(pred))
+ limit = 4;
+
+ Graph::EdgeIterator ei = bb->cfg.outgoing();
+
+ if (mask & 1) {
+ bL = BasicBlock::get(ei.getNode());
+ for (insn = bL->getEntry(); insn; insn = insn->next, ++nL)
+ if (!mayPredicate(insn, pred))
+ return false;
+ if (nL > limit)
+ return false; // too long, do a real branch
+ }
+ ei.next();
+
+ if (mask & 2) {
+ bR = BasicBlock::get(ei.getNode());
+ for (insn = bR->getEntry(); insn; insn = insn->next, ++nR)
+ if (!mayPredicate(insn, pred))
+ return false;
+ if (nR > limit)
+ return false; // too long, do a real branch
+ }
+
+ if (bL)
+ predicateInstructions(bL, pred, bb->getExit()->cc);
+ if (bR)
+ predicateInstructions(bR, pred, inverseCondCode(bb->getExit()->cc));
+
+ if (bb->joinAt) {
+ bb->remove(bb->joinAt);
+ bb->joinAt = NULL;
+ }
+ removeFlow(bb->getExit()); // delete the branch/join at the fork point
+
+ // remove potential join operations at the end of the conditional
+ if (prog->getTarget()->joinAnterior) {
+ bb = BasicBlock::get((bL ? bL : bR)->cfg.outgoing().getNode());
+ if (bb->getEntry() && bb->getEntry()->op == OP_JOIN)
+ removeFlow(bb->getEntry());
+ }
+
+ return true;
+}
+
+// =============================================================================
+
+// Common subexpression elimination. Stupid O(n^2) implementation.
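+// E.g. (illustrative): if two instructions in a block compute 'add %x, %y'
+// with identical types and modifiers and no intervening redefinition, the
+// later one is deleted and its uses are redirected to the earlier result.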
+class LocalCSE : public Pass
+{
+private:
+ virtual bool visit(BasicBlock *);
+
+ inline bool tryReplace(Instruction **, Instruction *);
+
+ DLList ops[OP_LAST + 1];
+};
+
+class GlobalCSE : public Pass
+{
+private:
+ virtual bool visit(BasicBlock *);
+};
+
+bool
+Instruction::isActionEqual(const Instruction *that) const
+{
+ if (this->op != that->op ||
+ this->dType != that->dType ||
+ this->sType != that->sType)
+ return false;
+ if (this->cc != that->cc)
+ return false;
+
+ if (this->asTex()) {
+ if (memcmp(&this->asTex()->tex,
+ &that->asTex()->tex,
+ sizeof(this->asTex()->tex)))
+ return false;
+ } else
+ if (this->asCmp()) {
+ if (this->asCmp()->setCond != that->asCmp()->setCond)
+ return false;
+ } else
+ if (this->asFlow()) {
+ return false;
+ } else {
+ if (this->atomic != that->atomic ||
+ this->ipa != that->ipa ||
+ this->lanes != that->lanes ||
+ this->perPatch != that->perPatch)
+ return false;
+ if (this->postFactor != that->postFactor)
+ return false;
+ }
+
+ if (this->subOp != that->subOp ||
+ this->saturate != that->saturate ||
+ this->rnd != that->rnd ||
+ this->ftz != that->ftz ||
+ this->dnz != that->dnz ||
+ this->cache != that->cache)
+ return false;
+
+ return true;
+}
+
+bool
+Instruction::isResultEqual(const Instruction *that) const
+{
+ unsigned int d, s;
+
+ // NOTE: location of discard only affects tex with liveOnly and quadops
+ if (!this->defExists(0) && this->op != OP_DISCARD)
+ return false;
+
+ if (!isActionEqual(that))
+ return false;
+
+ if (this->predSrc != that->predSrc)
+ return false;
+
+ for (d = 0; this->defExists(d); ++d) {
+ if (!that->defExists(d) ||
+ !this->getDef(d)->equals(that->getDef(d), false))
+ return false;
+ }
+ if (that->defExists(d))
+ return false;
+
+ for (s = 0; this->srcExists(s); ++s) {
+ if (!that->srcExists(s))
+ return false;
+ if (this->src[s].mod != that->src[s].mod)
+ return false;
+ if (!this->getSrc(s)->equals(that->getSrc(s), true))
+ return false;
+ }
+ if (that->srcExists(s))
+ return false;
+
+ if (op == OP_LOAD || op == OP_VFETCH) {
+ switch (src[0].getFile()) {
+ case FILE_MEMORY_CONST:
+ case FILE_SHADER_INPUT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// pull through common expressions from different in-blocks
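+// E.g. (illustrative): if every predecessor computes an equivalent
+// 'mul %x, %y' whose only use is the phi that merges the results, the
+// computation is pulled into this block and the phi is deleted.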
+bool
+GlobalCSE::visit(BasicBlock *bb)
+{
+ Instruction *phi, *next, *ik;
+ int s;
+
+ for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = next) {
+ next = phi->next;
+ if (phi->getSrc(0)->refCount() > 1)
+ continue;
+ ik = phi->getSrc(0)->getInsn();
+ for (s = 1; phi->srcExists(s); ++s) {
+ if (phi->getSrc(s)->refCount() > 1)
+ break;
+ if (!phi->getSrc(s)->getInsn()->isResultEqual(ik))
+ break;
+ }
+ if (!phi->srcExists(s)) {
+ Instruction *entry = bb->getEntry();
+ ik->bb->remove(ik);
+ if (!entry || entry->op != OP_JOIN)
+ bb->insertHead(ik);
+ else
+ bb->insertAfter(entry, ik);
+ ik->setDef(0, phi->getDef(0));
+ delete_Instruction(prog, phi);
+ }
+ }
+
+ return true;
+}
+
+bool
+LocalCSE::tryReplace(Instruction **ptr, Instruction *i)
+{
+ Instruction *old = *ptr;
+ if (!old->isResultEqual(i))
+ return false;
+ for (int d = 0; old->defExists(d); ++d)
+ old->def[d].replace(i->getDef(d), false);
+ delete_Instruction(prog, old);
+ *ptr = NULL;
+ return true;
+}
+
+bool
+LocalCSE::visit(BasicBlock *bb)
+{
+ unsigned int replaced;
+
+ do {
+ Instruction *ir, *next;
+
+ replaced = 0;
+
+ // will need to know the order of instructions
+ int serial = 0;
+ for (ir = bb->getEntry(); ir; ir = ir->next)
+ ir->serial = serial++;
+
+ for (ir = bb->getEntry(); ir; ir = next) {
+ int s;
+ Value *src = NULL;
+
+ next = ir->next;
+
+ if (ir->fixed) {
+ ops[ir->op].insert(ir);
+ continue;
+ }
+
+ for (s = 0; ir->srcExists(s); ++s)
+ if (ir->getSrc(s)->asLValue())
+ if (!src || ir->getSrc(s)->refCount() < src->refCount())
+ src = ir->getSrc(s);
+
+ if (src) {
+ for (ValueRef::Iterator refs = src->uses->iterator(); !refs.end();
+ refs.next()) {
+ Instruction *ik = refs.get()->getInsn();
+ if (ik->serial < ir->serial && ik->bb == ir->bb)
+ if (tryReplace(&ir, ik))
+ break;
+ }
+ } else {
+ DLLIST_FOR_EACH(&ops[ir->op], iter)
+ {
+ Instruction *ik = reinterpret_cast<Instruction *>(iter.get());
+ if (tryReplace(&ir, ik))
+ break;
+ }
+ }
+
+ if (ir)
+ ops[ir->op].insert(ir);
+ else
+ ++replaced;
+ }
+ for (unsigned int i = 0; i <= OP_LAST; ++i)
+ ops[i].clear();
+
+ } while (replaced);
+
+ return true;
+}
+
+// =============================================================================
+
+// Remove computations of unused values.
+class DeadCodeElim : public Pass
+{
+public:
+ bool buryAll(Program *);
+
+private:
+ virtual bool visit(BasicBlock *);
+
+ void checkSplitLoad(Instruction *ld); // for partially dead loads
+
+ unsigned int deadCount;
+};
+
+bool
+DeadCodeElim::buryAll(Program *prog)
+{
+ do {
+ deadCount = 0;
+ if (!this->run(prog, false, false))
+ return false;
+ } while (deadCount);
+
+ return true;
+}
+
+bool
+DeadCodeElim::visit(BasicBlock *bb)
+{
+ Instruction *next;
+
+ for (Instruction *i = bb->getFirst(); i; i = next) {
+ next = i->next;
+ if (i->isDead()) {
+ ++deadCount;
+ delete_Instruction(prog, i);
+ } else
+ if (i->defExists(1) && (i->op == OP_VFETCH || i->op == OP_LOAD)) {
+ checkSplitLoad(i);
+ }
+ }
+ return true;
+}
+
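+// Split a load some of whose destination components are dead into at most two
+// smaller loads covering only the live components (illustrative: a 4-component
+// load whose third component is unused becomes one load of components 0-1 and
+// one load of component 3).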
+void
+DeadCodeElim::checkSplitLoad(Instruction *ld1)
+{
+ Instruction *ld2 = NULL; // can get at most 2 loads
+ Value *def1[4];
+ Value *def2[4];
+ int32_t addr1, addr2;
+ int32_t size1, size2;
+ int d, n1, n2;
+ uint32_t mask = 0xffffffff;
+
+ for (d = 0; ld1->defExists(d); ++d)
+ if (!ld1->getDef(d)->refCount() && ld1->getDef(d)->reg.data.id < 0)
+ mask &= ~(1 << d);
+ if (mask == 0xffffffff)
+ return;
+
+ addr1 = ld1->getSrc(0)->reg.data.offset;
+ n1 = n2 = 0;
+ size1 = size2 = 0;
+ for (d = 0; ld1->defExists(d); ++d) {
+ if (mask & (1 << d)) {
+ if (size1 && (addr1 & 0x7))
+ break;
+ def1[n1] = ld1->getDef(d);
+ size1 += def1[n1++]->reg.size;
+ } else
+ if (!n1) {
+ addr1 += ld1->getDef(d)->reg.size;
+ } else {
+ break;
+ }
+ }
+ for (addr2 = addr1 + size1; ld1->defExists(d); ++d) {
+ if (mask & (1 << d)) {
+ def2[n2] = ld1->getDef(d);
+ size2 += def2[n2++]->reg.size;
+ } else {
+ assert(!n2);
+ addr2 += ld1->getDef(d)->reg.size;
+ }
+ }
+
+ updateLdStOffset(ld1, addr1, func);
+ ld1->setType(typeOfSize(size1));
+ for (d = 0; d < 4; ++d)
+ ld1->setDef(d, (d < n1) ? def1[d] : NULL);
+
+ if (!n2)
+ return;
+
+ ld2 = ld1->clone(false);
+ updateLdStOffset(ld2, addr2, func);
+ ld2->setType(typeOfSize(size2));
+ for (d = 0; d < 4; ++d)
+ ld2->setDef(d, (d < n2) ? def2[d] : NULL);
+
+ ld1->bb->insertAfter(ld1, ld2);
+}
+
+// =============================================================================
+
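+// RUN_PASS(l, n, f): if the optimization level is at least l, instantiate
+// pass class n and invoke its member function f on this Program, bailing out
+// if the pass reports failure.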
+#define RUN_PASS(l, n, f) \
+ if (level >= (l)) { \
+ if (dbgFlags & NV50_IR_DEBUG_VERBOSE) \
+ INFO("PEEPHOLE: %s\n", #n); \
+ n pass; \
+ if (!pass.f(this)) \
+ return false; \
+ }
+
+bool
+Program::optimizeSSA(int level)
+{
+ RUN_PASS(1, DeadCodeElim, buryAll);
+ RUN_PASS(1, CopyPropagation, run);
+ RUN_PASS(2, GlobalCSE, run);
+ RUN_PASS(1, LocalCSE, run);
+ RUN_PASS(2, AlgebraicOpt, run);
+   RUN_PASS(2, ModifierFolding, run); // before load propagation -> fewer checks
+ RUN_PASS(1, ConstantFolding, foldAll);
+ RUN_PASS(1, LoadPropagation, run);
+ RUN_PASS(2, MemoryOpt, run);
+ RUN_PASS(2, LocalCSE, run);
+ RUN_PASS(0, DeadCodeElim, buryAll);
+ return true;
+}
+
+bool
+Program::optimizePostRA(int level)
+{
+ RUN_PASS(2, FlatteningPass, run);
+ return true;
+}
+
+}
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
new file mode 100644
index 00000000000..b5ca3814098
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
@@ -0,0 +1,558 @@
+
+#include "nv50_ir.h"
+#include "nv50_ir_target.h"
+
+namespace nv50_ir {
+
+enum TextStyle
+{
+ TXT_DEFAULT,
+ TXT_GPR,
+ TXT_REGISTER,
+ TXT_FLAGS,
+ TXT_MEM,
+ TXT_IMMD,
+ TXT_BRA,
+ TXT_INSN
+};
+
+static const char *colour[8] =
+{
+#if 1
+ "\x1b[00m",
+ "\x1b[34m",
+ "\x1b[35m",
+ "\x1b[35m",
+ "\x1b[36m",
+ "\x1b[33m",
+ "\x1b[37m",
+ "\x1b[32m"
+#else
+ "", "", "", "", "", "", "", ""
+#endif
+};
+
+const char *operationStr[OP_LAST + 1] =
+{
+ "nop",
+ "phi",
+ "union",
+ "split",
+ "merge",
+ "consec",
+ "mov",
+ "ld",
+ "st",
+ "add",
+ "sub",
+ "mul",
+ "div",
+ "mod",
+ "mad",
+ "fma",
+ "sad",
+ "abs",
+ "neg",
+ "not",
+ "and",
+ "or",
+ "xor",
+ "shl",
+ "shr",
+ "max",
+ "min",
+ "sat",
+ "ceil",
+ "floor",
+ "trunc",
+ "cvt",
+ "set and",
+ "set or",
+ "set xor",
+ "set",
+ "selp",
+ "slct",
+ "rcp",
+ "rsq",
+ "lg2",
+ "sin",
+ "cos",
+ "ex2",
+ "exp",
+ "log",
+ "presin",
+ "preex2",
+ "sqrt",
+ "pow",
+ "bra",
+ "call",
+ "ret",
+ "cont",
+ "break",
+ "preret",
+ "precont",
+ "prebreak",
+ "brkpt",
+ "joinat",
+ "join",
+ "discard",
+ "exit",
+ "barrier",
+ "vfetch",
+ "pfetch",
+ "export",
+ "linterp",
+ "pinterp",
+ "emit",
+ "restart",
+ "tex",
+ "texbias",
+ "texlod",
+ "texfetch",
+ "texquery",
+ "texgrad",
+ "texgather",
+ "texcsaa",
+ "suld",
+ "sust",
+ "dfdx",
+ "dfdy",
+ "rdsv",
+ "wrsv",
+ "pixld",
+ "quadop",
+ "quadon",
+ "quadpop",
+ "popcnt",
+ "insbf",
+ "extbf",
+ "(invalid)"
+};
+
+static const char *DataTypeStr[] =
+{
+ "-",
+ "u8", "s8",
+ "u16", "s16",
+ "u32", "s32",
+ "u64", "s64",
+ "f16", "f32", "f64",
+ "b96", "b128"
+};
+
+static const char *RoundModeStr[] =
+{
+ "", "rm", "rz", "rp", "rni", "rmi", "rzi", "rpi"
+};
+
+static const char *CondCodeStr[] =
+{
+ "never",
+ "lt",
+ "eq",
+ "le",
+ "gt",
+ "ne",
+ "ge",
+ "",
+ "(invalid)",
+ "ltu",
+ "equ",
+ "leu",
+ "gtu",
+ "neu",
+ "geu",
+ "",
+ "no",
+ "nc",
+ "ns",
+ "na",
+ "a",
+ "s",
+ "c",
+ "o"
+};
+
+static const char *SemanticStr[SV_LAST + 1] =
+{
+ "POSITION",
+ "VERTEX_ID",
+ "INSTANCE_ID",
+ "INVOCATION_ID",
+ "PRIMITIVE_ID",
+ "VERTEX_COUNT",
+ "LAYER",
+ "VIEWPORT_INDEX",
+ "Y_DIR",
+ "FACE",
+ "POINT_SIZE",
+ "POINT_COORD",
+ "CLIP_DISTANCE",
+ "SAMPLE_INDEX",
+ "TESS_FACTOR",
+ "TESS_COORD",
+ "TID",
+ "CTAID",
+ "NTID",
+ "GRIDID",
+ "NCTAID",
+ "LANEID",
+ "PHYSID",
+ "NPHYSID",
+ "CLOCK",
+ "LBASE",
+ "SBASE",
+ "?",
+ "(INVALID)"
+};
+
+#define PRINT(args...) \
+ do { \
+ pos += snprintf(&buf[pos], size - pos, args); \
+ } while(0)
+
+#define SPACE_PRINT(cond, args...) \
+ do { \
+ if (cond) \
+ buf[pos++] = ' '; \
+ pos += snprintf(&buf[pos], size - pos, args); \
+ } while(0)
+
+#define SPACE() \
+ do { \
+ if (pos < size) \
+ buf[pos++] = ' '; \
+ } while(0)
+
+int Modifier::print(char *buf, size_t size) const
+{
+ size_t pos = 0;
+
+ if (bits)
+ PRINT("%s", colour[TXT_INSN]);
+
+ size_t base = pos;
+
+ if (bits & NV50_IR_MOD_NOT)
+ PRINT("not");
+ if (bits & NV50_IR_MOD_SAT)
+ SPACE_PRINT(pos > base && pos < size, "sat");
+ if (bits & NV50_IR_MOD_NEG)
+ SPACE_PRINT(pos > base && pos < size, "neg");
+ if (bits & NV50_IR_MOD_ABS)
+ SPACE_PRINT(pos > base && pos < size, "abs");
+
+ return pos;
+}
+
+int LValue::print(char *buf, size_t size, DataType ty) const
+{
+ const char *postFix = "";
+ size_t pos = 0;
+ int idx = join->reg.data.id >= 0 ? join->reg.data.id : id;
+ char p = join->reg.data.id >= 0 ? '$' : '%';
+ char r;
+ int col = TXT_DEFAULT;
+
+ switch (reg.file) {
+ case FILE_GPR:
+ r = 'r'; col = TXT_GPR;
+ if (reg.size == 8)
+ postFix = "d";
+ else
+ if (reg.size == 16)
+ postFix = "q";
+ break;
+ case FILE_PREDICATE:
+ r = 'p'; col = TXT_REGISTER;
+ if (reg.size == 2)
+ postFix = "d";
+ else
+ if (reg.size == 4)
+ postFix = "q";
+ break;
+ case FILE_FLAGS:
+ r = 'c'; col = TXT_FLAGS;
+ break;
+ case FILE_ADDRESS:
+ r = 'a'; col = TXT_REGISTER;
+ break;
+ default:
+ assert(!"invalid file for lvalue");
+ r = '?';
+ break;
+ }
+
+ PRINT("%s%c%c%i%s", colour[col], p, r, idx, postFix);
+
+ return pos;
+}
+
+int ImmediateValue::print(char *buf, size_t size, DataType ty) const
+{
+ size_t pos = 0;
+
+ PRINT("%s", colour[TXT_IMMD]);
+
+ switch (ty) {
+ case TYPE_F32: PRINT("%f", reg.data.f32); break;
+ case TYPE_F64: PRINT("%f", reg.data.f64); break;
+ case TYPE_U8: PRINT("0x%02x", reg.data.u8); break;
+ case TYPE_S8: PRINT("%i", reg.data.s8); break;
+ case TYPE_U16: PRINT("0x%04x", reg.data.u16); break;
+ case TYPE_S16: PRINT("%i", reg.data.s16); break;
+ case TYPE_U32: PRINT("0x%08x", reg.data.u32); break;
+ case TYPE_S32: PRINT("%i", reg.data.s32); break;
+ case TYPE_U64:
+ case TYPE_S64:
+ default:
+ PRINT("0x%016lx", reg.data.u64);
+ break;
+ }
+ return pos;
+}
+
+int Symbol::print(char *buf, size_t size, DataType ty) const
+{
+ return print(buf, size, NULL, NULL, ty);
+}
+
+int Symbol::print(char *buf, size_t size,
+ Value *rel, Value *dimRel, DataType ty) const
+{
+ size_t pos = 0;
+ char c;
+
+ if (ty == TYPE_NONE)
+ ty = typeOfSize(reg.size);
+
+ if (reg.file == FILE_SYSTEM_VALUE) {
+ PRINT("%ssv[%s%s:%i%s", colour[TXT_MEM],
+ colour[TXT_REGISTER],
+ SemanticStr[reg.data.sv.sv], reg.data.sv.index, colour[TXT_MEM]);
+ if (rel) {
+ PRINT("%s+", colour[TXT_DEFAULT]);
+ pos += rel->print(&buf[pos], size - pos);
+ }
+ PRINT("%s]", colour[TXT_MEM]);
+ return pos;
+ }
+
+ switch (reg.file) {
+ case FILE_MEMORY_CONST: c = 'c'; break;
+ case FILE_SHADER_INPUT: c = 'a'; break;
+ case FILE_SHADER_OUTPUT: c = 'o'; break;
+ case FILE_MEMORY_GLOBAL: c = 'g'; break;
+ case FILE_MEMORY_SHARED: c = 's'; break;
+ case FILE_MEMORY_LOCAL: c = 'l'; break;
+ default:
+ assert(!"invalid file");
+ c = '?';
+ break;
+ }
+
+ if (c == 'c')
+ PRINT("%s%c%i[", colour[TXT_MEM], c, reg.fileIndex);
+ else
+ PRINT("%s%c[", colour[TXT_MEM], c);
+
+ if (dimRel) {
+ pos += dimRel->print(&buf[pos], size - pos, TYPE_S32);
+ PRINT("%s][", colour[TXT_MEM]);
+ }
+
+ if (rel) {
+ pos += rel->print(&buf[pos], size - pos);
+ PRINT("%s%c", colour[TXT_DEFAULT], (reg.data.offset < 0) ? '-' : '+');
+ } else {
+ assert(reg.data.offset >= 0);
+ }
+ PRINT("%s0x%x%s]", colour[TXT_IMMD], abs(reg.data.offset), colour[TXT_MEM]);
+
+ return pos;
+}
+
+void Instruction::print() const
+{
+ #define BUFSZ 512
+
+ const size_t size = BUFSZ;
+
+ char buf[BUFSZ];
+ int s, d;
+ size_t pos = 0;
+
+ PRINT("%s", colour[TXT_INSN]);
+
+ if (join)
+ PRINT("join ");
+
+ if (predSrc >= 0) {
+ const size_t pre = pos;
+ if (getSrc(predSrc)->reg.file == FILE_PREDICATE) {
+ if (cc == CC_NOT_P)
+ PRINT("not");
+ } else {
+ PRINT("%s", CondCodeStr[cc]);
+ }
+ if (pos > pre + 1)
+ SPACE();
+ pos += src[predSrc].get()->print(&buf[pos], BUFSZ - pos);
+ PRINT(" %s", colour[TXT_INSN]);
+ }
+
+ if (saturate)
+ PRINT("sat ");
+
+ if (asFlow()) {
+ PRINT("%s", operationStr[op]);
+ if (op == OP_CALL && asFlow()->builtin) {
+ PRINT(" %sBUILTIN:%i", colour[TXT_BRA], asFlow()->target.builtin);
+ } else
+ if (op == OP_CALL && asFlow()->target.fn) {
+ PRINT(" %s%s", colour[TXT_BRA], asFlow()->target.fn->getName());
+ } else
+ if (asFlow()->target.bb)
+ PRINT(" %sBB:%i", colour[TXT_BRA], asFlow()->target.bb->getId());
+ } else {
+ PRINT("%s ", operationStr[op]);
+ if (perPatch)
+ PRINT("patch ");
+ if (asTex())
+ PRINT("%s ", asTex()->tex.target.getName());
+ if (postFactor)
+ PRINT("x2^%i ", postFactor);
+ PRINT("%s%s", dnz ? "dnz " : (ftz ? "ftz " : ""), DataTypeStr[dType]);
+ }
+
+ if (rnd != ROUND_N)
+ PRINT(" %s", RoundModeStr[rnd]);
+
+ if (def[1].exists())
+ PRINT(" {");
+ for (d = 0; defExists(d); ++d) {
+ SPACE();
+ pos += def[d].get()->print(&buf[pos], size - pos);
+ }
+ if (d > 1)
+ PRINT(" %s}", colour[TXT_INSN]);
+ else
+ if (!d && !asFlow())
+ PRINT(" %s#", colour[TXT_INSN]);
+
+ if (asCmp())
+ PRINT(" %s%s", colour[TXT_INSN], CondCodeStr[asCmp()->setCond]);
+
+ if (sType != dType)
+ PRINT(" %s%s", colour[TXT_INSN], DataTypeStr[sType]);
+
+ for (s = 0; srcExists(s); ++s) {
+ if (s == predSrc || src[s].usedAsPtr)
+ continue;
+ const size_t pre = pos;
+ SPACE();
+ pos += src[s].mod.print(&buf[pos], BUFSZ - pos);
+ if (pos > pre + 1)
+ SPACE();
+ if (src[s].isIndirect(0) || src[s].isIndirect(1))
+ pos += src[s].get()->asSym()->print(&buf[pos], BUFSZ - pos,
+ getIndirect(s, 0),
+ getIndirect(s, 1));
+ else
+ pos += src[s].get()->print(&buf[pos], BUFSZ - pos, sType);
+ }
+
+ PRINT("%s", colour[TXT_DEFAULT]);
+
+ buf[MIN2(pos, BUFSZ - 1)] = 0;
+
+ INFO("%s (%u)\n", buf, encSize);
+}
+
+class PrintPass : public Pass
+{
+public:
+ PrintPass() : serial(0) { }
+
+ virtual bool visit(Function *);
+ virtual bool visit(BasicBlock *);
+ virtual bool visit(Instruction *);
+
+private:
+ int serial;
+};
+
+bool
+PrintPass::visit(Function *fn)
+{
+ INFO("\n%s:\n", fn->getName());
+
+ return true;
+}
+
+bool
+PrintPass::visit(BasicBlock *bb)
+{
+#if 0
+ INFO("---\n");
+ for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next())
+ INFO(" <- BB:%i (%s)\n",
+ BasicBlock::get(ei.getNode())->getId(),
+ ei.getEdge()->typeStr());
+#endif
+ INFO("BB:%i (%u instructions) - ", bb->getId(), bb->getInsnCount());
+
+ if (bb->idom())
+ INFO("idom = BB:%i, ", bb->idom()->getId());
+
+ INFO("df = { ");
+ for (DLList::Iterator df = bb->getDF().iterator(); !df.end(); df.next())
+ INFO("BB:%i ", BasicBlock::get(df)->getId());
+
+ INFO("}\n");
+
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next())
+ INFO(" -> BB:%i (%s)\n",
+ BasicBlock::get(ei.getNode())->getId(),
+ ei.getEdge()->typeStr());
+
+ return true;
+}
+
+bool
+PrintPass::visit(Instruction *insn)
+{
+ INFO("%3i: ", serial++);
+ insn->print();
+ return true;
+}
+
+void
+Function::print()
+{
+ PrintPass pass;
+ pass.run(this, true, false);
+}
+
+void
+Program::print()
+{
+ PrintPass pass;
+ pass.run(this, true, false);
+}
+
+void
+Function::printLiveIntervals() const
+{
+ INFO("printing live intervals ...\n");
+
+ for (ArrayList::Iterator it = allLValues.iterator(); !it.end(); it.next()) {
+ const Value *lval = Value::get(it)->asLValue();
+ if (lval && !lval->livei.isEmpty()) {
+ INFO("livei(%%%i): ", lval->id);
+ lval->livei.print();
+ }
+ }
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp
new file mode 100644
index 00000000000..7e3c44d3b15
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp
@@ -0,0 +1,963 @@
+
+#include "nv50_ir.h"
+#include "nv50_ir_target.h"
+
+#include "nv50/nv50_debug.h"
+
+namespace nv50_ir {
+
+#define MAX_REGISTER_FILE_SIZE 256
+
+class RegisterSet
+{
+public:
+ RegisterSet();
+ RegisterSet(const Target *);
+
+ void init(const Target *);
+ void reset(); // reset allocation status, but not max assigned regs
+
+ void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
+ void intersect(DataFile f, const RegisterSet *);
+
+ bool assign(Value **, int nr);
+ void release(const Value *);
+ void occupy(const Value *);
+
+ int getMaxAssigned(DataFile f) const { return fill[f]; }
+
+ void print() const;
+
+private:
+ uint32_t bits[FILE_ADDRESS + 1][(MAX_REGISTER_FILE_SIZE + 31) / 32];
+
+ int unit[FILE_ADDRESS + 1]; // log2 of allocation granularity
+
+ int last[FILE_ADDRESS + 1];
+ int fill[FILE_ADDRESS + 1];
+};
+
+void
+RegisterSet::reset()
+{
+ memset(bits, 0, sizeof(bits));
+}
+
+RegisterSet::RegisterSet()
+{
+ reset();
+}
+
+void
+RegisterSet::init(const Target *targ)
+{
+ for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
+ DataFile f = static_cast<DataFile>(rf);
+ last[rf] = targ->getFileSize(f) - 1;
+ unit[rf] = targ->getFileUnit(f);
+ fill[rf] = -1;
+ assert(last[rf] < MAX_REGISTER_FILE_SIZE);
+ }
+}
+
+RegisterSet::RegisterSet(const Target *targ)
+{
+ reset();
+ init(targ);
+}
+
+void
+RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
+{
+ for (int i = 0; i < (last[f] + 31) / 32; ++i)
+ bits[f][i] = (bits[f][i] | lock) & ~unlock;
+}
+
+void
+RegisterSet::intersect(DataFile f, const RegisterSet *set)
+{
+ for (int i = 0; i < (last[f] + 31) / 32; ++i)
+ bits[f][i] |= set->bits[f][i];
+}
+
+void
+RegisterSet::print() const
+{
+ INFO("GPR:");
+ for (int i = 0; i < (last[FILE_GPR] + 31) / 32; ++i)
+ INFO(" %08x", bits[FILE_GPR][i]);
+ INFO("\n");
+}
+
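+// Find a free, suitably aligned span of registers large enough for nr
+// consecutive values (nr == 3 is rounded up to 4) and mark it occupied;
+// returns false if no such span is left in the file.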
+bool
+RegisterSet::assign(Value **def, int nr)
+{
+ DataFile f = def[0]->reg.file;
+ int n = nr;
+ if (n == 3)
+ n = 4;
+ int s = (n * def[0]->reg.size) >> unit[f];
+ uint32_t m = (1 << s) - 1;
+
+ int id = last[f] + 1;
+ int i;
+
+ for (i = 0; (i * 32) < last[f]; ++i) {
+ if (bits[f][i] == 0xffffffff)
+ continue;
+
+ for (id = 0; id < 32; id += s)
+ if (!(bits[f][i] & (m << id)))
+ break;
+ if (id < 32)
+ break;
+ }
+ id += i * 32;
+ if (id > last[f])
+ return false;
+
+ bits[f][id / 32] |= m << (id % 32);
+
+ if (id + (s - 1) > fill[f])
+ fill[f] = id + (s - 1);
+
+ for (i = 0; i < nr; ++i, ++id)
+      if (!def[i]->livei.isEmpty()) // XXX: really increase id if empty?
+ def[i]->reg.data.id = id;
+ return true;
+}
+
+void
+RegisterSet::occupy(const Value *val)
+{
+ int id = val->reg.data.id;
+ if (id < 0)
+ return;
+ unsigned int f = val->reg.file;
+
+ uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;
+
+ INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %x\n", f, id, m);
+
+ bits[f][id / 32] |= m << (id % 32);
+
+ if (fill[f] < id)
+ fill[f] = id;
+}
+
+void
+RegisterSet::release(const Value *val)
+{
+ int id = val->reg.data.id;
+ if (id < 0)
+ return;
+ unsigned int f = val->reg.file;
+
+ uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;
+
+ INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %x\n", f, id, m);
+
+ bits[f][id / 32] &= ~(m << (id % 32));
+}
+
+#define JOIN_MASK_PHI (1 << 0)
+#define JOIN_MASK_UNION (1 << 1)
+#define JOIN_MASK_MOV (1 << 2)
+#define JOIN_MASK_TEX (1 << 3)
+#define JOIN_MASK_CONSTRAINT (1 << 4)
+
+class RegAlloc
+{
+public:
+ RegAlloc(Program *program) : prog(program), sequence(0) { }
+
+ bool exec();
+ bool execFunc();
+
+private:
+ bool coalesceValues(unsigned int mask);
+ bool linearScan();
+ bool allocateConstrainedValues();
+
+private:
+ class PhiMovesPass : public Pass {
+ private:
+ virtual bool visit(BasicBlock *);
+ inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
+ };
+
+ class BuildIntervalsPass : public Pass {
+ private:
+ virtual bool visit(BasicBlock *);
+ void collectLiveValues(BasicBlock *);
+ void addLiveRange(Value *, const BasicBlock *, int end);
+ };
+
+ class InsertConstraintsPass : public Pass {
+ public:
+ bool exec(Function *func);
+ private:
+ virtual bool visit(BasicBlock *);
+
+ bool insertConstraintMoves();
+
+ void addHazard(Instruction *i, const ValueRef *src);
+ void textureMask(TexInstruction *);
+ void addConstraint(Instruction *, int s, int n);
+ bool detectConflict(Instruction *, int s);
+
+ DLList constrList;
+ };
+
+ bool buildLiveSets(BasicBlock *);
+ void collectLValues(DLList&, bool assignedOnly);
+
+ void insertOrderedTail(DLList&, Value *);
+ inline Instruction *insnBySerial(int);
+
+private:
+ Program *prog;
+ Function *func;
+
+ // instructions in control flow / chronological order
+ ArrayList insns;
+
+ int sequence; // for manual passes through CFG
+};
+
+Instruction *
+RegAlloc::insnBySerial(int serial)
+{
+ return reinterpret_cast<Instruction *>(insns.get(serial));
+}
+
+void
+RegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
+ const BasicBlock *bb,
+ int end)
+{
+ Instruction *insn = val->getUniqueInsn();
+
+ if (!insn)
+ return;
+ assert(bb->getFirst()->serial <= bb->getExit()->serial);
+ assert(bb->getExit()->serial + 1 >= end);
+
+ int begin = insn->serial;
+ if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
+ begin = bb->getEntry()->serial;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
+ val->id, begin, insn->serial, end);
+
+ if (begin != end) // empty ranges are only added as hazards for fixed regs
+ val->livei.extend(begin, end);
+}
+
+bool
+RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
+{
+ if (b->cfg.incidentCount() <= 1)
+ return false;
+
+ int n = 0;
+ for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next())
+ if (ei.getType() == Graph::Edge::TREE ||
+ ei.getType() == Graph::Edge::FORWARD)
+ ++n;
+ return (n == 2);
+}
+
+// For each operand of each PHI in b, generate a new value by inserting a MOV
+// at the end of the block it is coming from and replace the operand with its
+// result. This eliminates liveness conflicts and, where a conflict remains,
+// allows the value to be copied into the right register.
+//
+// These MOVs are also crucial in making sure the live intervals of phi sources
+// are extended until the end of the loop, since they are not included in the
+// live-in sets.
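+//
+// Illustrative sketch (value and block numbers are made up):
+//   BB:1  %a = ...            BB:2  %b = ...
+//         bra BB:3                  bra BB:3
+//   BB:3  %c = phi %a, %b
+// becomes
+//   BB:1  %a = ...            BB:2  %b = ...
+//         %a2 = mov %a              %b2 = mov %b
+//         bra BB:3                  bra BB:3
+//   BB:3  %c = phi %a2, %b2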
+bool
+RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+{
+ Instruction *phi, *mov;
+ BasicBlock *pb, *pn;
+
+ for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+ pb = pn = BasicBlock::get(ei.getNode());
+ assert(pb);
+
+ if (needNewElseBlock(bb, pb)) {
+ pn = new BasicBlock(func);
+
+ // deletes an edge, iterator is invalid after this:
+ pb->cfg.detach(&bb->cfg);
+ pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
+ pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); // XXX: check order !
+
+ assert(pb->getExit()->op != OP_CALL);
+ if (pb->getExit()->asFlow()->target.bb == bb)
+ pb->getExit()->asFlow()->target.bb = pn;
+ break;
+ }
+ }
+
+ // insert MOVs (phi->src[j] should stem from j-th in-BB)
+ int j = 0;
+ for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+ pb = BasicBlock::get(ei.getNode());
+ if (!pb->isTerminated())
+ pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
+
+ for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+ mov = new_Instruction(func, OP_MOV, TYPE_U32);
+
+ mov->setSrc(0, phi->getSrc(j));
+ mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
+ phi->setSrc(j, mov->getDef(0));
+
+ pb->insertBefore(pb->getExit(), mov);
+ }
+ ++j;
+ }
+
+ return true;
+}
+
+// Build the set of live-in variables of bb.
+bool
+RegAlloc::buildLiveSets(BasicBlock *bb)
+{
+ BasicBlock *bn;
+ Instruction *i;
+ unsigned int s, d;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());
+
+ bb->liveSet.allocate(func->allLValues.getSize(), false);
+
+ int n = 0;
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ bn = BasicBlock::get(ei.getNode());
+ if (bn == bb)
+ continue;
+ if (bn->cfg.visit(sequence))
+ if (!buildLiveSets(bn))
+ return false;
+ if (n++ == 0)
+ bb->liveSet = bn->liveSet;
+ else
+ bb->liveSet |= bn->liveSet;
+ }
+ if (!n && !bb->liveSet.marker)
+ bb->liveSet.fill(0);
+ bb->liveSet.marker = true;
+
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
+ INFO("BB:%i live set of out blocks:\n", bb->getId());
+ bb->liveSet.print();
+ }
+
+ // if (!bb->getEntry())
+ // return true;
+
+ for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
+ for (d = 0; i->defExists(d); ++d)
+ bb->liveSet.clr(i->getDef(d)->id);
+ for (s = 0; i->srcExists(s); ++s)
+ if (i->getSrc(s)->asLValue())
+ bb->liveSet.set(i->getSrc(s)->id);
+ }
+ for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
+ bb->liveSet.clr(i->getDef(0)->id);
+
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
+ INFO("BB:%i live set after propagation:\n", bb->getId());
+ bb->liveSet.print();
+ }
+
+ return true;
+}
+
+void
+RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
+{
+ BasicBlock *bbA = NULL, *bbB = NULL;
+
+ assert(bb->cfg.incidentCount() || bb->liveSet.popCount() == 0);
+
+ if (bb->cfg.outgoingCount()) {
+ // trickery to save a loop of OR'ing liveSets
+ // aliasing works fine with BitSet::setOr
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ if (ei.getType() == Graph::Edge::DUMMY)
+ continue;
+ if (bbA) {
+ bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
+ bbA = bb;
+ } else {
+ bbA = bbB;
+ }
+ bbB = BasicBlock::get(ei.getNode());
+ }
+ bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
+ } else
+ if (bb->cfg.incidentCount()) {
+ bb->liveSet.fill(0);
+ }
+}
+
+bool
+RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
+{
+ collectLiveValues(bb);
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());
+
+   // go through out blocks and remove from the live set any phi sources that
+   // do not originate from the current block
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ BasicBlock *out = BasicBlock::get(ei.getNode());
+
+ for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
+ bb->liveSet.clr(i->getDef(0)->id);
+
+ for (int s = 0; s < NV50_IR_MAX_SRCS && i->src[s].exists(); ++s) {
+ assert(i->src[s].getInsn());
+ if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
+ bb->liveSet.set(i->getSrc(s)->id);
+ else
+ bb->liveSet.clr(i->getSrc(s)->id);
+ }
+ }
+ }
+
+ // remaining live-outs are live until end
+ if (bb->getExit()) {
+ for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
+ if (bb->liveSet.test(j))
+ addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
+ }
+
+ for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
+ for (int d = 0; i->defExists(d); ++d) {
+ bb->liveSet.clr(i->getDef(d)->id);
+ if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
+ i->getDef(d)->livei.extend(i->serial, i->serial);
+ }
+
+ for (int s = 0; i->srcExists(s); ++s) {
+ if (!i->getSrc(s)->asLValue())
+ continue;
+ if (!bb->liveSet.test(i->getSrc(s)->id)) {
+ bb->liveSet.set(i->getSrc(s)->id);
+ addLiveRange(i->getSrc(s), bb, i->serial);
+ }
+ }
+ }
+
+ return true;
+}
+
+bool
+RegAlloc::coalesceValues(unsigned int mask)
+{
+ int c, n;
+
+ for (n = 0; n < insns.getSize(); ++n) {
+ Instruction *i;
+ Instruction *insn = insnBySerial(n);
+
+ switch (insn->op) {
+ case OP_PHI:
+ if (!(mask & JOIN_MASK_PHI))
+ break;
+ for (c = 0; insn->srcExists(c); ++c)
+ if (!insn->getDef(0)->coalesce(insn->getSrc(c), false)) {
+ ERROR("failed to coalesce phi operands\n");
+ return false;
+ }
+ break;
+ case OP_UNION:
+ if (!(mask & JOIN_MASK_UNION))
+ break;
+ for (c = 0; insn->srcExists(c); ++c)
+ insn->getDef(0)->coalesce(insn->getSrc(c), true);
+ break;
+ case OP_CONSTRAINT:
+ if (!(mask & JOIN_MASK_CONSTRAINT))
+ break;
+ for (c = 0; c < 4 && insn->srcExists(c); ++c)
+ insn->getDef(c)->coalesce(insn->getSrc(c), true);
+ break;
+ case OP_MOV:
+ if (!(mask & JOIN_MASK_MOV))
+ break;
+ i = insn->getSrc(0)->getUniqueInsn();
+ if (i && !i->constrainedDefs())
+ insn->getDef(0)->coalesce(insn->getSrc(0), false);
+ break;
+ case OP_TEX:
+ case OP_TXB:
+ case OP_TXL:
+ case OP_TXF:
+ case OP_TXQ:
+ case OP_TXD:
+ case OP_TXG:
+ case OP_TEXCSAA:
+ if (!(mask & JOIN_MASK_TEX))
+ break;
+ for (c = 0; c < 4 && insn->srcExists(c); ++c)
+ insn->getDef(c)->coalesce(insn->getSrc(c), true);
+ break;
+ default:
+ break;
+ }
+ }
+ return true;
+}
+
+void
+RegAlloc::insertOrderedTail(DLList &list, Value *val)
+{
+ // we insert the live intervals in order, so this should be short
+ DLList::Iterator iter = list.revIterator();
+ const int begin = val->livei.begin();
+ for (; !iter.end(); iter.next()) {
+ if (reinterpret_cast<Value *>(iter.get())->livei.begin() <= begin)
+ break;
+ }
+ iter.insert(val);
+}
+
+static void
+checkList(DLList &list)
+{
+ Value *prev = NULL;
+ Value *next = NULL;
+
+ for (DLList::Iterator iter = list.iterator(); !iter.end(); iter.next()) {
+ next = Value::get(iter);
+ assert(next);
+ if (prev) {
+ assert(prev->livei.begin() <= next->livei.begin());
+ }
+ assert(next->join == next);
+ prev = next;
+ }
+}
+
+void
+RegAlloc::collectLValues(DLList &list, bool assignedOnly)
+{
+ for (int n = 0; n < insns.getSize(); ++n) {
+ Instruction *i = insnBySerial(n);
+
+ for (int d = 0; i->defExists(d); ++d)
+ if (!i->getDef(d)->livei.isEmpty())
+ if (!assignedOnly || i->getDef(d)->reg.data.id >= 0)
+ insertOrderedTail(list, i->getDef(d));
+ }
+ checkList(list);
+}
+
+bool
+RegAlloc::allocateConstrainedValues()
+{
+ Value *defs[4];
+ RegisterSet regSet[4];
+ DLList regVals;
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: allocating constrained values\n");
+
+ collectLValues(regVals, true);
+
+ for (int c = 0; c < 4; ++c)
+ regSet[c].init(prog->getTarget());
+
+ for (int n = 0; n < insns.getSize(); ++n) {
+ Instruction *i = insnBySerial(n);
+
+ const int vecSize = i->defCount(0xf);
+ if (vecSize < 2)
+ continue;
+ assert(vecSize <= 4);
+
+ for (int c = 0; c < vecSize; ++c)
+ defs[c] = i->def[c].rep();
+
+ if (defs[0]->reg.data.id >= 0) {
+ for (int c = 1; c < vecSize; ++c) {
+ assert(defs[c]->reg.data.id >= 0);
+ }
+ continue;
+ }
+
+ for (int c = 0; c < vecSize; ++c) {
+ uint32_t mask;
+ regSet[c].reset();
+
+ for (DLList::Iterator it = regVals.iterator(); !it.end(); it.next()) {
+ Value *rVal = Value::get(it);
+ if (rVal->reg.data.id >= 0 && rVal->livei.overlaps(defs[c]->livei))
+ regSet[c].occupy(rVal);
+ }
+ mask = 0x11111111;
+ if (vecSize == 2) // granularity is 2 instead of 4
+ mask |= 0x11111111 << 2;
+ regSet[c].periodicMask(defs[0]->reg.file, 0, ~(mask << c));
+
+ if (!defs[c]->livei.isEmpty())
+ insertOrderedTail(regVals, defs[c]);
+ }
+ for (int c = 1; c < vecSize; ++c)
+ regSet[0].intersect(defs[0]->reg.file, &regSet[c]);
+
+ if (!regSet[0].assign(&defs[0], vecSize)) // TODO: spilling
+ return false;
+ }
+ for (int c = 0; c < 4; c += 2)
+ if (regSet[c].getMaxAssigned(FILE_GPR) > prog->maxGPR)
+ prog->maxGPR = regSet[c].getMaxAssigned(FILE_GPR);
+ return true;
+}
+
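+// Standard linear scan over live intervals sorted by start point: expired
+// intervals are retired from 'active', intervals with a lifetime hole at the
+// current position are parked in 'inactive', and the current value is given
+// a register that is free over its whole interval.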
+bool
+RegAlloc::linearScan()
+{
+ Value *cur, *val;
+ DLList unhandled, active, inactive;
+ RegisterSet f(prog->getTarget()), free(prog->getTarget());
+
+ INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: linear scan\n");
+
+ collectLValues(unhandled, false);
+
+ for (DLList::Iterator cI = unhandled.iterator(); !cI.end();) {
+ cur = Value::get(cI);
+ cI.erase();
+
+ for (DLList::Iterator aI = active.iterator(); !aI.end();) {
+ val = Value::get(aI);
+ if (val->livei.end() <= cur->livei.begin()) {
+ free.release(val);
+ aI.erase();
+ } else
+ if (!val->livei.contains(cur->livei.begin())) {
+ free.release(val);
+ aI.moveToList(inactive);
+ } else {
+ aI.next();
+ }
+ }
+
+ for (DLList::Iterator iI = inactive.iterator(); !iI.end();) {
+ val = Value::get(iI);
+ if (val->livei.end() <= cur->livei.begin()) {
+ iI.erase();
+ } else
+ if (val->livei.contains(cur->livei.begin())) {
+ free.occupy(val);
+ iI.moveToList(active);
+ } else {
+ iI.next();
+ }
+ }
+ f = free;
+
+ for (DLList::Iterator iI = inactive.iterator(); !iI.end(); iI.next()) {
+ val = Value::get(iI);
+ if (val->livei.overlaps(cur->livei))
+ f.occupy(val);
+ }
+
+ for (DLList::Iterator uI = unhandled.iterator(); !uI.end(); uI.next()) {
+ val = Value::get(uI);
+ if (val->reg.data.id >= 0 && val->livei.overlaps(cur->livei))
+ f.occupy(val);
+ }
+
+ if (cur->reg.data.id < 0) {
+ bool spill = !f.assign(&cur, 1);
+ if (spill) {
+ ERROR("out of registers of file %u\n", cur->reg.file);
+ abort();
+ }
+ }
+ free.occupy(cur);
+ active.insert(cur);
+ }
+
+ if (f.getMaxAssigned(FILE_GPR) > prog->maxGPR)
+ prog->maxGPR = f.getMaxAssigned(FILE_GPR);
+ if (free.getMaxAssigned(FILE_GPR) > prog->maxGPR)
+ prog->maxGPR = free.getMaxAssigned(FILE_GPR);
+ return true;
+}
+
+bool
+RegAlloc::exec()
+{
+ for (ArrayList::Iterator fi = prog->allFuncs.iterator();
+ !fi.end(); fi.next()) {
+ func = reinterpret_cast<Function *>(fi.get());
+ if (!execFunc())
+ return false;
+ }
+ return true;
+}
+
+bool
+RegAlloc::execFunc()
+{
+ InsertConstraintsPass insertConstr;
+ PhiMovesPass insertMoves;
+ BuildIntervalsPass buildIntervals;
+
+ unsigned int i;
+ bool ret;
+
+ ret = insertConstr.exec(func);
+ if (!ret)
+ goto out;
+
+ ret = insertMoves.run(func);
+ if (!ret)
+ goto out;
+
+ for (sequence = func->cfg.nextSequence(), i = 0;
+ ret && i <= func->loopNestingBound;
+ sequence = func->cfg.nextSequence(), ++i)
+ ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
+ if (!ret)
+ goto out;
+
+ func->orderInstructions(this->insns);
+
+ ret = buildIntervals.run(func);
+ if (!ret)
+ goto out;
+
+ ret = coalesceValues(JOIN_MASK_PHI);
+ if (!ret)
+ goto out;
+ switch (prog->getTarget()->getChipset() & 0xf0) {
+ case 0x50:
+ ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_TEX);
+ break;
+ case 0xc0:
+ ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_CONSTRAINT);
+ break;
+ default:
+ break;
+ }
+ if (!ret)
+ goto out;
+ ret = coalesceValues(JOIN_MASK_MOV);
+ if (!ret)
+ goto out;
+
+ if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
+ func->print();
+ func->printLiveIntervals();
+ }
+
+ ret = allocateConstrainedValues() && linearScan();
+ if (!ret)
+ goto out;
+
+out:
+ // TODO: should probably call destructor on LValues later instead
+ for (ArrayList::Iterator it = func->allLValues.iterator();
+ !it.end(); it.next())
+ reinterpret_cast<LValue *>(it.get())->livei.clear();
+
+ return ret;
+}
+
+bool Program::registerAllocation()
+{
+ RegAlloc ra(this);
+ return ra.exec();
+}
+
+bool
+RegAlloc::InsertConstraintsPass::exec(Function *ir)
+{
+ constrList.clear();
+
+ bool ret = run(ir, true, true);
+ if (ret)
+ ret = insertConstraintMoves();
+ return ret;
+}
+
+// TODO: make part of texture insn
+void
+RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
+{
+ Value *def[4];
+ int c, k, d;
+ uint8_t mask = 0;
+
+ for (d = 0, k = 0, c = 0; c < 4; ++c) {
+ if (!(tex->tex.mask & (1 << c)))
+ continue;
+ if (tex->getDef(k)->refCount()) {
+ mask |= 1 << c;
+ def[d++] = tex->getDef(k);
+ }
+ ++k;
+ }
+ tex->tex.mask = mask;
+
+#if 0 // reorder or set the unused ones NULL ?
+ for (c = 0; c < 4; ++c)
+ if (!(tex->tex.mask & (1 << c)))
+ def[d++] = tex->getDef(c);
+#endif
+ for (c = 0; c < d; ++c)
+ tex->setDef(c, def[c]);
+#if 1
+ for (; c < 4; ++c)
+ tex->setDef(c, NULL);
+#endif
+}
+
+bool
+RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
+{
+ // current register allocation can't handle it if a value participates in
+ // multiple constraints
+ for (ValueRef::Iterator it = cst->src[s].iterator(); !it.end(); it.next()) {
+ Instruction *insn = it.get()->getInsn();
+ if (insn != cst)
+ return true;
+ }
+
+ // can start at s + 1 because detectConflict is called on all sources
+ for (int c = s + 1; cst->srcExists(c); ++c)
+ if (cst->getSrc(c) == cst->getSrc(s))
+ return true;
+
+ Instruction *defi = cst->getSrc(s)->getInsn();
+
+ return (!defi || defi->constrainedDefs());
+}
+
+void
+RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
+{
+ Instruction *cst;
+ int d;
+
+ // first, look for an existing identical constraint op
+ for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
+ cst = reinterpret_cast<Instruction *>(it.get());
+ if (!i->bb->dominatedBy(cst->bb))
+ break;
+ for (d = 0; d < n; ++d)
+ if (cst->getSrc(d) != i->getSrc(d + s))
+ break;
+ if (d >= n) {
+ for (d = 0; d < n; ++d, ++s)
+ i->setSrc(s, cst->getDef(d));
+ return;
+ }
+ }
+ cst = new_Instruction(func, OP_CONSTRAINT, i->dType);
+
+ for (d = 0; d < n; ++s, ++d) {
+ cst->setDef(d, new_LValue(func, FILE_GPR));
+ cst->setSrc(d, i->getSrc(s));
+ i->setSrc(s, cst->getDef(d));
+ }
+ i->bb->insertBefore(i, cst);
+
+ constrList.insert(cst);
+}
+
+// Add a dummy use of the pointer source of >= 8 byte loads after the load
+// to prevent it from being assigned a register that overlaps the load's
+// destination, which would produce random corruptions.
+void
+RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
+{
+ Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
+ hzd->setSrc(0, src->get());
+ i->bb->insertAfter(i, hzd);
+}
+
+// Insert constraint markers for instructions whose multiple sources must be
+// located in consecutive registers.
+bool
+RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
+{
+ TexInstruction *tex;
+ Instruction *next;
+ int s, n, size;
+
+ for (Instruction *i = bb->getEntry(); i; i = next) {
+ next = i->next;
+
+ if ((tex = i->asTex())) {
+ textureMask(tex);
+
+ // FIXME: this is target specific
+ if (tex->op == OP_TXQ) {
+ s = tex->srcCount(0xff);
+ n = 0;
+ } else {
+ s = tex->tex.target.getArgCount();
+ if (!tex->tex.target.isArray() &&
+ (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
+ ++s;
+ n = tex->srcCount(0xff) - s;
+ assert(n <= 4);
+ }
+
+ if (s > 1)
+ addConstraint(i, 0, s);
+ if (n > 1)
+ addConstraint(i, s, n);
+ } else
+ if (i->op == OP_EXPORT || i->op == OP_STORE) {
+ for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
+ assert(i->srcExists(s));
+ size -= i->getSrc(s)->reg.size;
+ }
+ if ((s - 1) > 1)
+ addConstraint(i, 1, s - 1);
+ } else
+ if (i->op == OP_LOAD) {
+ if (i->src[0].isIndirect(0) && typeSizeof(i->dType) >= 8)
+ addHazard(i, i->src[0].getIndirect(0));
+ }
+ }
+ return true;
+}
+
+// Insert extra moves so that, if multiple register constraints on a value are
+// in conflict, these conflicts can be resolved.
+bool
+RegAlloc::InsertConstraintsPass::insertConstraintMoves()
+{
+ for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
+ Instruction *cst = reinterpret_cast<Instruction *>(it.get());
+
+ for (int s = 0; cst->srcExists(s); ++s) {
+ if (!detectConflict(cst, s))
+ continue;
+ Instruction *mov = new_Instruction(func, OP_MOV,
+ typeOfSize(cst->src[s].getSize()));
+ mov->setSrc(0, cst->getSrc(s));
+ mov->setDef(0, new_LValue(func, FILE_GPR));
+ cst->setSrc(s, mov->getDef(0));
+
+ cst->bb->insertBefore(cst, mov);
+ }
+ }
+ return true;
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_ssa.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_ssa.cpp
new file mode 100644
index 00000000000..841163b0ac9
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_ssa.cpp
@@ -0,0 +1,463 @@
+
+#include "nv50_ir.h"
+#include "nv50_ir_target.h"
+
+namespace nv50_ir {
+
+// Converts nv50 IR generated from TGSI to SSA form.
+
+// DominatorTree implements an algorithm for finding immediate dominators,
+// as described by T. Lengauer & R. Tarjan.
+class DominatorTree : public Graph
+{
+public:
+ DominatorTree(Graph *cfg);
+ ~DominatorTree() { }
+
+ bool dominates(BasicBlock *, BasicBlock *);
+
+ void findDominanceFrontiers();
+
+private:
+ void build();
+ void buildDFS(Node *);
+
+ void squash(int);
+ inline void link(int, int);
+ inline int eval(int);
+
+ void debugPrint();
+
+ Graph *cfg;
+
+ Node **vert;
+ int *data;
+ const int count;
+
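+   // The algorithm's per-node working data is packed into a single int array
+   // of 5 * count entries; the macros below address the individual fields.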
+ #define SEMI(i) (data[(i) + 0 * count])
+ #define ANCESTOR(i) (data[(i) + 1 * count])
+ #define PARENT(i) (data[(i) + 2 * count])
+ #define LABEL(i) (data[(i) + 3 * count])
+ #define DOM(i) (data[(i) + 4 * count])
+};
+
+void DominatorTree::debugPrint()
+{
+ for (int i = 0; i < count; ++i) {
+ INFO("SEMI(%i) = %i\n", i, SEMI(i));
+ INFO("ANCESTOR(%i) = %i\n", i, ANCESTOR(i));
+ INFO("PARENT(%i) = %i\n", i, PARENT(i));
+ INFO("LABEL(%i) = %i\n", i, LABEL(i));
+ INFO("DOM(%i) = %i\n", i, DOM(i));
+ }
+}
+
+DominatorTree::DominatorTree(Graph *cfgraph) : cfg(cfgraph),
+ count(cfg->getSize())
+{
+ Iterator *iter;
+ int i;
+
+ vert = new Node * [count];
+ data = new int[5 * count];
+
+ for (i = 0, iter = cfg->iteratorDFS(true); !iter->end(); iter->next(), ++i) {
+ vert[i] = reinterpret_cast<Node *>(iter->get());
+ vert[i]->tag = i;
+ LABEL(i) = i;
+ SEMI(i) = ANCESTOR(i) = -1;
+ }
+ cfg->putIterator(iter);
+
+ build();
+
+ delete[] vert;
+ delete[] data;
+}
+
+void DominatorTree::buildDFS(Graph::Node *node)
+{
+ SEMI(node->tag) = node->tag;
+
+ for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) {
+ if (SEMI(ei.getNode()->tag) < 0) {
+ buildDFS(ei.getNode());
+ PARENT(ei.getNode()->tag) = node->tag;
+ }
+ }
+}
+
+void DominatorTree::squash(int v)
+{
+ if (ANCESTOR(ANCESTOR(v)) >= 0) {
+ squash(ANCESTOR(v));
+
+ if (SEMI(LABEL(ANCESTOR(v))) < SEMI(LABEL(v)))
+ LABEL(v) = LABEL(ANCESTOR(v));
+ ANCESTOR(v) = ANCESTOR(ANCESTOR(v));
+ }
+}
+
+int DominatorTree::eval(int v)
+{
+ if (ANCESTOR(v) < 0)
+ return v;
+ squash(v);
+ return LABEL(v);
+}
+
+void DominatorTree::link(int v, int w)
+{
+ ANCESTOR(w) = v;
+}
+
+void DominatorTree::build()
+{
+ DLList *bucket = new DLList[count];
+ Node *nv, *nw;
+ int p, u, v, w;
+
+ buildDFS(cfg->getRoot());
+
+ for (w = count - 1; w >= 1; --w) {
+ nw = vert[w];
+ assert(nw->tag == w);
+ for (Graph::EdgeIterator ei = nw->incident(); !ei.end(); ei.next()) {
+ nv = ei.getNode();
+ v = nv->tag;
+ u = eval(v);
+ if (SEMI(u) < SEMI(w))
+ SEMI(w) = SEMI(u);
+ }
+ p = PARENT(w);
+ bucket[SEMI(w)].insert(nw);
+ link(p, w);
+
+ for (DLList::Iterator it = bucket[p].iterator(); !it.end(); it.erase()) {
+ v = reinterpret_cast<Node *>(it.get())->tag;
+ u = eval(v);
+ DOM(v) = (SEMI(u) < SEMI(v)) ? u : p;
+ }
+ }
+ for (w = 1; w < count; ++w) {
+ if (DOM(w) != SEMI(w))
+ DOM(w) = DOM(DOM(w));
+ }
+ DOM(0) = 0;
+
+ insert(&BasicBlock::get(cfg->getRoot())->dom);
+ do {
+ p = 0;
+ for (v = 1; v < count; ++v) {
+         nw = &BasicBlock::get(vert[DOM(v)])->dom;
+ nv = &BasicBlock::get(vert[v])->dom;
+ if (nw->getGraph() && !nv->getGraph()) {
+ ++p;
+ nw->attach(nv, Graph::Edge::TREE);
+ }
+ }
+ } while (p);
+
+ delete[] bucket;
+}
+
+#undef SEMI
+#undef ANCESTOR
+#undef PARENT
+#undef LABEL
+#undef DOM
+
+void DominatorTree::findDominanceFrontiers()
+{
+ Iterator *dtIter;
+ BasicBlock *bb;
+
+ for (dtIter = this->iteratorDFS(false); !dtIter->end(); dtIter->next()) {
+ EdgeIterator succIter, chldIter;
+
+ bb = BasicBlock::get(reinterpret_cast<Node *>(dtIter->get()));
+ bb->getDF().clear();
+
+ for (succIter = bb->cfg.outgoing(); !succIter.end(); succIter.next()) {
+ BasicBlock *dfLocal = BasicBlock::get(succIter.getNode());
+ if (dfLocal->idom() != bb)
+ bb->getDF().insert(dfLocal);
+ }
+
+ for (chldIter = bb->dom.outgoing(); !chldIter.end(); chldIter.next()) {
+ BasicBlock *cb = BasicBlock::get(chldIter.getNode());
+
+ DLList::Iterator dfIter = cb->getDF().iterator();
+ for (; !dfIter.end(); dfIter.next()) {
+ BasicBlock *dfUp = BasicBlock::get(dfIter);
+ if (dfUp->idom() != bb)
+ bb->getDF().insert(dfUp);
+ }
+ }
+ }
+ this->putIterator(dtIter);
+}
+
+// liveIn(bb) = usedBeforeAssigned(bb) U (liveOut(bb) - assigned(bb))
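+// liveOut(bb) is gathered by OR'ing the live-in sets of all successors, which
+// are computed first via the recursive calls below; the caller repeats the
+// whole traversal up to the loop nesting depth to converge across back edges.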
+void
+Function::buildLiveSetsPreSSA(BasicBlock *bb, const int seq)
+{
+ BitSet usedBeforeAssigned(allLValues.getSize(), true);
+ BitSet assigned(allLValues.getSize(), true);
+
+ bb->liveSet.allocate(allLValues.getSize(), false);
+
+ int n = 0;
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ BasicBlock *out = BasicBlock::get(ei.getNode());
+ if (out == bb)
+ continue;
+ if (out->cfg.visit(seq))
+ buildLiveSetsPreSSA(out, seq);
+ if (!n++)
+ bb->liveSet = out->liveSet;
+ else
+ bb->liveSet |= out->liveSet;
+ }
+ if (!n && !bb->liveSet.marker)
+ bb->liveSet.fill(0);
+ bb->liveSet.marker = true;
+
+ for (Instruction *i = bb->getEntry(); i; i = i->next) {
+ for (int s = 0; i->srcExists(s); ++s)
+ if (i->getSrc(s)->asLValue() && !assigned.test(i->getSrc(s)->id))
+ usedBeforeAssigned.set(i->getSrc(s)->id);
+ for (int d = 0; i->defExists(d); ++d)
+ assigned.set(i->getDef(d)->id);
+ }
+
+ bb->liveSet.andNot(assigned);
+ bb->liveSet |= usedBeforeAssigned;
+}
+
+class RenamePass
+{
+public:
+ RenamePass(Function *);
+ ~RenamePass();
+
+ bool run();
+ void search(BasicBlock *);
+
+ inline LValue *getStackTop(Value *);
+
+private:
+ Stack *stack;
+ Function *func;
+ Program *prog;
+ Instruction *undef;
+};
+
+bool
+Program::convertToSSA()
+{
+ for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
+ Function *fn = reinterpret_cast<Function *>(fi.get());
+ if (!fn->convertToSSA())
+ return false;
+ }
+ return true;
+}
+
+// XXX: add edge from entry to exit ?
+
+// Efficiently Computing Static Single Assignment Form and
+// the Control Dependence Graph,
+// R. Cytron, J. Ferrante, B. K. Rosen, M. N. Wegman, F. K. Zadeck
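+//
+// Illustrative sketch: if a value is written in BB:1 and BB:2, which both
+// branch to BB:3, then BB:3 lies in the dominance frontier of both writers
+// and receives a phi for that value (but, since this is pruned SSA, only if
+// the value is live-in there).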
+bool
+Function::convertToSSA()
+{
+   // 0. calculate live-in variables (for pruned SSA)
+ int seq = cfg.nextSequence();
+ for (unsigned i = 0; i <= loopNestingBound; seq = cfg.nextSequence(), ++i)
+ buildLiveSetsPreSSA(BasicBlock::get(cfg.getRoot()), seq);
+
+ // reset liveSet marker for use in regalloc
+ for (ArrayList::Iterator bi = allBBlocks.iterator(); !bi.end(); bi.next())
+ reinterpret_cast<BasicBlock *>(bi.get())->liveSet.marker = false;
+
+ // 1. create the dominator tree
+ domTree = new DominatorTree(&cfg);
+ reinterpret_cast<DominatorTree *>(domTree)->findDominanceFrontiers();
+
+ // 2. insert PHI functions
+ DLList workList;
+ LValue *lval;
+ BasicBlock *bb;
+ int var;
+ int iterCount = 0;
+ int *hasAlready = new int[allBBlocks.getSize() * 2];
+ int *work = &hasAlready[allBBlocks.getSize()];
+
+ memset(hasAlready, 0, allBBlocks.getSize() * 2 * sizeof(int));
+
+ // for each variable
+ for (var = 0; var < allLValues.getSize(); ++var) {
+ if (!allLValues.get(var))
+ continue;
+ lval = reinterpret_cast<Value *>(allLValues.get(var))->asLValue();
+ if (!lval || !lval->defs)
+ continue;
+ ++iterCount;
+
+ // TODO: don't add phi functions for values that aren't used outside
+ // the BB they're defined in
+
+ // gather blocks with assignments to lval in workList
+ for (ValueDef::Iterator d = lval->defs->iterator(); !d.end(); d.next()) {
+ bb = d.get()->getInsn()->bb;
+ if (!bb)
+         continue; // instruction has likely been removed but not yet deleted
+
+ if (work[bb->getId()] == iterCount)
+ continue;
+ work[bb->getId()] = iterCount;
+ workList.insert(bb);
+ }
+
+ // for each block in workList, insert a phi for lval in the block's
+ // dominance frontier (if we haven't already done so)
+ for (DLList::Iterator wI = workList.iterator(); !wI.end(); wI.erase()) {
+ bb = BasicBlock::get(wI);
+
+ DLList::Iterator dfIter = bb->getDF().iterator();
+ for (; !dfIter.end(); dfIter.next()) {
+ Instruction *phi;
+ BasicBlock *dfBB = BasicBlock::get(dfIter);
+
+ if (hasAlready[dfBB->getId()] >= iterCount)
+ continue;
+ hasAlready[dfBB->getId()] = iterCount;
+
+ // pruned SSA: don't need a phi if the value is not live-in
+ if (!dfBB->liveSet.test(lval->id))
+ continue;
+
+ // TODO: use dedicated PhiInstruction to lift this limit
+ assert(dfBB->cfg.incidentCount() <= NV50_IR_MAX_SRCS);
+
+ phi = new_Instruction(this, OP_PHI, typeOfSize(lval->reg.size));
+ dfBB->insertTail(phi);
+
+ phi->setDef(0, lval);
+ for (int s = 0; s < dfBB->cfg.incidentCount(); ++s)
+ phi->setSrc(s, lval);
+
+ if (work[dfBB->getId()] < iterCount) {
+ work[dfBB->getId()] = iterCount;
+ wI.insert(dfBB);
+ }
+ }
+ }
+ }
+ delete[] hasAlready;
+
+ RenamePass rename(this);
+ return rename.run();
+}
+
+RenamePass::RenamePass(Function *fn) : func(fn), prog(fn->getProgram())
+{
+ BasicBlock *root = BasicBlock::get(func->cfg.getRoot());
+
+ undef = new_Instruction(func, OP_NOP, TYPE_U32);
+ undef->setDef(0, new_LValue(func, FILE_GPR));
+ root->insertHead(undef);
+
+ stack = new Stack[func->allLValues.getSize()];
+}
+
+RenamePass::~RenamePass()
+{
+ if (stack)
+ delete[] stack;
+}
+
+LValue *
+RenamePass::getStackTop(Value *val)
+{
+ if (!stack[val->id].getSize())
+ return 0;
+ return reinterpret_cast<LValue *>(stack[val->id].peek().u.p);
+}
+
+bool RenamePass::run()
+{
+ if (!stack)
+ return false;
+ search(BasicBlock::get(func->domTree->getRoot()));
+
+ ArrayList::Iterator iter = func->allInsns.iterator();
+ for (; !iter.end(); iter.next()) {
+ Instruction *insn = reinterpret_cast<Instruction *>(iter.get());
+ for (int d = 0; insn->defExists(d); ++d)
+ insn->def[d].restoreDefList();
+ }
+
+ return true;
+}
+
+void RenamePass::search(BasicBlock *bb)
+{
+ LValue *lval;
+ int d, s;
+ const Target *targ = prog->getTarget();
+
+ for (Instruction *stmt = bb->getFirst(); stmt; stmt = stmt->next) {
+ if (stmt->op != OP_PHI) {
+ for (s = 0; stmt->srcExists(s); ++s) {
+ lval = stmt->getSrc(s)->asLValue();
+ if (!lval)
+ continue;
+ lval = getStackTop(lval);
+ if (!lval)
+ lval = static_cast<LValue *>(undef->getDef(0));
+ stmt->setSrc(s, lval);
+ }
+ }
+ for (d = 0; stmt->defExists(d); ++d) {
+ lval = stmt->def[d].get()->asLValue();
+ assert(lval);
+ stmt->def[d].setSSA(
+ new_LValue(func, targ->nativeFile(lval->reg.file)));
+ stmt->def[d].get()->reg.data.id = lval->reg.data.id;
+ stack[lval->id].push(stmt->def[d].get());
+ }
+ }
+
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ Instruction *phi;
+ int p = 0;
+ BasicBlock *sb = BasicBlock::get(ei.getNode());
+
+ // which predecessor of sb is bb ?
+ for (Graph::EdgeIterator ei = sb->cfg.incident(); !ei.end(); ei.next()) {
+ if (ei.getNode() == &bb->cfg)
+ break;
+ ++p;
+ }
+ assert(p < sb->cfg.incidentCount());
+
+ for (phi = sb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+ lval = getStackTop(phi->getSrc(p));
+ if (!lval)
+ lval = undef->getDef(0)->asLValue();
+ phi->setSrc(p, lval);
+ }
+ }
+
+ for (Graph::EdgeIterator ei = bb->dom.outgoing(); !ei.end(); ei.next())
+ search(BasicBlock::get(ei.getNode()));
+
+ for (Instruction *stmt = bb->getFirst(); stmt; stmt = stmt->next) {
+ for (d = 0; stmt->defExists(d); ++d)
+ stack[stmt->def[d].preSSA()->id].pop();
+ }
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
new file mode 100644
index 00000000000..59fb0c19b0b
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
@@ -0,0 +1,304 @@
+
+#include "nv50/codegen/nv50_ir.h"
+#include "nv50/codegen/nv50_ir_target.h"
+
+namespace nv50_ir {
+
+const uint8_t Target::operationSrcNr[OP_LAST + 1] =
+{
+ 0, 0, // NOP, PHI
+ 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
+ 1, 1, 2, // MOV, LOAD, STORE
+ 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
+ 1, 1, 1, // ABS, NEG, NOT
+ 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
+ 2, 2, 1, // MAX, MIN, SAT
+ 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
+ 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
+ 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2
+ 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW
+ 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,
+ 0, 0, 0, // PRERET,CONT,BREAK
+ 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
+ 1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
+ 1, 1, // EMIT, RESTART
+ 1, 1, 1, // TEX, TXB, TXL,
+ 1, 1, 1, 1, 1, // TXF, TXQ, TXD, TXG, TEXCSAA
+ 1, 2, // SULD, SUST
+ 1, 1, // DFDX, DFDY
+ 1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
+ 2, 3, 2, // POPCNT, INSBF, EXTBF
+ 0
+};
+
+
+extern Target *getTargetNVC0(unsigned int chipset);
+
+Target *Target::create(unsigned int chipset)
+{
+ switch (chipset & 0xf0) {
+ case 0xc0:
+ return getTargetNVC0(chipset);
+ case 0x50:
+ case 0x80:
+ case 0x90:
+ case 0xa0:
+ default:
+ ERROR("unsupported target: NV%x\n", chipset);
+ return 0;
+ }
+}
+
+void Target::destroy(Target *targ)
+{
+ delete targ;
+}
+
+void
+CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
+{
+ code = reinterpret_cast<uint32_t *>(ptr);
+ codeSize = 0;
+ codeSizeLimit = size;
+}
+
+void
+CodeEmitter::printBinary() const
+{
+ uint32_t *bin = code - codeSize / 4;
+ INFO("program binary (%u bytes)", codeSize);
+ for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
+ if ((pos % 8) == 0)
+ INFO("\n");
+ INFO("%08x ", bin[pos]);
+ }
+ INFO("\n");
+}
+
+void
+CodeEmitter::prepareEmission(Program *prog)
+{
+ for (ArrayList::Iterator fi = prog->allFuncs.iterator();
+ !fi.end(); fi.next()) {
+ Function *func = reinterpret_cast<Function *>(fi.get());
+ func->binPos = prog->binSize;
+ prepareEmission(func);
+ prog->binSize += func->binSize;
+ }
+}
+
+void
+CodeEmitter::prepareEmission(Function *func)
+{
+ func->bbCount = 0;
+ func->bbArray = new BasicBlock * [func->cfg.getSize()];
+
+ BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
+
+ Graph::GraphIterator *iter;
+ for (iter = func->cfg.iteratorCFG(); !iter->end(); iter->next())
+ prepareEmission(BasicBlock::get(*iter));
+ func->cfg.putIterator(iter);
+}
+
+void
+CodeEmitter::prepareEmission(BasicBlock *bb)
+{
+ Instruction *i, *next;
+ Function *func = bb->getFunction();
+ int j;
+ unsigned int nShort;
+
+ for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
+
+ for (; j >= 0; --j) {
+ BasicBlock *in = func->bbArray[j];
+ Instruction *exit = in->getExit();
+
+ if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
+ in->binSize -= 8;
+ func->binSize -= 8;
+
+ for (++j; j < func->bbCount; ++j)
+ func->bbArray[j]->binPos -= 8;
+
+ in->remove(exit);
+ }
+ bb->binPos = in->binPos + in->binSize;
+      if (in->binSize) // reached a non-empty block, stop removing branches to bb
+ break;
+ }
+ func->bbArray[func->bbCount++] = bb;
+
+ if (!bb->getExit())
+ return;
+
+ // determine encoding size, try to group short instructions
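+   // (4-byte encodings are grouped into aligned pairs here: nShort counts
+   // them, and a lone short instruction is either paired with a neighbour,
+   // swapping adjacent instructions where commutation is legal, or widened
+   // back to the 8-byte form so the block size stays a multiple of 8.)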
+ nShort = 0;
+ for (i = bb->getEntry(); i; i = next) {
+ next = i->next;
+
+ i->encSize = getMinEncodingSize(i);
+ if (next && i->encSize < 8)
+ ++nShort;
+ else
+ if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
+ if (i->isCommutationLegal(i->next)) {
+ bb->permuteAdjacent(i, next);
+ next->encSize = 4;
+ next = i;
+ i = i->prev;
+ ++nShort;
+ } else
+ if (i->isCommutationLegal(i->prev) && next->next) {
+ bb->permuteAdjacent(i->prev, i);
+ next->encSize = 4;
+ next = next->next;
+ bb->binSize += 4;
+ ++nShort;
+ } else {
+ i->encSize = 8;
+ i->prev->encSize = 8;
+ bb->binSize += 4;
+ nShort = 0;
+ }
+ } else {
+ i->encSize = 8;
+ if (nShort & 1) {
+ i->prev->encSize = 8;
+ bb->binSize += 4;
+ }
+ nShort = 0;
+ }
+ bb->binSize += i->encSize;
+ }
+
+ if (bb->getExit()->encSize == 4) {
+ assert(nShort);
+ bb->getExit()->encSize = 8;
+ bb->binSize += 4;
+
+ if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
+ bb->binSize += 8;
+ bb->getExit()->prev->encSize = 8;
+ }
+ }
+ assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
+
+ func->binSize += bb->binSize;
+}
+
+bool
+Program::emitBinary(struct nv50_ir_prog_info *info)
+{
+ CodeEmitter *emit = target->getCodeEmitter(progType);
+
+ emit->prepareEmission(this);
+
+ if (dbgFlags & NV50_IR_DEBUG_BASIC)
+ this->print();
+
+ if (!binSize) {
+ code = NULL;
+ return false;
+ }
+ code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
+ if (!code)
+ return false;
+ emit->setCodeLocation(code, binSize);
+
+ for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
+ Function *fn = reinterpret_cast<Function *>(fi.get());
+
+ assert(emit->getCodeSize() == fn->binPos);
+
+ for (int b = 0; b < fn->bbCount; ++b)
+ for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next)
+ emit->emitInstruction(i);
+ }
+ info->bin.relocData = emit->getRelocInfo();
+
+ delete emit;
+ return true;
+}
+
+#define RELOC_ALLOC_INCREMENT 8
+
+bool
+CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
+ int s)
+{
+ unsigned int n = relocInfo ? relocInfo->count : 0;
+
+ if (!(n % RELOC_ALLOC_INCREMENT)) {
+ size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
+ relocInfo = reinterpret_cast<RelocInfo *>(
+ REALLOC(relocInfo, n ? size : 0,
+ size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
+ if (!relocInfo)
+ return false;
+ }
+ ++relocInfo->count;
+
+ relocInfo->entry[n].data = data;
+ relocInfo->entry[n].mask = m;
+ relocInfo->entry[n].offset = codeSize + w * 4;
+ relocInfo->entry[n].bitPos = s;
+ relocInfo->entry[n].type = ty;
+
+ return true;
+}
+
+void
+RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
+{
+ uint32_t value = 0;
+
+ switch (type) {
+ case TYPE_CODE: value = info->codePos; break;
+ case TYPE_BUILTIN: value = info->libPos; break;
+ case TYPE_DATA: value = info->dataPos; break;
+ default:
+ assert(0);
+ break;
+ }
+ value += data;
+ value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
+
+ binary[offset / 4] &= ~mask;
+ binary[offset / 4] |= value & mask;
+}
+
+} // namespace nv50_ir
+
+
+#include "nv50/codegen/nv50_ir_driver.h"
+
+extern "C" {
+
+void
+nv50_ir_relocate_code(void *relocData, uint32_t *code,
+ uint32_t codePos,
+ uint32_t libPos,
+ uint32_t dataPos)
+{
+ nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
+
+ info->codePos = codePos;
+ info->libPos = libPos;
+ info->dataPos = dataPos;
+
+ for (unsigned int i = 0; i < info->count; ++i)
+ info->entry[i].apply(code, info);
+}
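+
+// Rough driver-side sequence (a sketch; upload() and the offsets are
+// placeholders, only nv50_ir_relocate_code() is the real entry point):
+//
+//   upload(code, codeOffset);   // copy the emitted binary to its final place
+//   nv50_ir_relocate_code(info->bin.relocData, code,
+//                         codeOffset, libOffset, dataOffset);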
+
+void
+nv50_ir_get_target_library(uint32_t chipset,
+ const uint32_t **code, uint32_t *size)
+{
+ nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
+ targ->getBuiltinCode(code, size);
+ nv50_ir::Target::destroy(targ);
+}
+
+}
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
new file mode 100644
index 00000000000..ddde5586890
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
@@ -0,0 +1,164 @@
+
+#ifndef __NV50_IR_TARGET_H__
+#define __NV50_IR_TARGET_H__
+
+#include "nv50_ir.h"
+
+namespace nv50_ir {
+
+struct RelocInfo;
+
+struct RelocEntry
+{
+ enum Type
+ {
+ TYPE_CODE,
+ TYPE_BUILTIN,
+ TYPE_DATA
+ };
+
+ uint32_t data;
+ uint32_t mask;
+ uint32_t offset;
+ int8_t bitPos;
+ Type type;
+
+ inline void apply(uint32_t *binary, const RelocInfo *info) const;
+};
+
+struct RelocInfo
+{
+ uint32_t codePos;
+ uint32_t libPos;
+ uint32_t dataPos;
+
+ uint32_t count;
+
+ RelocEntry entry[0];
+};
+
+class CodeEmitter
+{
+public:
+ // returns whether the instruction was encodable and written
+ virtual bool emitInstruction(Instruction *) = 0;
+
+ virtual uint32_t getMinEncodingSize(const Instruction *) const = 0;
+
+ void setCodeLocation(void *, uint32_t size);
+ inline void *getCodeLocation() const { return code; }
+ inline uint32_t getCodeSize() const { return codeSize; }
+
+ bool addReloc(RelocEntry::Type, int w, uint32_t data, uint32_t m,
+ int s);
+
+ inline void *getRelocInfo() const { return relocInfo; }
+
+ void prepareEmission(Program *);
+ void prepareEmission(Function *);
+ virtual void prepareEmission(BasicBlock *);
+
+ void printBinary() const;
+
+protected:
+ uint32_t *code;
+ uint32_t codeSize;
+ uint32_t codeSizeLimit;
+
+ RelocInfo *relocInfo;
+};
+
+class Target
+{
+public:
+ static Target *create(uint32_t chipset);
+ static void destroy(Target *);
+
+ // 0x50 and 0x84 to 0xaf for nv50
+ // 0xc0 to 0xdf for nvc0
+ inline uint32_t getChipset() const { return chipset; }
+
+ virtual CodeEmitter *getCodeEmitter(Program::Type) = 0;
+
+ // Drivers should upload this so we can use it from all programs.
+ // The address chosen is supplied to the relocation routine.
+ virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const = 0;
+
+ virtual bool runLegalizePass(Program *, CGStage stage) const = 0;
+
+public:
+ struct OpInfo
+ {
+ OpInfo *variants;
+ operation op;
+ uint16_t srcTypes;
+ uint16_t dstTypes;
+ uint32_t immdBits;
+ uint8_t srcNr;
+ uint8_t srcMods[3];
+ uint8_t dstMods;
+ uint8_t srcFiles[3];
+ uint8_t dstFiles;
+ unsigned int minEncSize : 4;
+ unsigned int vector : 1;
+ unsigned int predicate : 1;
+ unsigned int commutative : 1;
+ unsigned int pseudo : 1;
+ unsigned int flow : 1;
+ unsigned int hasDest : 1;
+ unsigned int terminator : 1;
+ };
+
+ inline const OpInfo& getOpInfo(const Instruction *) const;
+ inline const OpInfo& getOpInfo(const operation) const;
+
+ inline DataFile nativeFile(DataFile f) const;
+
+ virtual bool insnCanLoad(const Instruction *insn, int s,
+ const Instruction *ld) const = 0;
+ virtual bool isOpSupported(operation, DataType) const = 0;
+ virtual bool isModSupported(const Instruction *,
+ int s, Modifier) const = 0;
+ virtual bool isSatSupported(const Instruction *) const = 0;
+ virtual bool mayPredicate(const Instruction *,
+ const Value *) const = 0;
+
+ virtual int getLatency(const Instruction *) const { return 1; }
+ virtual int getThroughput(const Instruction *) const { return 1; }
+
+ virtual unsigned int getFileSize(DataFile) const = 0;
+ virtual unsigned int getFileUnit(DataFile) const = 0;
+
+ virtual uint32_t getSVAddress(DataFile, const Symbol *) const = 0;
+
+public:
+ bool joinAnterior; // true if join is executed before the op
+
+ static const uint8_t operationSrcNr[OP_LAST + 1];
+
+protected:
+ uint32_t chipset;
+
+ DataFile nativeFileMap[DATA_FILE_COUNT];
+
+ OpInfo opInfo[OP_LAST + 1];
+};
+
+const Target::OpInfo& Target::getOpInfo(const Instruction *insn) const
+{
+ return opInfo[MIN2(insn->op, OP_LAST)];
+}
+
+const Target::OpInfo& Target::getOpInfo(const operation op) const
+{
+ return opInfo[op];
+}
+
+inline DataFile Target::nativeFile(DataFile f) const
+{
+ return nativeFileMap[f];
+}
+
+} // namespace nv50_ir
+
+#endif // __NV50_IR_TARGET_H__
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_util.cpp
new file mode 100644
index 00000000000..97f47a3ddbc
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_util.cpp
@@ -0,0 +1,253 @@
+
+#include "nv50_ir_util.h"
+
+namespace nv50_ir {
+
+void DLList::clear()
+{
+ for (Item *next, *item = head.next; item != &head; item = next) {
+ next = item->next;
+ delete item;
+ }
+ head.next = head.prev = &head;
+}
+
+void
+DLList::Iterator::erase()
+{
+ Item *rem = pos;
+
+ if (rem == term)
+ return;
+ pos = pos->next;
+
+ DLLIST_DEL(rem);
+ delete rem;
+}
+
+void DLList::Iterator::moveToList(DLList& dest)
+{
+ Item *item = pos;
+
+ assert(term != &dest.head);
+ assert(pos != term);
+
+ pos = pos->next;
+
+ DLLIST_DEL(item);
+ DLLIST_ADDHEAD(&dest.head, item);
+}
+
+bool
+DLList::Iterator::insert(void *data)
+{
+ Item *ins = new Item(data);
+
+ ins->next = pos->next;
+ ins->prev = pos;
+ pos->next->prev = ins;
+ pos->next = ins;
+
+ if (pos == term)
+ term = ins;
+
+ return true;
+}
+
+void
+Stack::moveTo(Stack& that)
+{
+ unsigned int newSize = this->size + that.size;
+
+ while (newSize > that.limit)
+ that.resize();
+ memcpy(&that.array[that.size], &array[0], this->size * sizeof(Item));
+
+ that.size = newSize;
+ this->size = 0;
+}
+
+Interval::~Interval()
+{
+ clear();
+}
+
+void
+Interval::clear()
+{
+ for (Range *next, *r = head; r; r = next) {
+ next = r->next;
+ delete r;
+ }
+}
+
+bool
+Interval::extend(int a, int b)
+{
+ Range *r, **nextp = &head;
+
+ // NOTE: we need empty intervals for fixed registers
+ // if (a == b)
+ // return false;
+ assert(a <= b);
+
+ for (r = head; r; r = r->next) {
+ if (b < r->bgn)
+ break; // insert before
+ if (a > r->end) {
+ // insert after
+ nextp = &r->next;
+ continue;
+ }
+
+ // overlap
+ if (a < r->bgn) {
+ r->bgn = a;
+ if (b > r->end)
+ r->end = b;
+ r->coalesce(&tail);
+ return true;
+ }
+ if (b > r->end) {
+ r->end = b;
+ r->coalesce(&tail);
+ return true;
+ }
+ assert(a >= r->bgn);
+ assert(b <= r->end);
+ return true;
+ }
+
+ (*nextp) = new Range(a, b);
+ (*nextp)->next = r;
+
+ for (r = (*nextp); r->next; r = r->next);
+ tail = r;
+ return true;
+}
+
+bool Interval::contains(int pos)
+{
+ for (Range *r = head; r && r->bgn <= pos; r = r->next)
+ if (r->end > pos)
+ return true;
+ return false;
+}
+
+bool Interval::overlaps(const Interval &iv) const
+{
+ for (Range *rA = this->head; rA; rA = rA->next)
+ for (Range *rB = iv.head; rB; rB = rB->next)
+ if (rB->bgn < rA->end &&
+ rB->end > rA->bgn)
+ return true;
+ return false;
+}
+
+void Interval::unify(Interval &that)
+{
+ assert(this != &that);
+ for (Range *next, *r = that.head; r; r = next) {
+ next = r->next;
+ this->extend(r->bgn, r->end);
+ delete r;
+ }
+ that.head = NULL;
+}
+
+void Interval::print() const
+{
+ if (!head)
+ return;
+ INFO("[%i %i)", head->bgn, head->end);
+ for (const Range *r = head->next; r; r = r->next)
+ INFO(" [%i %i)", r->bgn, r->end);
+ INFO("\n");
+}
+
+void
+BitSet::andNot(const BitSet &set)
+{
+ assert(data && set.data);
+ assert(size >= set.size);
+ for (unsigned int i = 0; i < (set.size + 31) / 32; ++i)
+ data[i] &= ~set.data[i];
+}
+
+BitSet& BitSet::operator|=(const BitSet &set)
+{
+ assert(data && set.data);
+ assert(size >= set.size);
+ for (unsigned int i = 0; i < (set.size + 31) / 32; ++i)
+ data[i] |= set.data[i];
+ return *this;
+}
+
+bool BitSet::allocate(unsigned int nBits, bool zero)
+{
+ if (data && size < nBits) {
+ FREE(data);
+ data = NULL;
+ }
+ size = nBits;
+
+ if (!data)
+ data = reinterpret_cast<uint32_t *>(CALLOC((size + 31) / 32, 4));
+
+ if (zero)
+ memset(data, 0, (size + 7) / 8);
+ else
+ data[(size + 31) / 32 - 1] = 0; // clear unused bits (e.g. for popCount)
+
+ return data;
+}
+
+unsigned int BitSet::popCount() const
+{
+ unsigned int count = 0;
+
+ for (unsigned int i = 0; i < (size + 31) / 32; ++i)
+ if (data[i])
+ count += util_bitcount(data[i]);
+ return count;
+}
+
+void BitSet::fill(uint32_t val)
+{
+ unsigned int i;
+ for (i = 0; i < (size + 31) / 32; ++i)
+ data[i] = val;
+ if (val)
+ data[i] &= ~(0xffffffff << (size % 32)); // BE ?
+}
+
+void BitSet::setOr(BitSet *pA, BitSet *pB)
+{
+ if (!pB) {
+ *this = *pA;
+ } else {
+ for (unsigned int i = 0; i < (size + 31) / 32; ++i)
+ data[i] = pA->data[i] | pB->data[i];
+ }
+}
+
+void BitSet::print() const
+{
+ unsigned int n = 0;
+ INFO("BitSet of size %u:\n", size);
+ for (unsigned int i = 0; i < (size + 31) / 32; ++i) {
+ uint32_t bits = data[i];
+ while (bits) {
+ int pos = ffs(bits) - 1;
+ bits &= ~(1 << pos);
+ INFO(" %i", i * 32 + pos);
+ ++n;
+ if ((n % 16) == 0)
+ INFO("\n");
+ }
+ }
+ if (n % 16)
+ INFO("\n");
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_util.h
new file mode 100644
index 00000000000..2ffdcd65568
--- /dev/null
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_util.h
@@ -0,0 +1,585 @@
+
+#ifndef __NV50_IR_UTIL_H__
+#define __NV50_IR_UTIL_H__
+
+#include <new>
+#include <assert.h>
+#include <stdio.h>
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+#define ERROR(args...) debug_printf("ERROR: " args)
+#define WARN(args...) debug_printf("WARNING: " args)
+#define INFO(args...) debug_printf(args)
+
+#define INFO_DBG(m, f, args...) \
+ do { \
+ if (m & NV50_IR_DEBUG_##f) \
+ debug_printf(args); \
+ } while(0)
+
+#define FATAL(args...) \
+ do { \
+ fprintf(stderr, args); \
+ abort(); \
+ } while(0)
+
+
+#define NV50_IR_FUNC_ALLOC_OBJ_DEF(obj, f, args...) \
+ new ((f)->getProgram()->mem_##obj.allocate()) obj(f, args)
+
+#define new_Instruction(f, args...) \
+ NV50_IR_FUNC_ALLOC_OBJ_DEF(Instruction, f, args)
+#define new_CmpInstruction(f, args...) \
+ NV50_IR_FUNC_ALLOC_OBJ_DEF(CmpInstruction, f, args)
+#define new_TexInstruction(f, args...) \
+ NV50_IR_FUNC_ALLOC_OBJ_DEF(TexInstruction, f, args)
+#define new_FlowInstruction(f, args...) \
+ NV50_IR_FUNC_ALLOC_OBJ_DEF(FlowInstruction, f, args)
+
+#define new_LValue(f, args...) \
+ NV50_IR_FUNC_ALLOC_OBJ_DEF(LValue, f, args)
+
+
+#define NV50_IR_PROG_ALLOC_OBJ_DEF(obj, p, args...) \
+ new ((p)->mem_##obj.allocate()) obj(p, args)
+
+#define new_Symbol(p, args...) \
+ NV50_IR_PROG_ALLOC_OBJ_DEF(Symbol, p, args)
+#define new_ImmediateValue(p, args...) \
+ NV50_IR_PROG_ALLOC_OBJ_DEF(ImmediateValue, p, args)
+
+
+#define delete_Instruction(p, insn) (p)->releaseInstruction(insn)
+#define delete_Value(p, val) (p)->releaseValue(val)
+
+
+namespace nv50_ir {
+
+class Iterator
+{
+public:
+ virtual void next() = 0;
+ virtual void *get() const = 0;
+ virtual bool end() const = 0; // if true, get will return 0
+};
+
+class ManipIterator : public Iterator
+{
+public:
+ virtual bool insert(void *) = 0; // insert after current position
+ virtual void erase() = 0;
+};
+
+// WARNING: do not pass expressions like a->prev or a->next as __item or
+// __list, the macros evaluate and modify their arguments more than once
+
+#define DLLIST_DEL(__item) \
+ do { \
+ (__item)->prev->next = (__item)->next; \
+ (__item)->next->prev = (__item)->prev; \
+ (__item)->next = (__item); \
+ (__item)->prev = (__item); \
+ } while(0)
+
+#define DLLIST_ADDTAIL(__list, __item) \
+ do { \
+ (__item)->next = (__list); \
+ (__item)->prev = (__list)->prev; \
+ (__list)->prev->next = (__item); \
+ (__list)->prev = (__item); \
+ } while(0)
+
+#define DLLIST_ADDHEAD(__list, __item) \
+ do { \
+ (__item)->prev = (__list); \
+ (__item)->next = (__list)->next; \
+ (__list)->next->prev = (__item); \
+ (__list)->next = (__item); \
+ } while(0)
+
+#define DLLIST_MERGE(__listA, __listB, ty) \
+ do { \
+ ty prevB = (__listB)->prev; \
+ (__listA)->prev->next = (__listB); \
+ (__listB)->prev->next = (__listA); \
+ (__listB)->prev = (__listA)->prev; \
+ (__listA)->prev = prevB; \
+ } while(0)
+
+#define DLLIST_FOR_EACH(list, it) \
+ for (DLList::Iterator (it) = (list)->iterator(); !(it).end(); (it).next())
+
+class DLList
+{
+public:
+ class Item
+ {
+ public:
+ Item(void *priv) : next(this), prev(this), data(priv) { }
+
+ public:
+ Item *next;
+ Item *prev;
+ void *data;
+ };
+
+ DLList() : head(0) { }
+ ~DLList() { clear(); }
+
+ inline void insertHead(void *data)
+ {
+ Item *item = new Item(data);
+
+ assert(data);
+
+ item->prev = &head;
+ item->next = head.next;
+ head.next->prev = item;
+ head.next = item;
+ }
+
+ inline void insertTail(void *data)
+ {
+ Item *item = new Item(data);
+
+ assert(data);
+
+ DLLIST_ADDTAIL(&head, item);
+ }
+
+ inline void insert(void *data) { insertTail(data); }
+
+ void clear();
+
+ class Iterator : public ManipIterator
+ {
+ public:
+ Iterator(Item *head, bool r) : rev(r), pos(r ? head->prev : head->next),
+ term(head) { }
+
+ virtual void next() { if (!end()) pos = rev ? pos->prev : pos->next; }
+ virtual void *get() const { return pos->data; }
+ virtual bool end() const { return pos == term; }
+
+      // caution: erase() advances the iterator, so erasing the element at
+      // end-2 and then calling next() leaves you at end
+ virtual void erase();
+ virtual bool insert(void *data);
+
+      // move item to another list; existing iterators are not kept consistent
+ void moveToList(DLList&);
+
+ private:
+ const bool rev;
+ Item *pos;
+ Item *term;
+
+ friend class DLList;
+ };
+
+ inline void erase(Iterator& pos)
+ {
+ pos.erase();
+ }
+
+ Iterator iterator()
+ {
+ return Iterator(&head, false);
+ }
+
+ Iterator revIterator()
+ {
+ return Iterator(&head, true);
+ }
+
+private:
+ Item head;
+};
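+
+// Minimal usage sketch for DLList (illustrative; insnA/insnB stand in for
+// whatever the list holds):
+//
+//   DLList list;
+//   list.insertTail(insnA);
+//   list.insertTail(insnB);
+//   DLLIST_FOR_EACH(&list, it)
+//      process(reinterpret_cast<Instruction *>(it.get()));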
+
+class Stack
+{
+public:
+ class Item {
+ public:
+ union {
+ void *p;
+ int i;
+ unsigned int u;
+ float f;
+ double d;
+ } u;
+
+ Item() { memset(&u, 0, sizeof(u)); }
+ };
+
+ Stack() : size(0), limit(0), array(0) { }
+ ~Stack() { if (array) FREE(array); }
+
+ inline void push(int i) { Item data; data.u.i = i; push(data); }
+ inline void push(unsigned int u) { Item data; data.u.u = u; push(data); }
+ inline void push(void *p) { Item data; data.u.p = p; push(data); }
+ inline void push(float f) { Item data; data.u.f = f; push(data); }
+
+ inline void push(Item data)
+ {
+ if (size == limit)
+ resize();
+ array[size++] = data;
+ }
+
+ inline Item pop()
+ {
+ if (!size) {
+ Item data;
+ assert(0);
+ return data;
+ }
+ return array[--size];
+ }
+
+ inline unsigned int getSize() { return size; }
+
+ inline Item& peek() { assert(size); return array[size - 1]; }
+
+ void clear(bool releaseStorage = false)
+ {
+ if (releaseStorage && array)
+ FREE(array);
+ size = limit = 0;
+ }
+
+   void moveTo(Stack&); // append all items to target in order (unlike repeated push(pop()))
+
+private:
+ void resize()
+ {
+ unsigned int sizeOld, sizeNew;
+
+ sizeOld = limit * sizeof(Item);
+ limit = MAX2(4, limit + limit);
+ sizeNew = limit * sizeof(Item);
+
+ array = (Item *)REALLOC(array, sizeOld, sizeNew);
+ }
+
+ unsigned int size;
+ unsigned int limit;
+ Item *array;
+};
+
+class DynArray
+{
+public:
+ class Item
+ {
+ public:
+ union {
+ uint32_t u32;
+ void *p;
+ };
+ };
+
+ DynArray() : data(NULL), size(0) { }
+
+ ~DynArray() { if (data) FREE(data); }
+
+ inline Item& operator[](unsigned int i)
+ {
+ if (i >= size)
+ resize(i);
+ return data[i];
+ }
+
+ inline const Item operator[](unsigned int i) const
+ {
+ return data[i];
+ }
+
+ void resize(unsigned int index)
+ {
+ const unsigned int oldSize = size * sizeof(Item);
+
+ if (!size)
+ size = 8;
+ while (size <= index)
+ size <<= 1;
+
+ data = (Item *)REALLOC(data, oldSize, size * sizeof(Item));
+ }
+
+private:
+ Item *data;
+ unsigned int size;
+};
+
+class ArrayList
+{
+public:
+ ArrayList() : size(0) { }
+
+ void insert(void *item, int& id)
+ {
+ id = ids.getSize() ? ids.pop().u.i : size++;
+ data[id].p = item;
+ }
+
+ void remove(int& id)
+ {
+ const unsigned int uid = id;
+ assert(uid < size && data[id].p);
+ ids.push(uid);
+ data[uid].p = NULL;
+ id = -1;
+ }
+
+ inline int getSize() const { return size; }
+
+ inline void *get(unsigned int id) { assert(id < size); return data[id].p; }
+
+ class Iterator : public nv50_ir::Iterator
+ {
+ public:
+ Iterator(const ArrayList *array) : pos(0), data(array->data)
+ {
+ size = array->getSize();
+ if (size)
+ nextValid();
+ }
+
+ void nextValid() { while ((pos < size) && !data[pos].p) ++pos; }
+
+ void next() { if (pos < size) { ++pos; nextValid(); } }
+ void *get() const { assert(pos < size); return data[pos].p; }
+ bool end() const { return pos >= size; }
+
+ private:
+ unsigned int pos;
+ unsigned int size;
+ const DynArray& data;
+
+ friend class ArrayList;
+ };
+
+ Iterator iterator() const { return Iterator(this); }
+
+private:
+ DynArray data;
+ Stack ids;
+ unsigned int size;
+};
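+
+// ArrayList hands out stable integer ids and recycles freed slots; iteration
+// skips removed entries. A sketch (Function is just an example payload):
+//
+//   ArrayList funcs;
+//   int id;
+//   funcs.insert(fn, id);   // id indexes fn, freed ids are reused
+//   for (ArrayList::Iterator it = funcs.iterator(); !it.end(); it.next())
+//      visit(reinterpret_cast<Function *>(it.get()));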
+
+class Interval
+{
+public:
+ Interval() : head(0), tail(0) { }
+ ~Interval();
+
+ bool extend(int, int);
+ void unify(Interval&); // clears source interval
+ void clear();
+
+ inline int begin() { return head ? head->bgn : -1; }
+ inline int end() { checkTail(); return tail ? tail->end : -1; }
+ inline bool isEmpty() const { return !head; }
+ bool overlaps(const Interval&) const;
+ bool contains(int pos);
+
+ void print() const;
+
+ inline void checkTail() const;
+
+private:
+ class Range
+ {
+ public:
+ Range(int a, int b) : next(0), bgn(a), end(b) { }
+
+ Range *next;
+ int bgn;
+ int end;
+
+ void coalesce(Range **ptail)
+ {
+ Range *rnn;
+
+ while (next && end >= next->bgn) {
+ assert(bgn <= next->bgn);
+ rnn = next->next;
+ end = MAX2(end, next->end);
+ delete next;
+ next = rnn;
+ }
+ if (!next)
+ *ptail = this;
+ }
+ };
+
+ Range *head;
+ Range *tail;
+};
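+
+// Interval keeps a sorted list of half-open [bgn, end) ranges and merges them
+// in extend(); e.g. (the values are arbitrary):
+//
+//   Interval livei;
+//   livei.extend(8, 16);
+//   livei.extend(20, 24);   // two disjoint ranges: [8 16) [20 24)
+//   livei.extend(14, 22);   // coalesced into a single [8 24)
+//   if (livei.overlaps(other))
+//      ...                  // the two live ranges interfere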
+
+class BitSet
+{
+public:
+ BitSet() : marker(false), data(0), size(0) { }
+ BitSet(unsigned int nBits, bool zero) : marker(false), data(0), size(0)
+ {
+ allocate(nBits, zero);
+ }
+ ~BitSet()
+ {
+ if (data)
+ FREE(data);
+ }
+
+ bool allocate(unsigned int nBits, bool zero);
+
+ inline unsigned int getSize() const { return size; }
+
+ void fill(uint32_t val);
+
+ void setOr(BitSet *, BitSet *); // second BitSet may be NULL
+
+ inline void set(unsigned int i)
+ {
+ assert(i < size);
+ data[i / 32] |= 1 << (i % 32);
+ }
+
+ inline void clr(unsigned int i)
+ {
+ assert(i < size);
+ data[i / 32] &= ~(1 << (i % 32));
+ }
+
+ inline bool test(unsigned int i) const
+ {
+ assert(i < size);
+ return data[i / 32] & (1 << (i % 32));
+ }
+
+ BitSet& operator|=(const BitSet&);
+
+ BitSet& operator=(const BitSet& set)
+ {
+ assert(data && set.data);
+ assert(size == set.size);
+ memcpy(data, set.data, (set.size + 7) / 8);
+ return *this;
+ }
+
+ void andNot(const BitSet&);
+
+ unsigned int popCount() const;
+
+ void print() const;
+
+public:
+ bool marker; // for user
+
+private:
+ uint32_t *data;
+ unsigned int size;
+};
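+
+// BitSet is used as a fixed-size set of small integers (e.g. value ids in
+// liveness analysis); a sketch with an assumed element count:
+//
+//   BitSet live(numValues, true);   // allocate and zero
+//   live.set(val->id);
+//   live |= liveOutOfSuccessor;     // union
+//   live.andNot(defined);           // set difference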
+
+void Interval::checkTail() const
+{
+#if NV50_DEBUG & NV50_DEBUG_PROG_RA
+ Range *r = head;
+ while (r->next)
+ r = r->next;
+ assert(tail == r);
+#endif
+}
+
+class MemoryPool
+{
+private:
+ inline bool enlargeAllocationsArray(const unsigned int id, unsigned int nr)
+ {
+ const unsigned int size = sizeof(uint8_t *) * id;
+ const unsigned int incr = sizeof(uint8_t *) * nr;
+
+ uint8_t **alloc = (uint8_t **)REALLOC(allocArray, size, size + incr);
+ if (!alloc)
+ return false;
+ allocArray = alloc;
+ return true;
+ }
+
+ inline bool enlargeCapacity()
+ {
+ const unsigned int id = count >> objStepLog2;
+
+ uint8_t *const mem = (uint8_t *)MALLOC(objSize << objStepLog2);
+ if (!mem)
+ return false;
+
+ if (!(id % 32)) {
+ if (!enlargeAllocationsArray(id, 32)) {
+ FREE(mem);
+ return false;
+ }
+ }
+ allocArray[id] = mem;
+ return true;
+ }
+
+public:
+ MemoryPool(unsigned int size, unsigned int incr) : objSize(size),
+ objStepLog2(incr)
+ {
+ allocArray = NULL;
+ released = NULL;
+ count = 0;
+ }
+
+ ~MemoryPool()
+ {
+ unsigned int allocCount = (count + (1 << objStepLog2) - 1) >> objStepLog2;
+ for (unsigned int i = 0; i < allocCount && allocArray[i]; ++i)
+ FREE(allocArray[i]);
+ if (allocArray)
+ FREE(allocArray);
+ }
+
+ void *allocate()
+ {
+ void *ret;
+ const unsigned int mask = (1 << objStepLog2) - 1;
+
+ if (released) {
+ ret = released;
+ released = *(void **)released;
+ return ret;
+ }
+
+ if (!(count & mask))
+ if (!enlargeCapacity())
+ return NULL;
+
+ ret = allocArray[count >> objStepLog2] + (count & mask) * objSize;
+ ++count;
+ return ret;
+ }
+
+ void release(void *ptr)
+ {
+ *(void **)ptr = released;
+ released = ptr;
+ }
+
+private:
+ uint8_t **allocArray; // array (list) of MALLOC allocations
+
+ void *released; // list of released objects
+
+   unsigned int count; // number of objects allocated so far (free-list reuse not counted)
+
+ const unsigned int objSize;
+ const unsigned int objStepLog2;
+};
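+
+// MemoryPool hands out fixed-size chunks in batches of (1 << objStepLog2)
+// objects; the new_* macros at the top of this file construct IR objects in
+// such chunks with placement new. Roughly (constructor arguments are made up):
+//
+//   MemoryPool pool(sizeof(Instruction), 6);   // 64 Instructions per MALLOC
+//   void *mem = pool.allocate();
+//   Instruction *insn = new (mem) Instruction(func, op, ty);
+//   ...
+//   pool.release(insn);   // memory returns to the free list, no dtor is run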
+
+} // namespace nv50_ir
+
+#endif // __NV50_IR_UTIL_H__
diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile
index 3a5314625e6..c41262559cd 100644
--- a/src/gallium/drivers/nvc0/Makefile
+++ b/src/gallium/drivers/nvc0/Makefile
@@ -3,7 +3,7 @@ include $(TOP)/configs/current
LIBNAME = nvc0
-# get C_SOURCES
+# get C/CPP_SOURCES
include Makefile.sources
LIBRARY_INCLUDES = \
diff --git a/src/gallium/drivers/nvc0/Makefile.sources b/src/gallium/drivers/nvc0/Makefile.sources
index a057f060130..9b1fb97f0cb 100644
--- a/src/gallium/drivers/nvc0/Makefile.sources
+++ b/src/gallium/drivers/nvc0/Makefile.sources
@@ -22,3 +22,8 @@ C_SOURCES := \
nvc0_push.c \
nvc0_push2.c \
nvc0_query.c
+
+CPP_SOURCES := \
+ codegen/nv50_ir_emit_nvc0.cpp \
+ codegen/nv50_ir_lowering_nvc0.cpp \
+ codegen/nv50_ir_target_nvc0.cpp
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
new file mode 100644
index 00000000000..2ab06f426e5
--- /dev/null
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
@@ -0,0 +1,1714 @@
+
+#include "nv50_ir_target_nvc0.h"
+
+namespace nv50_ir {
+
+// Argh, all these assertions ...
+
+class CodeEmitterNVC0 : public CodeEmitter
+{
+public:
+ CodeEmitterNVC0(const TargetNVC0 *);
+
+ virtual bool emitInstruction(Instruction *);
+ virtual uint32_t getMinEncodingSize(const Instruction *) const;
+
+ inline void setProgramType(Program::Type pType) { progType = pType; }
+
+private:
+ const TargetNVC0 *targ;
+
+ Program::Type progType;
+
+private:
+ void emitForm_A(const Instruction *, uint64_t);
+ void emitForm_B(const Instruction *, uint64_t);
+ void emitForm_S(const Instruction *, uint32_t, bool pred);
+
+ void emitPredicate(const Instruction *);
+
+ void setAddress16(const ValueRef&);
+ void setImmediate(const Instruction *, const int s); // needs op already set
+ void setImmediateS8(const ValueRef&);
+
+ void emitCondCode(CondCode cc, int pos);
+ void emitInterpMode(const Instruction *);
+ void emitLoadStoreType(DataType ty);
+ void emitCachingMode(CacheMode c);
+
+ void emitShortSrc2(const ValueRef&);
+
+ inline uint8_t getSRegEncoding(const ValueRef&);
+
+ void roundMode_A(const Instruction *);
+ void roundMode_C(const Instruction *);
+ void roundMode_CS(const Instruction *);
+
+ void emitNegAbs12(const Instruction *);
+
+ void emitNOP(const Instruction *);
+
+ void emitLOAD(const Instruction *);
+ void emitSTORE(const Instruction *);
+ void emitMOV(const Instruction *);
+
+ void emitINTERP(const Instruction *);
+ void emitPFETCH(const Instruction *);
+ void emitVFETCH(const Instruction *);
+ void emitEXPORT(const Instruction *);
+ void emitOUT(const Instruction *);
+
+ void emitUADD(const Instruction *);
+ void emitFADD(const Instruction *);
+ void emitUMUL(const Instruction *);
+ void emitFMUL(const Instruction *);
+ void emitIMAD(const Instruction *);
+ void emitFMAD(const Instruction *);
+
+ void emitNOT(Instruction *);
+ void emitLogicOp(const Instruction *, uint8_t subOp);
+ void emitPOPC(const Instruction *);
+ void emitINSBF(const Instruction *);
+ void emitShift(const Instruction *);
+
+ void emitSFnOp(const Instruction *, uint8_t subOp);
+
+ void emitCVT(Instruction *);
+ void emitMINMAX(const Instruction *);
+ void emitPreOp(const Instruction *);
+
+ void emitSET(const CmpInstruction *);
+ void emitSLCT(const CmpInstruction *);
+ void emitSELP(const Instruction *);
+
+ void emitTEX(const TexInstruction *);
+ void emitTEXCSAA(const TexInstruction *);
+ void emitTXQ(const TexInstruction *);
+ void emitPIXLD(const TexInstruction *);
+
+ void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
+
+ void emitFlow(const Instruction *);
+
+ inline void defId(const ValueDef&, const int pos);
+ inline void srcId(const ValueRef&, const int pos);
+
+ inline void srcAddr32(const ValueRef&, const int pos); // address / 4
+
+ inline void srcId(const ValueRef *, const int pos);
+
+ inline bool isLIMM(const ValueRef&, DataType ty);
+};
+
+// for better visibility
+#define HEX64(h, l) 0x##h##l##ULL
+
+#define SDATA(a) ((a).rep()->reg.data)
+#define DDATA(a) ((a).rep()->reg.data)
+
+void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
+{
+ code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
+}
+
+void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
+{
+ code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
+}
+
+void CodeEmitterNVC0::srcAddr32(const ValueRef& src, const int pos)
+{
+ code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
+}
+
+void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
+{
+ code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
+}
+
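+// whether the value needs the long-immediate (LIMM) encoding: floats whose
+// low 12 bits would be lost in the short immediate field, integers with any
+// of the upper 12 bits set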
+bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
+{
+ const ImmediateValue *imm = ref.get()->asImm();
+
+ return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
+}
+
+void
+CodeEmitterNVC0::roundMode_A(const Instruction *insn)
+{
+ switch (insn->rnd) {
+ case ROUND_M: code[1] |= 1 << 23; break;
+ case ROUND_P: code[1] |= 2 << 23; break;
+ case ROUND_Z: code[1] |= 3 << 23; break;
+ default:
+ assert(insn->rnd == ROUND_N);
+ break;
+ }
+}
+
+void
+CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
+{
+ if (i->src[1].mod.abs()) code[0] |= 1 << 6;
+ if (i->src[0].mod.abs()) code[0] |= 1 << 7;
+ if (i->src[1].mod.neg()) code[0] |= 1 << 8;
+ if (i->src[0].mod.neg()) code[0] |= 1 << 9;
+}
+
+void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
+{
+ uint8_t val;
+
+ switch (cc) {
+ case CC_LT: val = 0x1; break;
+ case CC_LTU: val = 0x9; break;
+ case CC_EQ: val = 0x2; break;
+ case CC_EQU: val = 0xa; break;
+ case CC_LE: val = 0x3; break;
+ case CC_LEU: val = 0xb; break;
+ case CC_GT: val = 0x4; break;
+ case CC_GTU: val = 0xc; break;
+ case CC_NE: val = 0x5; break;
+ case CC_NEU: val = 0xd; break;
+ case CC_GE: val = 0x6; break;
+ case CC_GEU: val = 0xe; break;
+ case CC_TR: val = 0xf; break;
+ case CC_FL: val = 0x0; break;
+
+ case CC_A: val = 0x14; break;
+ case CC_NA: val = 0x13; break;
+ case CC_S: val = 0x15; break;
+ case CC_NS: val = 0x12; break;
+ case CC_C: val = 0x16; break;
+ case CC_NC: val = 0x11; break;
+ case CC_O: val = 0x17; break;
+ case CC_NO: val = 0x10; break;
+
+ default:
+ val = 0;
+ assert(!"invalid condition code");
+ break;
+ }
+ code[pos / 32] |= val << (pos % 32);
+}
+
+void
+CodeEmitterNVC0::emitPredicate(const Instruction *i)
+{
+ if (i->predSrc >= 0) {
+ assert(i->getPredicate()->reg.file == FILE_PREDICATE);
+ srcId(i->src[i->predSrc], 10);
+ if (i->cc == CC_NOT_P)
+ code[0] |= 0x2000; // negate
+ } else {
+ code[0] |= 0x1c00;
+ }
+}
+
+void
+CodeEmitterNVC0::setAddress16(const ValueRef& src)
+{
+ Symbol *sym = src.get()->asSym();
+
+ assert(sym);
+
+ code[0] |= (sym->reg.data.offset & 0x003f) << 26;
+ code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
+}
+
+void
+CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
+{
+ const ImmediateValue *imm = i->src[s].get()->asImm();
+ uint32_t u32;
+
+ assert(imm);
+ u32 = imm->reg.data.u32;
+
+ if ((code[0] & 0xf) == 0x2) {
+ // LIMM
+ code[0] |= (u32 & 0x3f) << 26;
+ code[1] |= u32 >> 6;
+ } else
+ if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
+ // integer immediate
+ assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
+ assert(!(code[1] & 0xc000));
+ u32 &= 0xfffff;
+ code[0] |= (u32 & 0x3f) << 26;
+ code[1] |= 0xc000 | (u32 >> 6);
+ } else {
+ // float immediate
+ assert(!(u32 & 0x00000fff));
+ assert(!(code[1] & 0xc000));
+ code[0] |= ((u32 >> 12) & 0x3f) << 26;
+ code[1] |= 0xc000 | (u32 >> 18);
+ }
+}
+
+void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
+{
+ const ImmediateValue *imm = ref.get()->asImm();
+
+ int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
+
+ assert(s8 == imm->reg.data.s32);
+
+ code[0] |= (s8 & 0x3f) << 26;
+ code[0] |= (s8 >> 6) << 8;
+}
+
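+// The emitForm_* helpers fill in the parts common to most encodings: form A
+// is the 8-byte format with up to three sources (GPR, c[] or immediate),
+// form B the 8-byte single-source variant, and form S the 4-byte short
+// format used when encSize != 8.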
+void
+CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
+{
+ code[0] = opc;
+ code[1] = opc >> 32;
+
+ emitPredicate(i);
+
+ defId(i->def[0], 14);
+
+ int s1 = 26;
+ if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
+ s1 = 49;
+
+ for (int s = 0; s < 3 && i->srcExists(s); ++s) {
+ switch (i->getSrc(s)->reg.file) {
+ case FILE_MEMORY_CONST:
+ assert(!(code[1] & 0xc000));
+ code[1] |= (s == 2) ? 0x8000 : 0x4000;
+ code[1] |= i->getSrc(s)->reg.fileIndex << 10;
+ setAddress16(i->src[s]);
+ break;
+ case FILE_IMMEDIATE:
+ assert(s == 1 ||
+ i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
+ assert(!(code[1] & 0xc000));
+ setImmediate(i, s);
+ break;
+ case FILE_GPR:
+ if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
+ break;
+ srcId(i->src[s], s ? ((s == 2) ? 49 : s1) : 20);
+ break;
+ default:
+ // ignore here, can be predicate or flags, but must not be address
+ break;
+ }
+ }
+}
+
+void
+CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
+{
+ code[0] = opc;
+ code[1] = opc >> 32;
+
+ emitPredicate(i);
+
+ defId(i->def[0], 14);
+
+ switch (i->src[0].getFile()) {
+ case FILE_MEMORY_CONST:
+ assert(!(code[1] & 0xc000));
+ code[1] |= 0x4000 | (i->src[0].get()->reg.fileIndex << 10);
+ setAddress16(i->src[0]);
+ break;
+ case FILE_IMMEDIATE:
+ assert(!(code[1] & 0xc000));
+ setImmediate(i, 0);
+ break;
+ case FILE_GPR:
+ srcId(i->src[0], 26);
+ break;
+ default:
+ // ignore here, can be predicate or flags, but must not be address
+ break;
+ }
+}
+
+void
+CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
+{
+ code[0] = opc;
+
+ int ss2a = 0;
+ if (opc == 0x0d || opc == 0x0e)
+ ss2a = 2;
+
+ defId(i->def[0], 14);
+ srcId(i->src[0], 20);
+
+ assert(pred || (i->predSrc < 0));
+ if (pred)
+ emitPredicate(i);
+
+ for (int s = 1; s < 3 && i->srcExists(s); ++s) {
+ if (i->src[s].get()->reg.file == FILE_MEMORY_CONST) {
+ assert(!(code[0] & (0x300 >> ss2a)));
+ switch (i->src[s].get()->reg.fileIndex) {
+ case 0: code[0] |= 0x100 >> ss2a; break;
+ case 1: code[0] |= 0x200 >> ss2a; break;
+ case 16: code[0] |= 0x300 >> ss2a; break;
+ default:
+ ERROR("invalid c[] space for short form\n");
+ break;
+ }
+ if (s == 1)
+ code[0] |= i->getSrc(s)->reg.data.offset << 24;
+ else
+ code[0] |= i->getSrc(s)->reg.data.offset << 6;
+ } else
+ if (i->src[s].getFile() == FILE_IMMEDIATE) {
+ assert(s == 1);
+ setImmediateS8(i->src[s]);
+ } else
+ if (i->src[s].getFile() == FILE_GPR) {
+ srcId(i->src[s], (s == 1) ? 26 : 8);
+ }
+ }
+}
+
+void
+CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
+{
+ if (src.getFile() == FILE_MEMORY_CONST) {
+ switch (src.get()->reg.fileIndex) {
+ case 0: code[0] |= 0x100; break;
+ case 1: code[0] |= 0x200; break;
+ case 16: code[0] |= 0x300; break;
+ default:
+ assert(!"unsupported file index for short op");
+ break;
+ }
+ srcAddr32(src, 20);
+ } else {
+ srcId(src, 20);
+ assert(src.getFile() == FILE_GPR);
+ }
+}
+
+void
+CodeEmitterNVC0::emitNOP(const Instruction *i)
+{
+ code[0] = 0x000001e4;
+ code[1] = 0x40000000;
+ emitPredicate(i);
+}
+
+void
+CodeEmitterNVC0::emitFMAD(const Instruction *i)
+{
+ bool neg1 = (i->src[0].mod ^ i->src[1].mod).neg();
+
+ if (i->encSize == 8) {
+ if (isLIMM(i->src[1], TYPE_F32)) {
+ emitForm_A(i, HEX64(20000000, 00000002));
+ } else {
+ emitForm_A(i, HEX64(30000000, 00000000));
+
+ if (i->src[2].mod.neg())
+ code[0] |= 1 << 8;
+ }
+ roundMode_A(i);
+
+ if (neg1)
+ code[0] |= 1 << 9;
+
+ if (i->saturate)
+ code[0] |= 1 << 5;
+ if (i->ftz)
+ code[0] |= 1 << 6;
+ } else {
+ assert(!i->saturate && !i->src[2].mod.neg());
+ emitForm_S(i, (i->src[2].getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
+ false);
+ if (neg1)
+ code[0] |= 1 << 4;
+ }
+}
+
+void
+CodeEmitterNVC0::emitFMUL(const Instruction *i)
+{
+ bool neg = (i->src[0].mod ^ i->src[1].mod).neg();
+
+ assert(i->postFactor >= -3 && i->postFactor <= 3);
+
+ if (i->encSize == 8) {
+ if (isLIMM(i->src[1], TYPE_F32)) {
+ assert(i->postFactor == 0); // constant folded, hopefully
+ emitForm_A(i, HEX64(30000000, 00000002));
+ } else {
+ emitForm_A(i, HEX64(58000000, 00000000));
+ roundMode_A(i);
+ code[1] |= ((i->postFactor > 0) ?
+ (7 - i->postFactor) : (0 - i->postFactor)) << 17;
+ }
+ if (neg)
+ code[1] ^= 1 << 25; // aliases with LIMM sign bit
+
+ if (i->saturate)
+ code[0] |= 1 << 5;
+
+ if (i->dnz)
+ code[0] |= 1 << 7;
+ else
+ if (i->ftz)
+ code[0] |= 1 << 6;
+ } else {
+ assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
+ emitForm_S(i, 0xa8, true);
+ }
+}
+
+void
+CodeEmitterNVC0::emitUMUL(const Instruction *i)
+{
+ if (i->encSize == 8) {
+ if (i->src[1].getFile() == FILE_IMMEDIATE) {
+ emitForm_A(i, HEX64(10000000, 00000002));
+ } else {
+ emitForm_A(i, HEX64(50000000, 00000003));
+ }
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
+ code[0] |= 1 << 6;
+ if (i->sType == TYPE_S32)
+ code[0] |= 1 << 5;
+ if (i->dType == TYPE_S32)
+ code[0] |= 1 << 7;
+ } else {
+ emitForm_S(i, i->src[1].getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
+
+ if (i->sType == TYPE_S32)
+ code[0] |= 1 << 6;
+ }
+}
+
+void
+CodeEmitterNVC0::emitFADD(const Instruction *i)
+{
+ if (i->encSize == 8) {
+ if (isLIMM(i->src[1], TYPE_F32)) {
+ emitForm_A(i, HEX64(28000000, 00000002));
+
+ assert(!i->src[1].mod.neg() && !i->src[1].mod.abs() && !i->saturate);
+ } else {
+ emitForm_A(i, HEX64(50000000, 00000000));
+
+ roundMode_A(i);
+ if (i->saturate)
+ code[1] |= 1 << 17;
+ }
+ emitNegAbs12(i);
+
+ if (i->op == OP_SUB) code[0] ^= 1 << 8;
+
+ if (i->ftz)
+ code[0] |= 1 << 5;
+ } else {
+ assert(!i->saturate && i->op != OP_SUB &&
+ !i->src[0].mod.abs() &&
+ !i->src[1].mod.neg() && !i->src[1].mod.abs());
+
+ emitForm_S(i, 0x49, true);
+
+ if (i->src[0].mod.neg())
+ code[0] |= 1 << 7;
+ }
+}
+
+void
+CodeEmitterNVC0::emitUADD(const Instruction *i)
+{
+ uint32_t addOp = 0;
+
+ assert(!i->src[0].mod.abs() && !i->src[1].mod.abs());
+ assert(!i->src[0].mod.neg() || !i->src[1].mod.neg());
+
+ if (i->src[0].mod.neg())
+ addOp |= 0x200;
+ if (i->src[1].mod.neg())
+ addOp |= 0x100;
+ if (i->op == OP_SUB) {
+ addOp ^= 0x100;
+ assert(addOp != 0x300); // would be add-plus-one
+ }
+
+ if (i->encSize == 8) {
+ if (isLIMM(i->src[1], TYPE_U32)) {
+ emitForm_A(i, HEX64(08000000, 00000002));
+ if (i->def[1].exists())
+ code[1] |= 1 << 26; // write carry
+ } else {
+ emitForm_A(i, HEX64(48000000, 00000003));
+ if (i->def[1].exists())
+ code[1] |= 1 << 16; // write carry
+ }
+ code[0] |= addOp;
+
+ if (i->saturate)
+ code[0] |= 1 << 5;
+ if (i->flagsSrc >= 0) // add carry
+ code[0] |= 1 << 6;
+ } else {
+ assert(!(addOp & 0x100));
+ emitForm_S(i, (addOp >> 3) |
+ ((i->src[1].getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
+ }
+}
+
+// TODO: shl-add
+void
+CodeEmitterNVC0::emitIMAD(const Instruction *i)
+{
+ assert(i->encSize == 8);
+ emitForm_A(i, HEX64(20000000, 00000003));
+
+ if (isSignedType(i->dType))
+ code[0] |= 1 << 7;
+ if (isSignedType(i->sType))
+ code[0] |= 1 << 5;
+
+ code[1] |= i->saturate << 24;
+
+ if (i->flagsDef >= 0) code[1] |= 1 << 16;
+ if (i->flagsSrc >= 0) code[1] |= 1 << 23;
+
+ if (i->src[2].mod.neg()) code[0] |= 0x10;
+ if (i->src[1].mod.neg() ^
+ i->src[0].mod.neg()) code[0] |= 0x20;
+
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
+ code[0] |= 1 << 6;
+}
+
+void
+CodeEmitterNVC0::emitNOT(Instruction *i)
+{
+ assert(i->encSize == 8);
+ i->src[1].set(i->src[0]);
+ emitForm_A(i, HEX64(68000000, 000001c3));
+}
+
+void
+CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
+{
+ if (i->encSize == 8) {
+ if (isLIMM(i->src[1], TYPE_U32)) {
+ emitForm_A(i, HEX64(38000000, 00000002));
+
+ if (i->src[2].exists())
+ code[1] |= 1 << 26;
+ } else {
+ emitForm_A(i, HEX64(68000000, 00000003));
+
+ if (i->src[2].exists())
+ code[1] |= 1 << 16;
+ }
+ code[0] |= subOp << 6;
+
+ if (i->src[2].exists()) // carry
+ code[0] |= 1 << 5;
+
+ if (i->src[0].mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
+ if (i->src[1].mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
+ } else {
+ emitForm_S(i, (subOp << 5) |
+ ((i->src[1].getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
+ }
+}
+
+void
+CodeEmitterNVC0::emitPOPC(const Instruction *i)
+{
+ emitForm_A(i, HEX64(54000000, 00000004));
+
+ if (i->src[0].mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
+ if (i->src[1].mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
+}
+
+void
+CodeEmitterNVC0::emitINSBF(const Instruction *i)
+{
+ emitForm_A(i, HEX64(28000000, 30000000));
+}
+
+void
+CodeEmitterNVC0::emitShift(const Instruction *i)
+{
+ if (i->op == OP_SHR) {
+ emitForm_A(i, HEX64(58000000, 00000003)
+ | (isSignedType(i->dType) ? 0x20 : 0x00));
+ } else {
+ emitForm_A(i, HEX64(60000000, 00000003));
+ }
+
+ if (0)
+ code[0] |= 1 << 9; // clamp shift amount
+}
+
+void
+CodeEmitterNVC0::emitPreOp(const Instruction *i)
+{
+ if (i->encSize == 8) {
+ emitForm_B(i, HEX64(60000000, 00000000));
+
+ if (i->op == OP_PREEX2)
+ code[0] |= 0x20;
+
+ if (i->src[0].mod.abs()) code[0] |= 1 << 6;
+ if (i->src[0].mod.neg()) code[0] |= 1 << 8;
+ } else {
+ emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
+ }
+}
+
+void
+CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
+{
+ if (i->encSize == 8) {
+ code[0] = 0x00000000 | (subOp << 26);
+ code[1] = 0xc8000000;
+
+ emitPredicate(i);
+
+ defId(i->def[0], 14);
+ srcId(i->src[0], 20);
+
+ assert(i->src[0].getFile() == FILE_GPR);
+
+ if (i->saturate) code[0] |= 1 << 5;
+
+ if (i->src[0].mod.abs()) code[0] |= 1 << 7;
+ if (i->src[0].mod.neg()) code[0] |= 1 << 9;
+ } else {
+ emitForm_S(i, 0x80000008 | (subOp << 26), true);
+
+ assert(!i->src[0].mod.neg());
+ if (i->src[0].mod.abs()) code[0] |= 1 << 30;
+ }
+}
+
+void
+CodeEmitterNVC0::emitMINMAX(const Instruction *i)
+{
+ uint64_t op;
+
+ assert(i->encSize == 8);
+
+ op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
+
+ if (i->ftz)
+ op |= 1 << 5;
+ else
+ if (!isFloatType(i->dType))
+ op |= isSignedType(i->dType) ? 0x23 : 0x03;
+
+ emitForm_A(i, op);
+ emitNegAbs12(i);
+}
+
+void
+CodeEmitterNVC0::roundMode_C(const Instruction *i)
+{
+ switch (i->rnd) {
+ case ROUND_M: code[1] |= 1 << 17; break;
+ case ROUND_P: code[1] |= 2 << 17; break;
+ case ROUND_Z: code[1] |= 3 << 17; break;
+ case ROUND_NI: code[0] |= 1 << 7; break;
+ case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
+ case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
+ case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
+ case ROUND_N: break;
+ default:
+ assert(!"invalid round mode");
+ break;
+ }
+}
+
+void
+CodeEmitterNVC0::roundMode_CS(const Instruction *i)
+{
+ switch (i->rnd) {
+ case ROUND_M:
+ case ROUND_MI: code[0] |= 1 << 16; break;
+ case ROUND_P:
+ case ROUND_PI: code[0] |= 2 << 16; break;
+ case ROUND_Z:
+ case ROUND_ZI: code[0] |= 3 << 16; break;
+ default:
+ break;
+ }
+}
+
+void
+CodeEmitterNVC0::emitCVT(Instruction *i)
+{
+ const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
+
+ switch (i->op) {
+ case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;
+ case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
+ case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
+ default:
+ break;
+ }
+
+ const bool sat = (i->op == OP_SAT) || i->saturate;
+ const bool abs = (i->op == OP_ABS) || i->src[0].mod.abs();
+ const bool neg = (i->op == OP_NEG) || i->src[0].mod.neg();
+
+ if (i->encSize == 8) {
+ emitForm_B(i, HEX64(10000000, 00000004));
+
+ roundMode_C(i);
+
+ code[0] |= util_logbase2(i->def[0].getSize()) << 20;
+ code[0] |= util_logbase2(i->src[0].getSize()) << 23;
+
+ if (sat)
+ code[0] |= 0x20;
+ if (abs)
+ code[0] |= 1 << 6;
+ if (neg && i->op != OP_ABS)
+ code[0] |= 1 << 8;
+
+ if (i->ftz)
+ code[1] |= 1 << 23;
+
+ if (isSignedIntType(i->dType))
+ code[0] |= 0x080;
+ if (isSignedIntType(i->sType))
+ code[0] |= 0x200;
+
+ if (isFloatType(i->dType)) {
+ if (!isFloatType(i->sType))
+ code[1] |= 0x08000000;
+ } else {
+ if (isFloatType(i->sType))
+ code[1] |= 0x04000000;
+ else
+ code[1] |= 0x0c000000;
+ }
+ } else {
+ if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
+ code[0] = 0x298;
+ } else
+ if (isFloatType(i->dType)) {
+ if (isFloatType(i->sType))
+ code[0] = 0x098;
+ else
+ code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
+ } else {
+ assert(isFloatType(i->sType));
+
+ code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
+ }
+
+ if (neg) code[0] |= 1 << 16;
+ if (sat) code[0] |= 1 << 18;
+ if (abs) code[0] |= 1 << 19;
+
+ roundMode_CS(i);
+ }
+}
+
+void
+CodeEmitterNVC0::emitSET(const CmpInstruction *i)
+{
+ uint32_t hi;
+ uint32_t lo = 0;
+
+ if (i->sType == TYPE_F64)
+ lo = 0x1;
+ else
+ if (!isFloatType(i->sType))
+ lo = 0x3;
+
+ if (isFloatType(i->dType) || isSignedIntType(i->sType))
+ lo |= 0x20;
+
+ switch (i->op) {
+ case OP_SET_AND: hi = 0x10000000; break;
+ case OP_SET_OR: hi = 0x10200000; break;
+ case OP_SET_XOR: hi = 0x10400000; break;
+ default:
+ hi = 0x100e0000;
+ break;
+ }
+ emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
+
+ if (i->def[0].getFile() == FILE_PREDICATE) {
+ if (i->sType == TYPE_F32)
+ code[1] += 0x10000000;
+ else
+ code[1] += 0x08000000;
+
+ code[0] &= ~0xfc000;
+ defId(i->def[0], 17);
+ if (i->defExists(1))
+ defId(i->def[1], 14);
+ else
+ code[0] |= 0x1c000;
+ }
+
+ if (i->ftz)
+ code[1] |= 1 << 27;
+
+ emitCondCode(i->setCond, 32 + 23);
+ emitNegAbs12(i);
+}
+
+void
+CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
+{
+ uint64_t op;
+
+ switch (i->dType) {
+ case TYPE_S32:
+ op = HEX64(30000000, 00000023);
+ break;
+ case TYPE_U32:
+ op = HEX64(30000000, 00000003);
+ break;
+ case TYPE_F32:
+ op = HEX64(38000000, 00000000);
+ break;
+ default:
+ assert(!"invalid type for SLCT");
+ op = 0;
+ break;
+ }
+ emitForm_A(i, op);
+
+ CondCode cc = i->setCond;
+
+ if (i->src[2].mod.neg())
+ cc = reverseCondCode(cc);
+
+ emitCondCode(cc, 32 + 23);
+
+ if (i->ftz)
+ code[0] |= 1 << 5;
+}
+
+void CodeEmitterNVC0::emitSELP(const Instruction *i)
+{
+ emitForm_A(i, HEX64(20000000, 00000004));
+
+ if (i->cc == CC_NOT_P || i->src[2].mod & Modifier(NV50_IR_MOD_NOT))
+ code[1] |= 1 << 20;
+}
+
+void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
+{
+ code[0] = 0x00000086;
+ code[1] = 0xd0000000;
+
+ code[1] |= i->tex.r;
+ code[1] |= i->tex.s << 8;
+
+ if (i->tex.liveOnly)
+ code[0] |= 1 << 9;
+
+ defId(i->def[0], 14);
+ srcId(i->src[0], 20);
+}
+
+void
+CodeEmitterNVC0::emitTEX(const TexInstruction *i)
+{
+ code[0] = 0x00000006;
+
+ if (1)
+ code[0] |= 0x80; // normal/t/p mode = t, XXX: what is this ?
+
+ if (i->tex.liveOnly)
+ code[0] |= 1 << 9;
+
+ switch (i->op) {
+ case OP_TEX: code[1] = 0x80000000; break;
+ case OP_TXB: code[1] = 0x84000000; break;
+ case OP_TXL: code[1] = 0x86000000; break;
+ case OP_TXF: code[1] = 0x92000000; break;
+ case OP_TXG: code[1] = 0xa0000000; break;
+ case OP_TXD: code[1] = 0xe0000000; break;
+ default:
+ assert(!"invalid texture op");
+ break;
+ }
+ defId(i->def[0], 14);
+ srcId(i->src[0], 20);
+
+ emitPredicate(i);
+
+ if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
+
+ code[1] |= i->tex.mask << 14;
+
+ code[1] |= i->tex.r;
+ code[1] |= i->tex.s << 8;
+ if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
+ code[1] |= 1 << 18; // in 1st source (with array index)
+
+ // texture target:
+ code[1] |= (i->tex.target.getDim() - 1) << 20;
+ if (i->tex.target.isCube())
+ code[1] += 2 << 20;
+ if (i->tex.target.isArray())
+ code[1] |= 1 << 19;
+ if (i->tex.target.isShadow())
+ code[1] |= 1 << 24;
+
+ int src1 = i->tex.target.getArgCount();
+
+ if (i->src[src1].getFile() == FILE_IMMEDIATE) { // lzero
+ if (i->op == OP_TXL)
+ code[1] &= ~(1 << 26);
+ else
+ if (i->op == OP_TXF)
+ code[1] &= ~(1 << 25);
+ }
+ if (i->tex.target == TEX_TARGET_2D_MS ||
+ i->tex.target == TEX_TARGET_2D_MS_ARRAY)
+ code[1] |= 1 << 23;
+
+ if (i->tex.useOffsets) // in vecSrc0.w
+ code[1] |= 1 << 22;
+
+ srcId(i->src[src1], 26);
+}
+
+void
+CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
+{
+ code[0] = 0x00000086;
+ code[1] = 0xc0000000;
+
+ switch (i->tex.query) {
+ case TXQ_DIMS: code[1] |= 0 << 22; break;
+ case TXQ_TYPE: code[1] |= 1 << 22; break;
+ case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
+ case TXQ_FILTER: code[1] |= 3 << 22; break;
+ case TXQ_LOD: code[1] |= 4 << 22; break;
+ case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;
+ default:
+ assert(!"invalid texture query");
+ break;
+ }
+
+ code[1] |= i->tex.mask << 14;
+
+ code[1] |= i->tex.r;
+ code[1] |= i->tex.s << 8;
+ if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
+ code[1] |= 1 << 18;
+
+ defId(i->def[0], 14);
+ srcId(i->src[0], 20);
+ srcId(i->src[1], 26);
+
+ emitPredicate(i);
+}
+
+void
+CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
+{
+ code[0] = 0x00000000 | (laneMask << 6);
+ code[1] = 0x48000000 | qOp;
+
+ defId(i->def[0], 14);
+ srcId(i->src[0], 20);
+ srcId(i->srcExists(1) ? i->src[1] : i->src[0], 26);
+
+ emitPredicate(i);
+}
+
+void
+CodeEmitterNVC0::emitFlow(const Instruction *i)
+{
+ const FlowInstruction *f = i->asFlow();
+
+ unsigned mask; // bit 0: predicate, bit 1: target
+
+ code[0] = 0x00000007;
+
+ switch (i->op) {
+ case OP_BRA:
+ code[1] = f->absolute ? 0x00000000 : 0x40000000;
+ if (i->src[0].getFile() == FILE_MEMORY_CONST ||
+ i->src[1].getFile() == FILE_MEMORY_CONST)
+ code[1] |= 0x4000;
+ mask = 3;
+ break;
+ case OP_CALL:
+ code[1] = f->absolute ? 0x10000000 : 0x50000000;
+ if (i->src[0].getFile() == FILE_MEMORY_CONST)
+ code[1] |= 0x4000;
+ mask = 2;
+ break;
+
+ case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
+ case OP_RET: code[1] = 0x90000000; mask = 1; break;
+ case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
+ case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
+ case OP_CONT: code[1] = 0xb0000000; mask = 1; break;
+
+ case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
+ case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
+ case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
+ case OP_PRERET: code[1] = 0x78000000; mask = 2; break;
+
+ case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
+ case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
+ case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
+ default:
+ assert(!"invalid flow operation");
+ return;
+ }
+
+ if (mask & 1) {
+ emitPredicate(i);
+ if (i->flagsSrc < 0)
+ code[0] |= 0x1e0;
+ }
+
+ if (!f)
+ return;
+
+ if (f->allWarp)
+ code[0] |= 1 << 15;
+ if (f->limit)
+ code[0] |= 1 << 16;
+
+ if (f->op == OP_CALL) {
+ if (f->builtin) {
+ assert(f->absolute);
+ uint32_t pcAbs = targ->getBuiltinOffset(f->target.builtin);
+ addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
+ addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
+ } else {
+ assert(!f->absolute);
+ int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
+ code[0] |= (pcRel & 0x3f) << 26;
+ code[1] |= (pcRel >> 6) & 0x3ffff;
+ }
+ } else
+ if (mask & 2) {
+ int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
+ // currently we don't want absolute branches
+ assert(!f->absolute);
+ code[0] |= (pcRel & 0x3f) << 26;
+ code[1] |= (pcRel >> 6) & 0x3ffff;
+ }
+}
+
+void
+CodeEmitterNVC0::emitPFETCH(const Instruction *i)
+{
+ uint32_t prim = i->src[0].get()->reg.data.u32;
+
+ code[0] = 0x00000006 | ((prim & 0x3f) << 26);
+ code[1] = 0x00000000 | (prim >> 6);
+
+ emitPredicate(i);
+
+ defId(i->def[0], 14);
+ srcId(i->src[1], 20);
+}
+
+void
+CodeEmitterNVC0::emitVFETCH(const Instruction *i)
+{
+ code[0] = 0x00000006;
+ code[1] = 0x06000000 | i->src[0].get()->reg.data.offset;
+
+ if (i->perPatch)
+ code[0] |= 0x100;
+ if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
+ code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
+
+ emitPredicate(i);
+
+ code[0] |= (i->defCount(0xf) - 1) << 5;
+
+ defId(i->def[0], 14);
+ srcId(i->src[0].getIndirect(0), 20);
+ srcId(i->src[0].getIndirect(1), 26); // vertex address
+}
+
+void
+CodeEmitterNVC0::emitEXPORT(const Instruction *i)
+{
+ unsigned int size = typeSizeof(i->dType);
+
+ code[0] = 0x00000006 | ((size / 4 - 1) << 5);
+ code[1] = 0x0a000000 | i->src[0].get()->reg.data.offset;
+
+ assert(size != 12 && !(code[1] & (size - 1)));
+
+ if (i->perPatch)
+ code[0] |= 0x100;
+
+ emitPredicate(i);
+
+ assert(i->src[1].getFile() == FILE_GPR);
+
+ srcId(i->src[0].getIndirect(0), 20);
+ srcId(i->src[0].getIndirect(1), 32 + 17); // vertex base address
+ srcId(i->src[1], 26);
+}
+
+void
+CodeEmitterNVC0::emitOUT(const Instruction *i)
+{
+ code[0] = 0x00000006;
+ code[1] = 0x1c000000;
+
+ emitPredicate(i);
+
+ defId(i->def[0], 14); // new secret address
+ srcId(i->src[0], 20); // old secret address, should be 0 initially
+
+ assert(i->src[0].getFile() == FILE_GPR);
+
+ if (i->op == OP_EMIT)
+ code[0] |= 1 << 5;
+ if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
+ code[0] |= 1 << 6;
+
+ // vertex stream
+ if (i->src[1].getFile() == FILE_IMMEDIATE) {
+ code[1] |= 0xc000;
+ code[0] |= SDATA(i->src[1]).u32 << 26;
+ } else {
+ srcId(i->src[1], 26);
+ }
+}
+
+void
+CodeEmitterNVC0::emitInterpMode(const Instruction *i)
+{
+ if (i->encSize == 8) {
+ code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
+ } else {
+ if (i->getInterpMode() == NV50_IR_INTERP_SC)
+ code[0] |= 0x80;
+ assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
+ }
+}
+
+void
+CodeEmitterNVC0::emitINTERP(const Instruction *i)
+{
+ const uint32_t base = i->getSrc(0)->reg.data.offset;
+
+ if (i->encSize == 8) {
+ code[0] = 0x00000000;
+ code[1] = 0xc0000000 | (base & 0xffff);
+
+ if (i->saturate)
+ code[0] |= 1 << 5;
+
+ if (i->op == OP_PINTERP)
+ srcId(i->src[1], 26);
+ else
+ code[0] |= 0x3f << 26;
+
+ srcId(i->src[0].getIndirect(0), 20);
+ } else {
+ assert(i->op == OP_PINTERP);
+ code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
+ srcId(i->src[1], 20);
+ }
+ emitInterpMode(i);
+
+ emitPredicate(i);
+ defId(i->def[0], 14);
+
+ if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
+ srcId(i->src[i->op == OP_PINTERP ? 2 : 1], 17);
+ else
+ code[1] |= 0x3f << 17;
+}
+
+void
+CodeEmitterNVC0::emitLoadStoreType(DataType ty)
+{
+ uint8_t val;
+
+ switch (ty) {
+ case TYPE_U8:
+ val = 0x00;
+ break;
+ case TYPE_S8:
+ val = 0x20;
+ break;
+ case TYPE_F16:
+ case TYPE_U16:
+ val = 0x40;
+ break;
+ case TYPE_S16:
+ val = 0x60;
+ break;
+ case TYPE_F32:
+ case TYPE_U32:
+ case TYPE_S32:
+ val = 0x80;
+ break;
+ case TYPE_F64:
+ case TYPE_U64:
+ case TYPE_S64:
+ val = 0xa0;
+ break;
+ case TYPE_B128:
+ val = 0xc0;
+ break;
+ default:
+ val = 0x80;
+ assert(!"invalid type");
+ break;
+ }
+ code[0] |= val;
+}
+
+void
+CodeEmitterNVC0::emitCachingMode(CacheMode c)
+{
+ uint32_t val;
+
+ switch (c) {
+ case CACHE_CA:
+// case CACHE_WB:
+ val = 0x000;
+ break;
+ case CACHE_CG:
+ val = 0x100;
+ break;
+ case CACHE_CS:
+ val = 0x200;
+ break;
+ case CACHE_CV:
+// case CACHE_WT:
+ val = 0x300;
+ break;
+ default:
+ val = 0;
+ assert(!"invalid caching mode");
+ break;
+ }
+ code[0] |= val;
+}
+
+void
+CodeEmitterNVC0::emitSTORE(const Instruction *i)
+{
+ uint32_t opc;
+
+ switch (i->src[0].getFile()) {
+ case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
+ case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
+ case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
+ default:
+ assert(!"invalid memory file");
+ opc = 0;
+ break;
+ }
+ code[0] = 0x00000005;
+ code[1] = opc;
+
+ setAddress16(i->src[0]);
+ srcId(i->src[1], 14);
+ srcId(i->src[0].getIndirect(0), 20);
+
+ emitPredicate(i);
+
+ emitLoadStoreType(i->dType);
+ emitCachingMode(i->cache);
+}
+
+void
+CodeEmitterNVC0::emitLOAD(const Instruction *i)
+{
+ uint32_t opc;
+
+ code[0] = 0x00000005;
+
+ switch (i->src[0].getFile()) {
+ case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
+ case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
+ case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
+ case FILE_MEMORY_CONST:
+ if (!i->src[0].isIndirect(0) && typeSizeof(i->dType) == 4) {
+ emitMOV(i); // not sure if this is any better
+ return;
+ }
+ opc = 0x14000000 | (i->src[0].get()->reg.fileIndex << 10);
+ code[0] = 0x00000006 | (i->subOp << 8);
+ break;
+ default:
+ assert(!"invalid memory file");
+ opc = 0;
+ break;
+ }
+ code[1] = opc;
+
+ defId(i->def[0], 14);
+
+ setAddress16(i->src[0]);
+ srcId(i->src[0].getIndirect(0), 20);
+
+ emitPredicate(i);
+
+ emitLoadStoreType(i->dType);
+ emitCachingMode(i->cache);
+}
+
+uint8_t
+CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
+{
+ switch (SDATA(ref).sv.sv) {
+ case SV_LANEID: return 0x00;
+ case SV_PHYSID: return 0x03;
+ case SV_VERTEX_COUNT: return 0x10;
+ case SV_INVOCATION_ID: return 0x11;
+ case SV_YDIR: return 0x12;
+ case SV_TID: return 0x21 + SDATA(ref).sv.index;
+ case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
+ case SV_NTID: return 0x29 + SDATA(ref).sv.index;
+ case SV_GRIDID: return 0x2c;
+ case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
+ case SV_LBASE: return 0x34;
+ case SV_SBASE: return 0x30;
+ case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
+ default:
+ assert(!"no sreg for system value");
+ return 0;
+ }
+}
+
+void
+CodeEmitterNVC0::emitMOV(const Instruction *i)
+{
+ if (i->src[0].getFile() == FILE_SYSTEM_VALUE) {
+ uint8_t sr = getSRegEncoding(i->src[0]);
+
+ if (i->encSize == 8) {
+ code[0] = 0x00000004 | (sr << 26);
+ code[1] = 0x2c000000;
+ } else {
+ code[0] = 0x40000008 | (sr << 20);
+ }
+ defId(i->def[0], 14);
+
+ emitPredicate(i);
+ } else
+ if (i->encSize == 8) {
+ uint64_t opc;
+
+ if (i->src[0].getFile() == FILE_IMMEDIATE)
+ opc = HEX64(18000000, 000001e2);
+ else
+ if (i->src[0].getFile() == FILE_PREDICATE)
+ opc = HEX64(080e0000, 1c000004);
+ else
+ opc = HEX64(28000000, 00000004);
+
+ opc |= i->lanes << 5;
+
+ emitForm_B(i, opc);
+ } else {
+ uint32_t imm;
+
+ if (i->src[0].getFile() == FILE_IMMEDIATE) {
+ imm = SDATA(i->src[0]).u32;
+ if (imm & 0xfff00000) {
+ assert(!(imm & 0x000fffff));
+ code[0] = 0x00000318 | imm;
+ } else {
+ assert(imm < 0x800 || ((int32_t)imm >= -0x800));
+ code[0] = 0x00000118 | (imm << 20);
+ }
+ } else {
+ code[0] = 0x0028;
+ emitShortSrc2(i->src[0]);
+ }
+ defId(i->def[0], 14);
+
+ emitPredicate(i);
+ }
+}
+
+bool
+CodeEmitterNVC0::emitInstruction(Instruction *insn)
+{
+ if (!insn->encSize) {
+ ERROR("skipping unencodable instruction: "); insn->print();
+ return false;
+ } else
+ if (codeSize + insn->encSize > codeSizeLimit) {
+ ERROR("code emitter output buffer too small\n");
+ return false;
+ }
+
+ // assert that instructions with multiple defs don't corrupt registers
+ for (int d = 0; insn->defExists(d); ++d)
+ assert(insn->asTex() || insn->def[d].rep()->reg.data.id >= 0);
+
+ switch (insn->op) {
+ case OP_MOV:
+ case OP_RDSV:
+ emitMOV(insn);
+ break;
+ case OP_NOP:
+ break;
+ case OP_LOAD:
+ emitLOAD(insn);
+ break;
+ case OP_STORE:
+ emitSTORE(insn);
+ break;
+ case OP_LINTERP:
+ case OP_PINTERP:
+ emitINTERP(insn);
+ break;
+ case OP_VFETCH:
+ emitVFETCH(insn);
+ break;
+ case OP_EXPORT:
+ emitEXPORT(insn);
+ break;
+ case OP_PFETCH:
+ emitPFETCH(insn);
+ break;
+ case OP_EMIT:
+ case OP_RESTART:
+ emitOUT(insn);
+ break;
+ case OP_ADD:
+ case OP_SUB:
+ if (isFloatType(insn->dType))
+ emitFADD(insn);
+ else
+ emitUADD(insn);
+ break;
+ case OP_MUL:
+ if (isFloatType(insn->dType))
+ emitFMUL(insn);
+ else
+ emitUMUL(insn);
+ break;
+ case OP_MAD:
+ case OP_FMA:
+ if (isFloatType(insn->dType))
+ emitFMAD(insn);
+ else
+ emitIMAD(insn);
+ break;
+ case OP_NOT:
+ emitNOT(insn);
+ break;
+ case OP_AND:
+ emitLogicOp(insn, 0);
+ break;
+ case OP_OR:
+ emitLogicOp(insn, 1);
+ break;
+ case OP_XOR:
+ emitLogicOp(insn, 2);
+ break;
+ case OP_SHL:
+ case OP_SHR:
+ emitShift(insn);
+ break;
+ case OP_SET:
+ case OP_SET_AND:
+ case OP_SET_OR:
+ case OP_SET_XOR:
+ emitSET(insn->asCmp());
+ break;
+ case OP_SELP:
+ emitSELP(insn);
+ break;
+ case OP_SLCT:
+ emitSLCT(insn->asCmp());
+ break;
+ case OP_MIN:
+ case OP_MAX:
+ emitMINMAX(insn);
+ break;
+ case OP_ABS:
+ case OP_NEG:
+ case OP_CEIL:
+ case OP_FLOOR:
+ case OP_TRUNC:
+ case OP_CVT:
+ case OP_SAT:
+ emitCVT(insn);
+ break;
+ case OP_RSQ:
+ emitSFnOp(insn, 5);
+ break;
+ case OP_RCP:
+ emitSFnOp(insn, 4);
+ break;
+ case OP_LG2:
+ emitSFnOp(insn, 3);
+ break;
+ case OP_EX2:
+ emitSFnOp(insn, 2);
+ break;
+ case OP_SIN:
+ emitSFnOp(insn, 1);
+ break;
+ case OP_COS:
+ emitSFnOp(insn, 0);
+ break;
+ case OP_PRESIN:
+ case OP_PREEX2:
+ emitPreOp(insn);
+ break;
+ case OP_TEX:
+ case OP_TXB:
+ case OP_TXL:
+ case OP_TXD:
+ case OP_TXF:
+ emitTEX(insn->asTex());
+ break;
+ case OP_TXQ:
+ emitTXQ(insn->asTex());
+ break;
+ case OP_BRA:
+ case OP_CALL:
+ case OP_PRERET:
+ case OP_RET:
+ case OP_DISCARD:
+ case OP_EXIT:
+ case OP_PRECONT:
+ case OP_CONT:
+ case OP_PREBREAK:
+ case OP_BREAK:
+ case OP_JOINAT:
+ case OP_BRKPT:
+ case OP_QUADON:
+ case OP_QUADPOP:
+ emitFlow(insn);
+ break;
+ case OP_QUADOP:
+ emitQUADOP(insn, insn->subOp, insn->lanes);
+ break;
+ case OP_DFDX:
+ emitQUADOP(insn, insn->src[0].mod.neg() ? 0x66 : 0x99, 0x4);
+ break;
+ case OP_DFDY:
+ emitQUADOP(insn, insn->src[0].mod.neg() ? 0x5a : 0xa5, 0x5);
+ break;
+ case OP_POPCNT:
+ emitPOPC(insn);
+ break;
+ case OP_JOIN:
+ emitNOP(insn);
+ insn->join = 1;
+ break;
+ case OP_PHI:
+ case OP_UNION:
+ case OP_CONSTRAINT:
+      ERROR("operation should have been eliminated\n");
+ return false;
+ case OP_EXP:
+ case OP_LOG:
+ case OP_SQRT:
+ case OP_POW:
+ ERROR("operation should have been lowered\n");
+ return false;
+ default:
+      ERROR("unknown op\n");
+ return false;
+ }
+
+ if (insn->join) {
+ code[0] |= 0x10;
+ assert(insn->encSize == 8);
+ }
+
+ code += insn->encSize / 4;
+ codeSize += insn->encSize;
+ return true;
+}
+
+uint32_t
+CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
+{
+ const Target::OpInfo &info = targ->getOpInfo(i);
+
+ if (info.minEncSize == 8 || 1)
+ return 8;
+
+ if (i->ftz || i->saturate || i->join)
+ return 8;
+ if (i->rnd != ROUND_N)
+ return 8;
+ if (i->predSrc >= 0 && i->op == OP_MAD)
+ return 8;
+
+ if (i->op == OP_PINTERP) {
+ if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
+ return 8;
+ } else
+ if (i->op == OP_MOV && i->lanes != 0xf) {
+ return 8;
+ }
+
+ for (int s = 0; i->srcExists(s); ++s) {
+ if (i->src[s].isIndirect(0))
+ return 8;
+
+ if (i->src[s].getFile() == FILE_MEMORY_CONST) {
+ if (SDATA(i->src[s]).offset >= 0x100)
+ return 8;
+ if (i->getSrc(s)->reg.fileIndex > 1 &&
+ i->getSrc(s)->reg.fileIndex != 16)
+ return 8;
+ } else
+ if (i->src[s].getFile() == FILE_IMMEDIATE) {
+ if (i->dType == TYPE_F32) {
+ if (SDATA(i->src[s]).u32 >= 0x100)
+ return 8;
+ } else {
+ if (SDATA(i->src[s]).u32 > 0xff)
+ return 8;
+ }
+ }
+
+ if (i->op == OP_CVT)
+ continue;
+ if (i->src[s].mod != Modifier(0)) {
+ if (i->src[s].mod == Modifier(NV50_IR_MOD_ABS))
+ if (i->op != OP_RSQ)
+ return 8;
+ if (i->src[s].mod == Modifier(NV50_IR_MOD_NEG))
+ if (i->op != OP_ADD || s != 0)
+ return 8;
+ }
+ }
+
+ return 4;
+}
+
+CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target) : targ(target)
+{
+ code = NULL;
+ codeSize = codeSizeLimit = 0;
+ relocInfo = NULL;
+}
+
+CodeEmitter *
+TargetNVC0::getCodeEmitter(Program::Type type)
+{
+ CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
+ emit->setProgramType(type);
+ return emit;
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
new file mode 100644
index 00000000000..de73efcc56a
--- /dev/null
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
@@ -0,0 +1,705 @@
+
+#include "nv50/codegen/nv50_ir.h"
+#include "nv50/codegen/nv50_ir_build_util.h"
+
+#include "nv50_ir_target_nvc0.h"
+
+namespace nv50_ir {
+
+#define QOP_ADD 0
+#define QOP_SUBR 1
+#define QOP_SUB 2
+#define QOP_MOV2 3
+
+#define QUADOP(q, r, s, t) \
+ ((QOP_##q << 0) | (QOP_##r << 2) | \
+ (QOP_##s << 4) | (QOP_##t << 6))
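+// packs four 2-bit quad-op codes into one value, first argument in the
+// lowest bits; e.g. QUADOP(MOV2, ADD, MOV2, ADD) == 0x33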
+
+class NVC0LegalizeSSA : public Pass
+{
+private:
+ virtual bool visit(BasicBlock *);
+ virtual bool visit(Function *);
+
+ // we want to insert calls to the builtin library only after optimization
+ void handleDIV(Instruction *); // integer division, modulus
+ void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
+
+private:
+ BuildUtil bld;
+};
+
+void
+NVC0LegalizeSSA::handleDIV(Instruction *i)
+{
+ FlowInstruction *call;
+ int builtin;
+ Value *def[2];
+
+ bld.setPosition(i, false);
+ def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0);
+ def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0);
+ switch (i->dType) {
+ case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break;
+ case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break;
+ default:
+ return;
+ }
+ call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);
+ bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]);
+ bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2);
+ bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0);
+
+ call->fixed = 1;
+ call->absolute = call->builtin = 1;
+ call->target.builtin = builtin;
+ delete_Instruction(prog, i);
+}
+
+void
+NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
+{
+ // TODO
+}
+
+bool
+NVC0LegalizeSSA::visit(Function *fn)
+{
+ bld.setProgram(fn->getProgram());
+ return true;
+}
+
+bool
+NVC0LegalizeSSA::visit(BasicBlock *bb)
+{
+ Instruction *next;
+ for (Instruction *i = bb->getEntry(); i; i = next) {
+ next = i->next;
+ if (i->dType == TYPE_F32)
+ continue;
+ switch (i->op) {
+ case OP_DIV:
+ case OP_MOD:
+ handleDIV(i);
+ break;
+ case OP_RCP:
+ case OP_RSQ:
+ if (i->dType == TYPE_F64)
+ handleRCPRSQ(i);
+ break;
+ default:
+ break;
+ }
+ }
+ return true;
+}
+
+class NVC0LegalizePostRA : public Pass
+{
+private:
+ virtual bool visit(Function *);
+ virtual bool visit(BasicBlock *);
+
+ void replaceZero(Instruction *);
+ void split64BitOp(Instruction *);
+ bool tryReplaceContWithBra(BasicBlock *);
+ void propagateJoin(BasicBlock *);
+
+ LValue *r63;
+};
+
+bool
+NVC0LegalizePostRA::visit(Function *fn)
+{
+ r63 = new_LValue(fn, FILE_GPR);
+ r63->reg.data.id = 63;
+ return true;
+}
+
+void
+NVC0LegalizePostRA::replaceZero(Instruction *i)
+{
+ for (int s = 0; i->srcExists(s); ++s) {
+ ImmediateValue *imm = i->getSrc(s)->asImm();
+ if (imm && imm->reg.data.u64 == 0)
+ i->setSrc(s, r63);
+ }
+}
+
+void
+NVC0LegalizePostRA::split64BitOp(Instruction *i)
+{
+ if (i->dType == TYPE_F64) {
+ if (i->op == OP_MAD)
+ i->op = OP_FMA;
+ if (i->op == OP_ADD || i->op == OP_MUL || i->op == OP_FMA ||
+ i->op == OP_CVT || i->op == OP_MIN || i->op == OP_MAX ||
+ i->op == OP_SET)
+ return;
+ i->dType = i->sType = TYPE_U32;
+
+ i->bb->insertAfter(i, i->clone(true)); // deep cloning
+ }
+}
+
+// replace CONT with BRA for single unconditional continue
+bool
+NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb)
+{
+ if (bb->cfg.incidentCount() != 2 || bb->getEntry()->op != OP_PRECONT)
+ return false;
+ Graph::EdgeIterator ei = bb->cfg.incident();
+ if (ei.getType() != Graph::Edge::BACK)
+ ei.next();
+ if (ei.getType() != Graph::Edge::BACK)
+ return false;
+ BasicBlock *contBB = BasicBlock::get(ei.getNode());
+
+ if (!contBB->getExit() || contBB->getExit()->op != OP_CONT ||
+ contBB->getExit()->getPredicate())
+ return false;
+ contBB->getExit()->op = OP_BRA;
+ bb->remove(bb->getEntry()); // delete PRECONT
+
+ ei.next();
+ assert(ei.end() || ei.getType() != Graph::Edge::BACK);
+ return true;
+}
+
+// replace branches to join blocks with join ops
+void
+NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
+{
+ if (bb->getEntry()->op != OP_JOIN || bb->getEntry()->asFlow()->limit)
+ return;
+ for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+ BasicBlock *in = BasicBlock::get(ei.getNode());
+ Instruction *exit = in->getExit();
+ if (!exit) {
+ in->insertTail(new FlowInstruction(func, OP_JOIN, bb));
+ // there should always be a terminator instruction
+ WARN("inserted missing terminator in BB:%i\n", in->getId());
+ } else
+ if (exit->op == OP_BRA) {
+ exit->op = OP_JOIN;
+ exit->asFlow()->limit = 1; // must-not-propagate marker
+ }
+ }
+ bb->remove(bb->getEntry());
+}
+
+bool
+NVC0LegalizePostRA::visit(BasicBlock *bb)
+{
+ Instruction *i, *next;
+
+ // remove pseudo operations and non-fixed no-ops, split 64 bit operations
+ for (i = bb->getFirst(); i; i = next) {
+ next = i->next;
+ if (i->op == OP_EMIT || i->op == OP_RESTART) {
+ if (!i->getDef(0)->refCount())
+ i->setDef(0, NULL);
+ if (i->src[0].getFile() == FILE_IMMEDIATE)
+ i->setSrc(0, r63); // initial value must be 0
+ } else
+ if (i->isNop()) {
+ bb->remove(i);
+ } else {
+ if (i->op != OP_MOV && i->op != OP_PFETCH)
+ replaceZero(i);
+ if (typeSizeof(i->dType) == 8)
+ split64BitOp(i);
+ }
+ }
+ if (!bb->getEntry())
+ return true;
+
+ if (!tryReplaceContWithBra(bb))
+ propagateJoin(bb);
+
+ return true;
+}
+
+class NVC0LoweringPass : public Pass
+{
+public:
+ NVC0LoweringPass(Program *);
+
+private:
+ virtual bool visit(Function *);
+ virtual bool visit(BasicBlock *);
+ virtual bool visit(Instruction *);
+
+ bool handleRDSV(Instruction *);
+ bool handleWRSV(Instruction *);
+ bool handleEXPORT(Instruction *);
+ bool handleOUT(Instruction *);
+ bool handleDIV(Instruction *);
+ bool handleMOD(Instruction *);
+ bool handleSQRT(Instruction *);
+ bool handlePOW(Instruction *);
+ bool handleTEX(TexInstruction *);
+ bool handleTXD(TexInstruction *);
+ bool handleManualTXD(TexInstruction *);
+
+ void checkPredicate(Instruction *);
+
+ void readTessCoord(LValue *dst, int c);
+
+private:
+ const Target *const targ;
+
+ BuildUtil bld;
+
+ LValue *gpEmitAddress;
+};
+
+NVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget())
+{
+ bld.setProgram(prog);
+}
+
+bool
+NVC0LoweringPass::visit(Function *fn)
+{
+ if (prog->getType() == Program::TYPE_GEOMETRY) {
+ assert(!strncmp(fn->getName(), "MAIN", 4));
+ // TODO: when we generate actual functions pass this value along somehow
+ bld.setPosition(BasicBlock::get(fn->cfg.getRoot()), false);
+ gpEmitAddress = bld.loadImm(NULL, 0)->asLValue();
+ }
+ return true;
+}
+
+bool
+NVC0LoweringPass::visit(BasicBlock *bb)
+{
+ return true;
+}
+
+// move array source to first slot, convert to u16, add indirections
+bool
+NVC0LoweringPass::handleTEX(TexInstruction *i)
+{
+ const int dim = i->tex.target.getDim();
+ const int arg = i->tex.target.getDim() + i->tex.target.isArray();
+
+ // generate and move the tsc/tic/array source to the front
+ if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
+ LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
+
+ Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(dim) : NULL;
+ for (int s = dim; s >= 1; --s)
+ i->setSrc(s, i->getSrc(s - 1));
+ i->setSrc(0, arrayIndex);
+
+ Value *ticRel = i->getIndirectR();
+ Value *tscRel = i->getIndirectS();
+
+ if (arrayIndex)
+ bld.mkCvt(OP_CVT, TYPE_U16, src, TYPE_F32, arrayIndex);
+ else
+ bld.loadImm(src, 0);
+
+ if (ticRel) {
+ i->setSrc(i->tex.rIndirectSrc, NULL);
+ bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src);
+ }
+ if (tscRel) {
+ i->setSrc(i->tex.sIndirectSrc, NULL);
+ bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src);
+ }
+
+ i->setSrc(0, src);
+ }
+
+ // offset is last source (lod 1st, dc 2nd)
+ if (i->tex.useOffsets) {
+ uint32_t value = 0;
+ int n, c;
+ int s = i->srcCount(0xff);
+ for (n = 0; n < i->tex.useOffsets; ++n)
+ for (c = 0; c < 3; ++c)
+ value |= (i->tex.offset[n][c] & 0xf) << (n * 12 + c * 4);
+ i->setSrc(s, bld.loadImm(NULL, value));
+ }
+
+ return true;
+}
+
+bool
+NVC0LoweringPass::handleManualTXD(TexInstruction *i)
+{
+ static const uint8_t qOps[4][2] =
+ {
+ { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0
+ { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1
+ { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
+ { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
+ };
+ Value *def[4][4];
+ Value *crd[3];
+ Instruction *tex;
+ Value *zero = bld.loadImm(bld.getSSA(), 0);
+ int l, c;
+ const int dim = i->tex.target.getDim();
+
+ i->op = OP_TEX; // no need to clone dPdx/dPdy later
+
+ for (c = 0; c < dim; ++c)
+ crd[c] = bld.getScratch();
+
+ bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
+ for (l = 0; l < 4; ++l) {
+ // mov coordinates from lane l to all lanes
+ for (c = 0; c < dim; ++c)
+ bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
+ // add dPdx from lane l to lanes dx
+ for (c = 0; c < dim; ++c)
+ bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
+ // add dPdy from lane l to lanes dy
+ for (c = 0; c < dim; ++c)
+ bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
+ // texture
+ bld.insert(tex = i->clone(true));
+ for (c = 0; c < dim; ++c)
+ tex->setSrc(c, crd[c]);
+ // save results
+ for (c = 0; i->defExists(c); ++c) {
+ Instruction *mov;
+ def[c][l] = bld.getSSA();
+ mov = bld.mkMov(def[c][l], tex->getDef(c));
+ mov->fixed = 1;
+ mov->lanes = 1 << l;
+ }
+ }
+ bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
+
+ for (c = 0; i->defExists(c); ++c) {
+ Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
+ for (l = 0; l < 4; ++l)
+ u->setSrc(l, def[c][l]);
+ }
+
+ i->bb->remove(i);
+ return true;
+}
+
+bool
+NVC0LoweringPass::handleTXD(TexInstruction *txd)
+{
+ int dim = txd->tex.target.getDim();
+ int arg = txd->tex.target.getDim() + txd->tex.target.isArray();
+
+ handleTEX(txd);
+ if (txd->src[arg].exists())
+ ++arg;
+
+ if (dim > 2 || txd->tex.target.isShadow())
+ return handleManualTXD(txd);
+
+ // at most s/t/array, x, y, offset
+ assert(arg <= 4 && !txd->src[arg].exists());
+
+ for (int c = 0; c < dim; ++c) {
+ txd->src[arg + c * 2 + 0].set(txd->dPdx[c]);
+ txd->src[arg + c * 2 + 1].set(txd->dPdy[c]);
+ txd->dPdx[c] = NULL;
+ txd->dPdy[c] = NULL;
+ }
+ return true;
+}
+
+bool
+NVC0LoweringPass::handleWRSV(Instruction *i)
+{
+ Instruction *st;
+ Symbol *sym;
+ uint32_t addr;
+
+   // must replace, $sregs are not writable
+ addr = targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym());
+ if (addr >= 0x400)
+ return false;
+ sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr);
+
+ st = bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0),
+ i->getSrc(1));
+ st->perPatch = i->perPatch;
+
+ bld.getBB()->remove(i);
+ return true;
+}
+
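+// TESS_COORD x/y are read back from outputs 0x2f0/0x2f4 (indexed by lane id);
+// the third coordinate is derived as 1 - x - y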
+void
+NVC0LoweringPass::readTessCoord(LValue *dst, int c)
+{
+ Value *laneid = bld.getSSA();
+ Value *x, *y;
+
+ bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0));
+
+ if (c == 0) {
+ x = dst;
+ y = NULL;
+ } else
+ if (c == 1) {
+ x = NULL;
+ y = dst;
+ } else {
+ assert(c == 2);
+ x = bld.getSSA();
+ y = bld.getSSA();
+ }
+ if (x)
+ bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid);
+ if (y)
+      bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid);
+
+ if (c == 2) {
+ bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y);
+ bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst);
+ }
+}
+
+bool
+NVC0LoweringPass::handleRDSV(Instruction *i)
+{
+ Symbol *sym = i->getSrc(0)->asSym();
+ Value *vtx = NULL;
+ Instruction *ld;
+ uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym);
+
+ if (addr >= 0x400) // mov $sreg
+ return true;
+
+ switch (i->getSrc(0)->reg.data.sv.sv) {
+ case SV_POSITION:
+ assert(prog->getType() == Program::TYPE_FRAGMENT);
+ ld = new_Instruction(func, OP_LINTERP, TYPE_F32);
+ ld->setDef(0, i->getDef(0));
+ ld->setSrc(0, bld.mkSymbol(FILE_SHADER_INPUT, 0, TYPE_F32, addr));
+ ld->setInterpolate(NV50_IR_INTERP_LINEAR);
+ bld.getBB()->insertAfter(i, ld);
+ break;
+ case SV_TESS_COORD:
+ assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL);
+ readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index);
+ break;
+ default:
+ if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
+ vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
+ ld = bld.mkFetch(i->getDef(0), i->dType,
+ FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
+ ld->perPatch = i->perPatch;
+ break;
+ }
+ bld.getBB()->remove(i);
+ return true;
+}
+
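+// lower floating point division: a / b -> a * rcp(b)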
+bool
+NVC0LoweringPass::handleDIV(Instruction *i)
+{
+ if (!isFloatType(i->dType))
+ return true;
+ Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1));
+ i->op = OP_MUL;
+ i->setSrc(1, rcp->getDef(0));
+ return true;
+}
+
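+// lower F32 modulus: a % b -> a - b * trunc(a * rcp(b))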
+bool
+NVC0LoweringPass::handleMOD(Instruction *i)
+{
+ if (i->dType != TYPE_F32)
+ return true;
+ LValue *value = bld.getScratch();
+ bld.mkOp1(OP_RCP, TYPE_F32, value, i->getSrc(1));
+ bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(0), value);
+ bld.mkOp1(OP_TRUNC, TYPE_F32, value, value);
+ bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(1), value);
+ i->op = OP_SUB;
+ i->setSrc(1, value);
+ return true;
+}
+
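+// lower SQRT to x * rsq(x)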
+bool
+NVC0LoweringPass::handleSQRT(Instruction *i)
+{
+ Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32,
+ bld.getSSA(), i->getSrc(0));
+ i->op = OP_MUL;
+ i->setSrc(1, rsq->getDef(0));
+
+ return true;
+}
+
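+// lower POW to ex2(preex2(y * lg2(x)))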
+bool
+NVC0LoweringPass::handlePOW(Instruction *i)
+{
+ LValue *val = bld.getScratch();
+
+ bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0));
+ bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1;
+ bld.mkOp1(OP_PREEX2, TYPE_F32, val, val);
+
+ i->op = OP_EX2;
+ i->setSrc(0, val);
+ i->setSrc(1, NULL);
+
+ return true;
+}
+
+bool
+NVC0LoweringPass::handleEXPORT(Instruction *i)
+{
+ if (prog->getType() == Program::TYPE_FRAGMENT) {
+ int id = i->getSrc(0)->reg.data.offset / 4;
+
+ if (i->src[0].isIndirect(0)) // TODO, ugly
+ return false;
+ i->op = OP_MOV;
+ i->src[0].set(i->src[1]);
+ i->setSrc(1, NULL);
+ i->setDef(0, new_LValue(func, FILE_GPR));
+ i->getDef(0)->reg.data.id = id;
+
+ prog->maxGPR = MAX2(prog->maxGPR, id);
+ } else
+ if (prog->getType() == Program::TYPE_GEOMETRY) {
+ i->setIndirect(0, 1, gpEmitAddress);
+ }
+ return true;
+}
+
+bool
+NVC0LoweringPass::handleOUT(Instruction *i)
+{
+ if (i->op == OP_RESTART && i->prev && i->prev->op == OP_EMIT) {
+ i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART;
+ delete_Instruction(prog, i);
+ } else {
+ assert(gpEmitAddress);
+ i->setDef(0, gpEmitAddress);
+ if (i->srcExists(0))
+ i->setSrc(1, i->getSrc(0));
+ i->setSrc(0, gpEmitAddress);
+ }
+ return true;
+}
+
+// Generate a binary predicate if an instruction is predicated by
+// e.g. an f32 value.
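+// (e.g. a discard predicated on an f32 value $r2 gets a new predicate
+//  $p = ($r2 != 0) and is predicated on $p instead; names illustrative)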
+void
+NVC0LoweringPass::checkPredicate(Instruction *insn)
+{
+ Value *pred = insn->getPredicate();
+ Value *pdst;
+
+ if (!pred || pred->reg.file == FILE_PREDICATE)
+ return;
+ pdst = new_LValue(func, FILE_PREDICATE);
+
+ // CAUTION: don't use pdst->getInsn, the definition might not be unique,
+ // delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass
+
+ bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, pdst, bld.mkImm(0), pred);
+
+ insn->setPredicate(insn->cc, pdst);
+}
+
+//
+// - add quadop dance for texturing
+// - put FP outputs in GPRs
+// - convert instruction sequences
+//
+bool
+NVC0LoweringPass::visit(Instruction *i)
+{
+ if (i->prev)
+ bld.setPosition(i->prev, true);
+ else
+ if (i->next)
+ bld.setPosition(i->next, false);
+ else
+ bld.setPosition(i->bb, true);
+
+ if (i->cc != CC_ALWAYS)
+ checkPredicate(i);
+
+ switch (i->op) {
+ case OP_TEX:
+ case OP_TXB:
+ case OP_TXL:
+ case OP_TXF:
+ case OP_TXQ:
+ case OP_TXG:
+ return handleTEX(i->asTex());
+ case OP_TXD:
+ return handleTXD(i->asTex());
+ case OP_EX2:
+ bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
+ i->setSrc(0, i->getDef(0));
+ break;
+ case OP_POW:
+ return handlePOW(i);
+ case OP_DIV:
+ return handleDIV(i);
+ case OP_MOD:
+ return handleMOD(i);
+ case OP_SQRT:
+ return handleSQRT(i);
+ case OP_EXPORT:
+ return handleEXPORT(i);
+ case OP_EMIT:
+ case OP_RESTART:
+ return handleOUT(i);
+ case OP_RDSV:
+ return handleRDSV(i);
+ case OP_WRSV:
+ return handleWRSV(i);
+ case OP_LOAD:
+ if (i->src[0].getFile() == FILE_SHADER_INPUT) {
+ i->op = OP_VFETCH;
+ assert(prog->getType() != Program::TYPE_FRAGMENT);
+ }
+ break;
+ case OP_PINTERP:
+ if (i->getSrc(0)->reg.data.offset >= 0x280 &&
+ i->getSrc(0)->reg.data.offset < 0x2c0)
+ i->setInterpolate(i->getSampleMode() | NV50_IR_INTERP_SC);
+ break;
+ case OP_LINTERP:
+ if (i->getSrc(0)->reg.data.offset == 0x3fc) {
+ Value *face = i->getDef(0);
+ bld.setPosition(i, true);
+ bld.mkOp2(OP_SHL, TYPE_U32, face, face, bld.mkImm(31));
+ bld.mkOp2(OP_XOR, TYPE_U32, face, face, bld.mkImm(0xbf800000));
+ }
+ break;
+ default:
+ break;
+ }
+ return true;
+}
+
+bool
+TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const
+{
+ if (stage == CG_STAGE_PRE_SSA) {
+ NVC0LoweringPass pass(prog);
+ return pass.run(prog, false, true);
+ } else
+ if (stage == CG_STAGE_POST_RA) {
+ NVC0LegalizePostRA pass;
+ return pass.run(prog, false, true);
+ } else
+ if (stage == CG_STAGE_SSA) {
+ NVC0LegalizeSSA pass;
+ return pass.run(prog, false, true);
+ }
+ return false;
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
new file mode 100644
index 00000000000..60b2016878e
--- /dev/null
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
@@ -0,0 +1,568 @@
+
+#include "nv50_ir_target_nvc0.h"
+
+namespace nv50_ir {
+
+Target *getTargetNVC0(unsigned int chipset)
+{
+ return new TargetNVC0(chipset);
+}
+
+TargetNVC0::TargetNVC0(unsigned int card)
+{
+ chipset = card;
+ initOpInfo();
+}
+
+// BUILTINS / LIBRARY FUNCTIONS:
+
+// laziness -> will just hardcode everything for the time being
+
+// Will probably make this nicer once we support subroutines properly,
+// i.e. when we have an input IR that provides function declarations.
+
+static const uint32_t nvc0_builtin_code[] =
+{
+// DIV U32: slow unsigned integer division
+//
+// UNR recurrence (q = a / b):
+// look for z such that 2^32 - b <= b * z < 2^32
+// then q - 1 <= (a * z) / 2^32 <= q
+//
+// INPUT: $r0: dividend, $r1: divisor
+// OUTPUT: $r0: result, $r1: modulus
+// CLOBBER: $r2 - $r3, $p0 - $p1
+// SIZE: 22 * 8 bytes (14 * 8 bytes for the #else variant)
+//
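+// A rough C model of the scheme implemented below (illustration only, not
+// part of the shader binary; names are made up):
+//
+//   uint32_t z = reciprocal_estimate(b);     // refined so that
+//                                            // 2^32 - b <= b * z < 2^32
+//   uint32_t q = ((uint64_t)a * z) >> 32;    // q or q - 1
+//   uint32_t r = a - q * b;
+//   if (r >= b) { r -= b; ++q; }             // fix up the possible off-by-one
+//   // $r0 = q, $r1 = r
+//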
+#if 1
+ 0x04009c03, 0x78000000,
+ 0x7c209cdd,
+ 0x0010dd18,
+ 0x08309c03, 0x60000000,
+ 0x05605c18,
+ 0x0810dc2a,
+ 0x0c209c43, 0x20040000,
+ 0x0810dc03, 0x50000000,
+ 0x0c209c43, 0x20040000,
+ 0x0810dc03, 0x50000000,
+ 0x0c209c43, 0x20040000,
+ 0x0810dc03, 0x50000000,
+ 0x0c209c43, 0x20040000,
+ 0x0810dc03, 0x50000000,
+ 0x0c209c43, 0x20040000,
+ 0x0000dde4, 0x28000000,
+ 0x08001c43, 0x50000000,
+ 0x05609c18,
+ 0x0010430d,
+ 0x0811dc03, 0x1b0e0000,
+ 0x08104103, 0x48000000,
+ 0x04000002, 0x08000000,
+ 0x0811c003, 0x1b0e0000,
+ 0x08104103, 0x48000000,
+ 0x040000ac,
+ 0x90001dff,
+#else
+ 0x0401dc03, 0x1b0e0000,
+ 0x00008003, 0x78000000,
+ 0x0400c003, 0x78000000,
+ 0x0c20c103, 0x48000000,
+ 0x0c108003, 0x60000000,
+ 0x00005c28,
+ 0x00001d18,
+ 0x0031c023, 0x1b0ec000,
+ 0xb000a1e7, 0x40000000,
+ 0x04000003, 0x6000c000,
+ 0x0813dc03, 0x1b000000,
+ 0x0420446c,
+ 0x040004bd,
+ 0x04208003, 0x5800c000,
+ 0x0430c103, 0x4800c000,
+ 0x0ffc5dff,
+ 0x90001dff,
+#endif
+
+// DIV S32: slow signed integer division
+//
+// INPUT: $r0: dividend, $r1: divisor
+// OUTPUT: $r0: result, $r1: modulus
+// CLOBBER: $r2 - $r3, $p0 - $p3
+// SIZE: 18 * 8 bytes
+//
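+// Presumably the same UNR sequence as above applied to |a| and |b|, with the
+// signs of quotient and remainder fixed up afterwards (the two additional
+// predicates hold the sign information), e.g. q = sgn(a)*sgn(b) * (|a| / |b|).
+//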
+ 0xfc05dc23, 0x188e0000,
+ 0xfc17dc23, 0x18c40000,
+ 0x03301e18,
+ 0x07305e18,
+ 0x0401dc03, 0x1b0e0000,
+ 0x00008003, 0x78000000,
+ 0x0400c003, 0x78000000,
+ 0x0c20c103, 0x48000000,
+ 0x0c108003, 0x60000000,
+ 0x00005c28,
+ 0x00001d18,
+ 0x0031c023, 0x1b0ec000,
+ 0xb000a1e7, 0x40000000,
+ 0x04000003, 0x6000c000,
+ 0x0813dc03, 0x1b000000,
+ 0x0420446c,
+ 0x040004bd,
+ 0x04208003, 0x5800c000,
+ 0x0430c103, 0x4800c000,
+ 0x0ffc5dff,
+ 0x01700e18,
+ 0x05704a18,
+ 0x90001dff,
+
+// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
+//
+// INPUT: $r0d (x)
+// OUTPUT: $r0d (rcp(x))
+// CLOBBER: $r2 - $r7
+// SIZE: 9 * 8 bytes
+//
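+// Equivalent C model of the refinement (illustration only):
+//
+//   double r = rcp_estimate(x);      // seeded from a single-precision estimate
+//   for (int i = 0; i < 3; ++i)      // three iterations below
+//      r = r * (2.0 - x * r);
+//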
+ 0x9810dc08,
+ 0x00009c28,
+ 0x4001df18,
+ 0x00019d18,
+ 0x08011e01, 0x200c0000,
+ 0x10209c01, 0x50000000,
+ 0x08011e01, 0x200c0000,
+ 0x10209c01, 0x50000000,
+ 0x08011e01, 0x200c0000,
+ 0x10201c01, 0x50000000,
+ 0x00001de7, 0x90000000,
+
+// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
+//
+// INPUT: $r0d (x)
+// OUTPUT: $r0d (rsqrt(x))
+// CLOBBER: $r2 - $r7
+// SIZE: 14 * 8 bytes
+//
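+// Equivalent C model of the refinement (illustration only):
+//
+//   double r = rsq_estimate(x);      // seeded from a single-precision estimate
+//   for (int i = 0; i < 3; ++i)      // three iterations below
+//      r = r * (1.5 - 0.5 * x * r * r);
+//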
+ 0x9c10dc08,
+ 0x00009c28,
+ 0x00019d18,
+ 0x3fe1df18,
+ 0x18001c01, 0x50000000,
+ 0x0001dde2, 0x18ffe000,
+ 0x08211c01, 0x50000000,
+ 0x10011e01, 0x200c0000,
+ 0x10209c01, 0x50000000,
+ 0x08211c01, 0x50000000,
+ 0x10011e01, 0x200c0000,
+ 0x10209c01, 0x50000000,
+ 0x08211c01, 0x50000000,
+ 0x10011e01, 0x200c0000,
+ 0x10201c01, 0x50000000,
+ 0x00001de7, 0x90000000,
+};
+
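+// start offsets of the routines above, indexed by NVC0_BUILTIN_*
+// (DIV_U32, DIV_S32, RCP_F64, RSQ_F64); each entry is the accumulated
+// size of the preceding routines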
+static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] =
+{
+ 0,
+ 8 * (22),
+ 8 * (22 + 18),
+ 8 * (22 + 18 + 9)
+};
+
+void
+TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const
+{
+ *code = &nvc0_builtin_code[0];
+ *size = sizeof(nvc0_builtin_code);
+}
+
+uint32_t
+TargetNVC0::getBuiltinOffset(int builtin) const
+{
+ assert(builtin < NVC0_BUILTIN_COUNT);
+ return nvc0_builtin_offsets[builtin];
+}
+
+struct opProperties
+{
+ operation op;
+ unsigned int mNeg : 4;
+ unsigned int mAbs : 4;
+ unsigned int mNot : 4;
+ unsigned int mSat : 4;
+ unsigned int fConst : 3;
+   unsigned int fImmd : 4; // last bit indicates if full immediate is supported
+};
+
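+// The masks are indexed by source position: bit s set means source s accepts
+// the modifier (or the extra file). For example, OP_ADD below has mNeg = 0x3,
+// so both of its sources may be negated, while mSat = 0x8 flags a saturatable
+// destination (see initOpInfo).
+//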
+static const struct opProperties _initProps[] =
+{
+ // neg abs not sat c[] imm
+ { OP_ADD, 0x3, 0x3, 0x0, 0x8, 0x2, 0x2 | 0x8 },
+ { OP_SUB, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 | 0x8 },
+ { OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x2 | 0x8 },
+ { OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
+ { OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
+ { OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
+ { OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
+ { OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
+ { OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
+ { OP_AND, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
+ { OP_OR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
+ { OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
+ { OP_SHL, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
+ { OP_SHR, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
+ { OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
+ { OP_SLCT, 0x4, 0x0, 0x0, 0x0, 0x6, 0x2 }, // special c[] constraint
+ { OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
+ { OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
+ { OP_COS, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_SIN, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_EX2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_LG2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_RCP, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_RSQ, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+ { OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
+ { OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
+ { OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
+ { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
+ { OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
+ { OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
+ { OP_SET_XOR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
+ // saturate only:
+ { OP_LINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
+ { OP_PINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
+};
+
+void TargetNVC0::initOpInfo()
+{
+ unsigned int i, j;
+
+ static const uint32_t commutative[(OP_LAST + 31) / 32] =
+ {
+ // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN
+ 0x0670ca00, 0x0000003f, 0x00000000
+ };
+
+ static const uint32_t shortForm[(OP_LAST + 31) / 32] =
+ {
+ // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV
+ 0x0670ca00, 0x00000000, 0x00000000
+ };
+
+ static const operation noDest[] =
+ {
+ OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
+ OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
+ OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
+ OP_QUADON, OP_QUADPOP
+ };
+
+ joinAnterior = false;
+
+ for (i = 0; i < DATA_FILE_COUNT; ++i)
+ nativeFileMap[i] = (DataFile)i;
+ nativeFileMap[FILE_ADDRESS] = FILE_GPR;
+
+ for (i = 0; i < OP_LAST; ++i) {
+ opInfo[i].variants = NULL;
+ opInfo[i].op = (operation)i;
+ opInfo[i].srcTypes = 1 << (int)TYPE_F32;
+ opInfo[i].dstTypes = 1 << (int)TYPE_F32;
+ opInfo[i].immdBits = 0;
+ opInfo[i].srcNr = operationSrcNr[i];
+
+ for (j = 0; j < opInfo[i].srcNr; ++j) {
+ opInfo[i].srcMods[j] = 0;
+ opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
+ }
+ opInfo[i].dstMods = 0;
+ opInfo[i].dstFiles = 1 << (int)FILE_GPR;
+
+ opInfo[i].hasDest = 1;
+ opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
+ opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
+ opInfo[i].pseudo = (i < OP_MOV);
+ opInfo[i].predicate = !opInfo[i].pseudo;
+ opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
+ opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
+ }
+ for (i = 0; i < sizeof(noDest) / sizeof(noDest[0]); ++i)
+ opInfo[noDest[i]].hasDest = 0;
+
+ for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) {
+ const struct opProperties *prop = &_initProps[i];
+
+ for (int s = 0; s < 3; ++s) {
+ if (prop->mNeg & (1 << s))
+ opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG;
+ if (prop->mAbs & (1 << s))
+ opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS;
+ if (prop->mNot & (1 << s))
+ opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT;
+ if (prop->fConst & (1 << s))
+ opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST;
+ if (prop->fImmd & (1 << s))
+ opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE;
+ if (prop->fImmd & 8)
+ opInfo[prop->op].immdBits = 0xffffffff;
+ }
+ if (prop->mSat & 8)
+ opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
+ }
+}
+
+unsigned int
+TargetNVC0::getFileSize(DataFile file) const
+{
+ switch (file) {
+ case FILE_NULL: return 0;
+ case FILE_GPR: return 63;
+ case FILE_PREDICATE: return 7;
+ case FILE_FLAGS: return 1;
+ case FILE_ADDRESS: return 0;
+ case FILE_IMMEDIATE: return 0;
+ case FILE_MEMORY_CONST: return 65536;
+ case FILE_SHADER_INPUT: return 0x400;
+ case FILE_SHADER_OUTPUT: return 0x400;
+ case FILE_MEMORY_GLOBAL: return 0xffffffff;
+ case FILE_MEMORY_SHARED: return 16 << 10;
+ case FILE_MEMORY_LOCAL: return 48 << 10;
+ case FILE_SYSTEM_VALUE: return 32;
+ default:
+ assert(!"invalid file");
+ return 0;
+ }
+}
+
+unsigned int
+TargetNVC0::getFileUnit(DataFile file) const
+{
+ if (file == FILE_GPR || file == FILE_ADDRESS || file == FILE_SYSTEM_VALUE)
+ return 2;
+ return 0;
+}
+
+uint32_t
+TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
+{
+ const int idx = sym->reg.data.sv.index;
+ const SVSemantic sv = sym->reg.data.sv.sv;
+
+ const bool isInput = shaderFile == FILE_SHADER_INPUT;
+
+ switch (sv) {
+ case SV_POSITION: return 0x070 + idx * 4;
+ case SV_INSTANCE_ID: return 0x2f8;
+ case SV_VERTEX_ID: return 0x2fc;
+ case SV_PRIMITIVE_ID: return isInput ? 0x060 : 0x040;
+ case SV_LAYER: return 0x064;
+ case SV_VIEWPORT_INDEX: return 0x068;
+ case SV_POINT_SIZE: return 0x06c;
+ case SV_CLIP_DISTANCE: return 0x2c0 + idx * 4;
+ case SV_POINT_COORD: return 0x2e0 + idx * 4;
+ case SV_FACE: return 0x3fc;
+ case SV_TESS_FACTOR: return 0x000 + idx * 4;
+ case SV_TESS_COORD: return 0x2f0 + idx * 4;
+ default:
+ return 0xffffffff;
+ }
+}
+
+bool
+TargetNVC0::insnCanLoad(const Instruction *i, int s,
+ const Instruction *ld) const
+{
+ DataFile sf = ld->src[0].getFile();
+
+ // immediate 0 can be represented by GPR $r63
+ if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
+ return (!i->asTex() && i->op != OP_EXPORT && i->op != OP_STORE);
+
+ if (s > opInfo[i->op].srcNr)
+ return false;
+ if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf)))
+ return false;
+
+ // indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0
+ if (ld->src[0].isIndirect(0))
+ return false;
+
+ for (int k = 0; i->srcExists(k); ++k) {
+ if (i->src[k].getFile() == FILE_IMMEDIATE) {
+ if (i->getSrc(k)->reg.data.u64 != 0)
+ return false;
+ } else
+ if (i->src[k].getFile() != FILE_GPR &&
+ i->src[k].getFile() != FILE_PREDICATE) {
+ return false;
+ }
+ }
+
+ // not all instructions support full 32 bit immediates
+ if (sf == FILE_IMMEDIATE) {
+ Storage &reg = ld->getSrc(0)->asImm()->reg;
+
+ if (opInfo[i->op].immdBits != 0xffffffff) {
+ if (i->sType == TYPE_F32) {
+ if (reg.data.u32 & 0xfff)
+ return false;
+ } else
+ if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
+ // with u32, 0xfffff counts as 0xffffffff as well
+ if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
+ return false;
+ }
+ } else
+ if (i->op == OP_MAD || i->op == OP_FMA) {
+ // requires src == dst, cannot decide before RA
+ // (except if we implement more constraints)
+ if (ld->getSrc(0)->asImm()->reg.data.u32 & 0xfff)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool
+TargetNVC0::isOpSupported(operation op, DataType ty) const
+{
+ if ((op == OP_MAD || op == OP_FMA) && (ty != TYPE_F32))
+ return false;
+ if (op == OP_SAD && ty != TYPE_S32)
+ return false;
+ if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
+ return false;
+ return true;
+}
+
+bool
+TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
+{
+ if (!isFloatType(insn->dType)) {
+ switch (insn->op) {
+ case OP_ABS:
+ case OP_NEG:
+ case OP_CVT:
+ case OP_CEIL:
+ case OP_FLOOR:
+ case OP_TRUNC:
+ case OP_AND:
+ case OP_OR:
+ case OP_XOR:
+ break;
+ case OP_ADD:
+ if (insn->src[s ? 0 : 1].mod.neg())
+ return false;
+ break;
+ case OP_SUB:
+ if (s == 0)
+ return insn->src[1].mod.neg() ? false : true;
+ break;
+ default:
+ return false;
+ }
+ }
+ if (s > 3)
+ return false;
+ return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
+}
+
+bool
+TargetNVC0::mayPredicate(const Instruction *insn, const Value *pred) const
+{
+ if (insn->getPredicate())
+ return false;
+ return opInfo[insn->op].predicate;
+}
+
+bool
+TargetNVC0::isSatSupported(const Instruction *insn) const
+{
+ if (insn->op == OP_CVT)
+ return true;
+ if (!(opInfo[insn->op].dstMods & NV50_IR_MOD_SAT))
+ return false;
+
+ if (insn->dType == TYPE_U32)
+ return (insn->op == OP_ADD) || (insn->op == OP_MAD);
+
+ return insn->dType == TYPE_F32;
+}
+
+// TODO: better values
+int TargetNVC0::getLatency(const Instruction *i) const
+{
+ if (i->op == OP_LOAD) {
+ if (i->cache == CACHE_CV)
+ return 700;
+ return 48;
+ }
+ return 24;
+}
+
+// These are "inverse" throughput values, i.e. the number of cycles required
+// to issue a specific instruction for a full warp (32 threads).
+//
+// Assuming more than one warp is in flight, a higher issue latency effectively
+// lowers the result latency, since the MP will have spent the intervening
+// cycles on other warps.
+// This also helps to determine the number of cycles between instructions in
+// a single warp.
+//
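+// For example, with the (rough) values below an f32 MUL can be issued for a
+// full warp every cycle, while its result only becomes available after the
+// ~24 cycles reported by getLatency(), so that gap should be covered by other
+// warps or independent instructions.
+//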
+int TargetNVC0::getThroughput(const Instruction *i) const
+{
+ // TODO: better values
+ if (i->dType == TYPE_F32) {
+ switch (i->op) {
+ case OP_ADD:
+ case OP_MUL:
+ case OP_MAD:
+ case OP_FMA:
+ return 1;
+ case OP_CVT:
+ case OP_CEIL:
+ case OP_FLOOR:
+ case OP_TRUNC:
+ case OP_SET:
+ case OP_SLCT:
+ case OP_MIN:
+ case OP_MAX:
+ return 2;
+ case OP_RCP:
+ case OP_RSQ:
+ case OP_LG2:
+ case OP_SIN:
+ case OP_COS:
+ case OP_PRESIN:
+ case OP_PREEX2:
+ default:
+ return 8;
+ }
+ } else
+ if (i->dType == TYPE_U32 || i->dType == TYPE_S32) {
+ switch (i->op) {
+ case OP_ADD:
+ case OP_AND:
+ case OP_OR:
+ case OP_XOR:
+ case OP_NOT:
+ return 1;
+ case OP_MUL:
+ case OP_MAD:
+ case OP_CVT:
+ case OP_SET:
+ case OP_SLCT:
+ case OP_SHL:
+ case OP_SHR:
+ case OP_NEG:
+ case OP_ABS:
+ case OP_MIN:
+ case OP_MAX:
+ default:
+ return 2;
+ }
+ } else
+ if (i->dType == TYPE_F64) {
+ return 2;
+ } else {
+ return 1;
+ }
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h
new file mode 100644
index 00000000000..f96bfbeaa6a
--- /dev/null
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.h
@@ -0,0 +1,46 @@
+
+#include "nv50/codegen/nv50_ir_target.h"
+
+namespace nv50_ir {
+
+#define NVC0_BUILTIN_DIV_U32 0
+#define NVC0_BUILTIN_DIV_S32 1
+#define NVC0_BUILTIN_RCP_F64 2
+#define NVC0_BUILTIN_RSQ_F64 3
+
+#define NVC0_BUILTIN_COUNT 4
+
+class TargetNVC0 : public Target
+{
+public:
+ TargetNVC0(unsigned int chipset);
+
+ virtual CodeEmitter *getCodeEmitter(Program::Type);
+
+ virtual bool runLegalizePass(Program *, CGStage stage) const;
+
+ virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const;
+
+ virtual bool insnCanLoad(const Instruction *insn, int s,
+ const Instruction *ld) const;
+ virtual bool isOpSupported(operation, DataType) const;
+ virtual bool isModSupported(const Instruction *, int s, Modifier) const;
+ virtual bool isSatSupported(const Instruction *) const;
+ virtual bool mayPredicate(const Instruction *, const Value *) const;
+
+ virtual int getLatency(const Instruction *) const;
+ virtual int getThroughput(const Instruction *) const;
+
+ virtual unsigned int getFileSize(DataFile) const;
+ virtual unsigned int getFileUnit(DataFile) const;
+
+ virtual uint32_t getSVAddress(DataFile shaderFile, const Symbol *sv) const;
+
+ uint32_t getBuiltinOffset(int builtin) const;
+
+private:
+ void initOpInfo();
+
+};
+
+} // namespace nv50_ir
diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile
index 033a1acaaf9..c516588f95f 100644
--- a/src/gallium/targets/gbm/Makefile
+++ b/src/gallium/targets/gbm/Makefile
@@ -118,6 +118,7 @@ pipe_SOURCES += pipe_i965.c
endif
ifneq ($(findstring nouveau/drm,$(GALLIUM_WINSYS_DIRS)),)
+LDFLAGS += -lstdc++
pipe_TARGETS += $(PIPE_PREFIX)nouveau.so
pipe_SOURCES += pipe_nouveau.c
endif