diff options
Diffstat (limited to 'src/gallium/drivers/nouveau')
150 files changed, 74368 insertions, 9 deletions
diff --git a/src/gallium/drivers/nouveau/Android.mk b/src/gallium/drivers/nouveau/Android.mk index 782b7cec188..5275aa60157 100644 --- a/src/gallium/drivers/nouveau/Android.mk +++ b/src/gallium/drivers/nouveau/Android.mk @@ -28,7 +28,13 @@ include $(LOCAL_PATH)/Makefile.sources include $(CLEAR_VARS) -LOCAL_SRC_FILES := $(C_SOURCES) +LOCAL_SRC_FILES := $(C_SOURCES) \ + $(NV30_C_SOURCES) \ + $(NV50_CODEGEN_SOURCES) \ + $(NV50_C_SOURCES) \ + $(NVC0_CODEGEN_SOURCES) \ + $(NVC0_C_SOURCES) + LOCAL_C_INCLUDES := $(DRM_TOP) \ $(DRM_TOP)/include/drm \ $(DRM_TOP)/nouveau diff --git a/src/gallium/drivers/nouveau/Makefile.am b/src/gallium/drivers/nouveau/Makefile.am index 12e3da0334a..c4b51d9dff2 100644 --- a/src/gallium/drivers/nouveau/Makefile.am +++ b/src/gallium/drivers/nouveau/Makefile.am @@ -27,9 +27,15 @@ noinst_LTLIBRARIES = libnouveau.la AM_CPPFLAGS = \ -I$(top_srcdir)/src/gallium/drivers/nouveau/include \ - -I$(top_srcdir)/src/gallium/drivers \ - -I$(top_srcdir)/include \ $(GALLIUM_CFLAGS) \ - $(LIBDRM_CFLAGS) + $(LIBDRM_CFLAGS) \ + $(NOUVEAU_CFLAGS) \ + $(VISIBILITY_CFLAGS) -libnouveau_la_SOURCES = $(C_SOURCES) +libnouveau_la_SOURCES = \ + $(C_SOURCES) \ + $(NV30_C_SOURCES) \ + $(NV50_CODEGEN_SOURCES) \ + $(NV50_C_SOURCES) \ + $(NVC0_CODEGEN_SOURCES) \ + $(NVC0_C_SOURCES) diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 7912f6703e3..cc84ec3edb6 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -8,3 +8,94 @@ C_SOURCES := \ nouveau_vp3_video.c \ nouveau_vp3_video_bsp.c \ nouveau_vp3_video_vp.c + +NV30_C_SOURCES := \ + nv30/nv30_screen.c \ + nv30/nv30_context.c \ + nv30/nv30_format.c \ + nv30/nv30_resource.c \ + nv30/nv30_transfer.c \ + nv30/nv30_miptree.c \ + nv30/nv30_state.c \ + nv30/nv30_state_validate.c \ + nv30/nv30_texture.c \ + nv30/nv30_fragtex.c \ + nv30/nv40_verttex.c \ + nv30/nv30_fragprog.c \ + nv30/nv30_vertprog.c \ + 
nv30/nv30_clear.c \ + nv30/nv30_vbo.c \ + nv30/nv30_push.c \ + nv30/nv30_draw.c \ + nv30/nv30_query.c \ + nv30/nvfx_vertprog.c \ + nv30/nvfx_fragprog.c + +NV50_C_SOURCES := \ + nv50/nv50_context.c \ + nv50/nv50_formats.c \ + nv50/nv50_miptree.c \ + nv50/nv50_resource.c \ + nv50/nv50_screen.c \ + nv50/nv50_state.c \ + nv50/nv50_state_validate.c \ + nv50/nv50_surface.c \ + nv50/nv50_tex.c \ + nv50/nv50_transfer.c \ + nv50/nv50_vbo.c \ + nv50/nv50_program.c \ + nv50/nv50_shader_state.c \ + nv50/nv50_push.c \ + nv50/nv50_query.c \ + nv50/nv84_video.c \ + nv50/nv84_video_bsp.c \ + nv50/nv84_video_vp.c \ + nv50/nv98_video.c \ + nv50/nv98_video_bsp.c \ + nv50/nv98_video_vp.c \ + nv50/nv98_video_ppp.c + +NV50_CODEGEN_SOURCES := \ + codegen/nv50_ir.cpp \ + codegen/nv50_ir_bb.cpp \ + codegen/nv50_ir_build_util.cpp \ + codegen/nv50_ir_emit_nv50.cpp \ + codegen/nv50_ir_from_tgsi.cpp \ + codegen/nv50_ir_graph.cpp \ + codegen/nv50_ir_lowering_nv50.cpp \ + codegen/nv50_ir_peephole.cpp \ + codegen/nv50_ir_print.cpp \ + codegen/nv50_ir_ra.cpp \ + codegen/nv50_ir_ssa.cpp \ + codegen/nv50_ir_target.cpp \ + codegen/nv50_ir_target_nv50.cpp \ + codegen/nv50_ir_util.cpp + +NVC0_CODEGEN_SOURCES := \ + codegen/nv50_ir_emit_gk110.cpp \ + codegen/nv50_ir_emit_nvc0.cpp \ + codegen/nv50_ir_lowering_nvc0.cpp \ + codegen/nv50_ir_target_nvc0.cpp + +NVC0_C_SOURCES := \ + nvc0/nvc0_compute.c \ + nvc0/nvc0_context.c \ + nvc0/nvc0_formats.c \ + nvc0/nvc0_miptree.c \ + nvc0/nvc0_resource.c \ + nvc0/nvc0_screen.c \ + nvc0/nvc0_state.c \ + nvc0/nvc0_state_validate.c \ + nvc0/nvc0_surface.c \ + nvc0/nvc0_tex.c \ + nvc0/nvc0_transfer.c \ + nvc0/nvc0_vbo.c \ + nvc0/nvc0_vbo_translate.c \ + nvc0/nvc0_program.c \ + nvc0/nvc0_shader_state.c \ + nvc0/nvc0_query.c \ + nvc0/nve4_compute.c \ + nvc0/nvc0_video.c \ + nvc0/nvc0_video_bsp.c \ + nvc0/nvc0_video_vp.c \ + nvc0/nvc0_video_ppp.c diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp new file mode 
100644 index 00000000000..90fb51c26df --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -0,0 +1,1231 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_target.h" +#include "codegen/nv50_ir_driver.h" + +extern "C" { +#include "nv50/nv50_program.h" +#include "nv50/nv50_debug.h" +} + +namespace nv50_ir { + +Modifier::Modifier(operation op) +{ + switch (op) { + case OP_NEG: bits = NV50_IR_MOD_NEG; break; + case OP_ABS: bits = NV50_IR_MOD_ABS; break; + case OP_SAT: bits = NV50_IR_MOD_SAT; break; + case OP_NOT: bits = NV50_IR_MOD_NOT; break; + default: + bits = 0; + break; + } +} + +Modifier Modifier::operator*(const Modifier m) const +{ + unsigned int a, b, c; + + b = m.bits; + if (this->bits & NV50_IR_MOD_ABS) + b &= ~NV50_IR_MOD_NEG; + + a = (this->bits ^ b) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG); + c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT); + + return Modifier(a | c); +} + +ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL) +{ + indirect[0] = -1; + indirect[1] = -1; + usedAsPtr = false; + set(v); +} + +ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn) +{ + set(ref); + usedAsPtr = ref.usedAsPtr; +} + +ValueRef::~ValueRef() +{ + this->set(NULL); +} + +bool ValueRef::getImmediate(ImmediateValue &imm) const +{ + const ValueRef *src = this; + Modifier m; + DataType type = src->insn->sType; + + while (src) { + if (src->mod) { + if (src->insn->sType != type) + break; + m *= src->mod; + } + if (src->getFile() == FILE_IMMEDIATE) { + imm = *(src->value->asImm()); + // The immediate's type isn't required to match its use, it's + // more of a hint; applying a modifier makes use of that hint. 
+ imm.reg.type = type; + m.applyTo(imm); + return true; + } + + Instruction *insn = src->value->getUniqueInsn(); + + if (insn && insn->op == OP_MOV) { + src = &insn->src(0); + if (src->mod) + WARN("OP_MOV with modifier encountered !\n"); + } else { + src = NULL; + } + } + return false; +} + +ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL) +{ + set(v); +} + +ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL) +{ + set(def.get()); +} + +ValueDef::~ValueDef() +{ + this->set(NULL); +} + +void +ValueRef::set(const ValueRef &ref) +{ + this->set(ref.get()); + mod = ref.mod; + indirect[0] = ref.indirect[0]; + indirect[1] = ref.indirect[1]; +} + +void +ValueRef::set(Value *refVal) +{ + if (value == refVal) + return; + if (value) + value->uses.remove(this); + if (refVal) + refVal->uses.push_back(this); + + value = refVal; +} + +void +ValueDef::set(Value *defVal) +{ + if (value == defVal) + return; + if (value) + value->defs.remove(this); + if (defVal) + defVal->defs.push_back(this); + + value = defVal; +} + +// Check if we can replace this definition's value by the value in @rep, +// including the source modifiers, i.e. make sure that all uses support +// @rep.mod. +bool +ValueDef::mayReplace(const ValueRef &rep) +{ + if (!rep.mod) + return true; + + if (!insn || !insn->bb) // Unbound instruction ? + return false; + + const Target *target = insn->bb->getProgram()->getTarget(); + + for (Value::UseIterator it = value->uses.begin(); it != value->uses.end(); + ++it) { + Instruction *insn = (*it)->getInsn(); + int s = -1; + + for (int i = 0; insn->srcExists(i); ++i) { + if (insn->src(i).get() == value) { + // If there are multiple references to us we'd have to check if the + // combination of mods is still supported, but just bail for now. 
+ if (&insn->src(i) != (*it)) + return false; + s = i; + } + } + assert(s >= 0); // integrity of uses list + + if (!target->isModSupported(insn, s, rep.mod)) + return false; + } + return true; +} + +void +ValueDef::replace(const ValueRef &repVal, bool doSet) +{ + assert(mayReplace(repVal)); + + if (value == repVal.get()) + return; + + while (!value->uses.empty()) { + ValueRef *ref = value->uses.front(); + ref->set(repVal.get()); + ref->mod *= repVal.mod; + } + + if (doSet) + set(repVal.get()); +} + +Value::Value() +{ + join = this; + memset(®, 0, sizeof(reg)); + reg.size = 4; +} + +LValue::LValue(Function *fn, DataFile file) +{ + reg.file = file; + reg.size = (file != FILE_PREDICATE) ? 4 : 1; + reg.data.id = -1; + + compMask = 0; + compound = 0; + ssa = 0; + fixedReg = 0; + noSpill = 0; + + fn->add(this, this->id); +} + +LValue::LValue(Function *fn, LValue *lval) +{ + assert(lval); + + reg.file = lval->reg.file; + reg.size = lval->reg.size; + reg.data.id = -1; + + compMask = 0; + compound = 0; + ssa = 0; + fixedReg = 0; + noSpill = 0; + + fn->add(this, this->id); +} + +LValue * +LValue::clone(ClonePolicy<Function>& pol) const +{ + LValue *that = new_LValue(pol.context(), reg.file); + + pol.set<Value>(this, that); + + that->reg.size = this->reg.size; + that->reg.type = this->reg.type; + that->reg.data = this->reg.data; + + return that; +} + +bool +LValue::isUniform() const +{ + if (defs.size() > 1) + return false; + Instruction *insn = getInsn(); + // let's not try too hard here for now ... 
+ return !insn->srcExists(1) && insn->getSrc(0)->isUniform(); +} + +Symbol::Symbol(Program *prog, DataFile f, ubyte fidx) +{ + baseSym = NULL; + + reg.file = f; + reg.fileIndex = fidx; + reg.data.offset = 0; + + prog->add(this, this->id); +} + +Symbol * +Symbol::clone(ClonePolicy<Function>& pol) const +{ + Program *prog = pol.context()->getProgram(); + + Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex); + + pol.set<Value>(this, that); + + that->reg.size = this->reg.size; + that->reg.type = this->reg.type; + that->reg.data = this->reg.data; + + that->baseSym = this->baseSym; + + return that; +} + +bool +Symbol::isUniform() const +{ + return + reg.file != FILE_SYSTEM_VALUE && + reg.file != FILE_MEMORY_LOCAL && + reg.file != FILE_SHADER_INPUT; +} + +ImmediateValue::ImmediateValue(Program *prog, uint32_t uval) +{ + memset(®, 0, sizeof(reg)); + + reg.file = FILE_IMMEDIATE; + reg.size = 4; + reg.type = TYPE_U32; + + reg.data.u32 = uval; + + prog->add(this, this->id); +} + +ImmediateValue::ImmediateValue(Program *prog, float fval) +{ + memset(®, 0, sizeof(reg)); + + reg.file = FILE_IMMEDIATE; + reg.size = 4; + reg.type = TYPE_F32; + + reg.data.f32 = fval; + + prog->add(this, this->id); +} + +ImmediateValue::ImmediateValue(Program *prog, double dval) +{ + memset(®, 0, sizeof(reg)); + + reg.file = FILE_IMMEDIATE; + reg.size = 8; + reg.type = TYPE_F64; + + reg.data.f64 = dval; + + prog->add(this, this->id); +} + +ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty) +{ + reg = proto->reg; + + reg.type = ty; + reg.size = typeSizeof(ty); +} + +ImmediateValue * +ImmediateValue::clone(ClonePolicy<Function>& pol) const +{ + Program *prog = pol.context()->getProgram(); + ImmediateValue *that = new_ImmediateValue(prog, 0u); + + pol.set<Value>(this, that); + + that->reg.size = this->reg.size; + that->reg.type = this->reg.type; + that->reg.data = this->reg.data; + + return that; +} + +bool +ImmediateValue::isInteger(const int i) const +{ + switch (reg.type) 
{ + case TYPE_S8: + return reg.data.s8 == i; + case TYPE_U8: + return reg.data.u8 == i; + case TYPE_S16: + return reg.data.s16 == i; + case TYPE_U16: + return reg.data.u16 == i; + case TYPE_S32: + case TYPE_U32: + return reg.data.s32 == i; // as if ... + case TYPE_F32: + return reg.data.f32 == static_cast<float>(i); + case TYPE_F64: + return reg.data.f64 == static_cast<double>(i); + default: + return false; + } +} + +bool +ImmediateValue::isNegative() const +{ + switch (reg.type) { + case TYPE_S8: return reg.data.s8 < 0; + case TYPE_S16: return reg.data.s16 < 0; + case TYPE_S32: + case TYPE_U32: return reg.data.s32 < 0; + case TYPE_F32: return reg.data.u32 & (1 << 31); + case TYPE_F64: return reg.data.u64 & (1ULL << 63); + default: + return false; + } +} + +bool +ImmediateValue::isPow2() const +{ + switch (reg.type) { + case TYPE_U8: + case TYPE_U16: + case TYPE_U32: return util_is_power_of_two(reg.data.u32); + default: + return false; + } +} + +void +ImmediateValue::applyLog2() +{ + switch (reg.type) { + case TYPE_S8: + case TYPE_S16: + case TYPE_S32: + assert(!this->isNegative()); + // fall through + case TYPE_U8: + case TYPE_U16: + case TYPE_U32: + reg.data.u32 = util_logbase2(reg.data.u32); + break; + case TYPE_F32: + reg.data.f32 = log2f(reg.data.f32); + break; + case TYPE_F64: + reg.data.f64 = log2(reg.data.f64); + break; + default: + assert(0); + break; + } +} + +bool +ImmediateValue::compare(CondCode cc, float fval) const +{ + if (reg.type != TYPE_F32) + ERROR("immediate value is not of type f32"); + + switch (static_cast<CondCode>(cc & 7)) { + case CC_TR: return true; + case CC_FL: return false; + case CC_LT: return reg.data.f32 < fval; + case CC_LE: return reg.data.f32 <= fval; + case CC_GT: return reg.data.f32 > fval; + case CC_GE: return reg.data.f32 >= fval; + case CC_EQ: return reg.data.f32 == fval; + case CC_NE: return reg.data.f32 != fval; + default: + assert(0); + return false; + } +} + +ImmediateValue& +ImmediateValue::operator=(const 
ImmediateValue &that) +{ + this->reg = that.reg; + return (*this); +} + +bool +Value::interfers(const Value *that) const +{ + uint32_t idA, idB; + + if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) + return false; + if (this->asImm()) + return false; + + if (this->asSym()) { + idA = this->join->reg.data.offset; + idB = that->join->reg.data.offset; + } else { + idA = this->join->reg.data.id * MIN2(this->reg.size, 4); + idB = that->join->reg.data.id * MIN2(that->reg.size, 4); + } + + if (idA < idB) + return (idA + this->reg.size > idB); + else + if (idA > idB) + return (idB + that->reg.size > idA); + else + return (idA == idB); +} + +bool +Value::equals(const Value *that, bool strict) const +{ + if (strict) + return this == that; + + if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) + return false; + if (that->reg.size != this->reg.size) + return false; + + if (that->reg.data.id != this->reg.data.id) + return false; + + return true; +} + +bool +ImmediateValue::equals(const Value *that, bool strict) const +{ + const ImmediateValue *imm = that->asImm(); + if (!imm) + return false; + return reg.data.u64 == imm->reg.data.u64; +} + +bool +Symbol::equals(const Value *that, bool strict) const +{ + if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex) + return false; + assert(that->asSym()); + + if (this->baseSym != that->asSym()->baseSym) + return false; + + if (reg.file == FILE_SYSTEM_VALUE) + return (this->reg.data.sv.sv == that->reg.data.sv.sv && + this->reg.data.sv.index == that->reg.data.sv.index); + return this->reg.data.offset == that->reg.data.offset; +} + +void Instruction::init() +{ + next = prev = 0; + + cc = CC_ALWAYS; + rnd = ROUND_N; + cache = CACHE_CA; + subOp = 0; + + saturate = 0; + join = 0; + exit = 0; + terminator = 0; + ftz = 0; + dnz = 0; + perPatch = 0; + fixed = 0; + encSize = 0; + ipa = 0; + mask = 0; + + lanes = 0xf; + + postFactor = 0; + + predSrc = -1; + flagsDef = -1; + flagsSrc = 
-1; +} + +Instruction::Instruction() +{ + init(); + + op = OP_NOP; + dType = sType = TYPE_F32; + + id = -1; + bb = 0; +} + +Instruction::Instruction(Function *fn, operation opr, DataType ty) +{ + init(); + + op = opr; + dType = sType = ty; + + fn->add(this, id); +} + +Instruction::~Instruction() +{ + if (bb) { + Function *fn = bb->getFunction(); + bb->remove(this); + fn->allInsns.remove(id); + } + + for (int s = 0; srcExists(s); ++s) + setSrc(s, NULL); + // must unlink defs too since the list pointers will get deallocated + for (int d = 0; defExists(d); ++d) + setDef(d, NULL); +} + +void +Instruction::setDef(int i, Value *val) +{ + int size = defs.size(); + if (i >= size) { + defs.resize(i + 1); + while (size <= i) + defs[size++].setInsn(this); + } + defs[i].set(val); +} + +void +Instruction::setSrc(int s, Value *val) +{ + int size = srcs.size(); + if (s >= size) { + srcs.resize(s + 1); + while (size <= s) + srcs[size++].setInsn(this); + } + srcs[s].set(val); +} + +void +Instruction::setSrc(int s, const ValueRef& ref) +{ + setSrc(s, ref.get()); + srcs[s].mod = ref.mod; +} + +void +Instruction::swapSources(int a, int b) +{ + Value *value = srcs[a].get(); + Modifier m = srcs[a].mod; + + setSrc(a, srcs[b]); + + srcs[b].set(value); + srcs[b].mod = m; +} + +static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta) +{ + if (index >= s) + index += delta; + else + if ((delta < 0) && (index >= (s + delta))) + index = -1; +} + +// Moves sources [@s,last_source] by @delta. +// If @delta < 0, sources [@s - abs(@delta), @s) are erased. 
+void +Instruction::moveSources(const int s, const int delta) +{ + if (delta == 0) + return; + assert(s + delta >= 0); + + int k; + + for (k = 0; srcExists(k); ++k) { + for (int i = 0; i < 2; ++i) + moveSourcesAdjustIndex(src(k).indirect[i], s, delta); + } + moveSourcesAdjustIndex(predSrc, s, delta); + moveSourcesAdjustIndex(flagsSrc, s, delta); + if (asTex()) { + TexInstruction *tex = asTex(); + moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta); + moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta); + } + + if (delta > 0) { + --k; + for (int p = k + delta; k >= s; --k, --p) + setSrc(p, src(k)); + } else { + int p; + for (p = s; p < k; ++p) + setSrc(p + delta, src(p)); + for (; (p + delta) < k; ++p) + setSrc(p + delta, NULL); + } +} + +void +Instruction::takeExtraSources(int s, Value *values[3]) +{ + values[0] = getIndirect(s, 0); + if (values[0]) + setIndirect(s, 0, NULL); + + values[1] = getIndirect(s, 1); + if (values[1]) + setIndirect(s, 1, NULL); + + values[2] = getPredicate(); + if (values[2]) + setPredicate(cc, NULL); +} + +void +Instruction::putExtraSources(int s, Value *values[3]) +{ + if (values[0]) + setIndirect(s, 0, values[0]); + if (values[1]) + setIndirect(s, 1, values[1]); + if (values[2]) + setPredicate(cc, values[2]); +} + +Instruction * +Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const +{ + if (!i) + i = new_Instruction(pol.context(), op, dType); +#ifndef NDEBUG // non-conformant assert, so this is required + assert(typeid(*i) == typeid(*this)); +#endif + + pol.set<Instruction>(this, i); + + i->sType = sType; + + i->rnd = rnd; + i->cache = cache; + i->subOp = subOp; + + i->saturate = saturate; + i->join = join; + i->exit = exit; + i->mask = mask; + i->ftz = ftz; + i->dnz = dnz; + i->ipa = ipa; + i->lanes = lanes; + i->perPatch = perPatch; + + i->postFactor = postFactor; + + for (int d = 0; defExists(d); ++d) + i->setDef(d, pol.get(getDef(d))); + + for (int s = 0; srcExists(s); ++s) { + i->setSrc(s, 
pol.get(getSrc(s))); + i->src(s).mod = src(s).mod; + } + + i->cc = cc; + i->predSrc = predSrc; + i->flagsDef = flagsDef; + i->flagsSrc = flagsSrc; + + return i; +} + +unsigned int +Instruction::defCount(unsigned int mask, bool singleFile) const +{ + unsigned int i, n; + + if (singleFile) { + unsigned int d = ffs(mask); + if (!d) + return 0; + for (i = d--; defExists(i); ++i) + if (getDef(i)->reg.file != getDef(d)->reg.file) + mask &= ~(1 << i); + } + + for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1) + n += mask & 1; + return n; +} + +unsigned int +Instruction::srcCount(unsigned int mask, bool singleFile) const +{ + unsigned int i, n; + + if (singleFile) { + unsigned int s = ffs(mask); + if (!s) + return 0; + for (i = s--; srcExists(i); ++i) + if (getSrc(i)->reg.file != getSrc(s)->reg.file) + mask &= ~(1 << i); + } + + for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1) + n += mask & 1; + return n; +} + +bool +Instruction::setIndirect(int s, int dim, Value *value) +{ + assert(this->srcExists(s)); + + int p = srcs[s].indirect[dim]; + if (p < 0) { + if (!value) + return true; + p = srcs.size(); + while (p > 0 && !srcExists(p - 1)) + --p; + } + setSrc(p, value); + srcs[p].usedAsPtr = (value != 0); + srcs[s].indirect[dim] = value ? 
p : -1; + return true; +} + +bool +Instruction::setPredicate(CondCode ccode, Value *value) +{ + cc = ccode; + + if (!value) { + if (predSrc >= 0) { + srcs[predSrc].set(NULL); + predSrc = -1; + } + return true; + } + + if (predSrc < 0) { + predSrc = srcs.size(); + while (predSrc > 0 && !srcExists(predSrc - 1)) + --predSrc; + } + + setSrc(predSrc, value); + return true; +} + +bool +Instruction::writesPredicate() const +{ + for (int d = 0; defExists(d); ++d) + if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS)) + return true; + return false; +} + +static bool +insnCheckCommutationDefSrc(const Instruction *a, const Instruction *b) +{ + for (int d = 0; a->defExists(d); ++d) + for (int s = 0; b->srcExists(s); ++s) + if (a->getDef(d)->interfers(b->getSrc(s))) + return false; + return true; +} + +static bool +insnCheckCommutationDefDef(const Instruction *a, const Instruction *b) +{ + for (int d = 0; a->defExists(d); ++d) + for (int c = 0; b->defExists(c); ++c) + if (a->getDef(d)->interfers(b->getDef(c))) + return false; + return true; +} + +bool +Instruction::isCommutationLegal(const Instruction *i) const +{ + bool ret = insnCheckCommutationDefDef(this, i); + ret = ret && insnCheckCommutationDefSrc(this, i); + ret = ret && insnCheckCommutationDefSrc(i, this); + return ret; +} + +TexInstruction::TexInstruction(Function *fn, operation op) + : Instruction(fn, op, TYPE_F32) +{ + memset(&tex, 0, sizeof(tex)); + + tex.rIndirectSrc = -1; + tex.sIndirectSrc = -1; +} + +TexInstruction::~TexInstruction() +{ + for (int c = 0; c < 3; ++c) { + dPdx[c].set(NULL); + dPdy[c].set(NULL); + } +} + +TexInstruction * +TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const +{ + TexInstruction *tex = (i ? 
static_cast<TexInstruction *>(i) : + new_TexInstruction(pol.context(), op)); + + Instruction::clone(pol, tex); + + tex->tex = this->tex; + + if (op == OP_TXD) { + for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) { + tex->dPdx[c].set(dPdx[c]); + tex->dPdy[c].set(dPdy[c]); + } + } + + return tex; +} + +const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] = +{ + { "1D", 1, 1, false, false, false }, + { "2D", 2, 2, false, false, false }, + { "2D_MS", 2, 3, false, false, false }, + { "3D", 3, 3, false, false, false }, + { "CUBE", 2, 3, false, true, false }, + { "1D_SHADOW", 1, 1, false, false, true }, + { "2D_SHADOW", 2, 2, false, false, true }, + { "CUBE_SHADOW", 2, 3, false, true, true }, + { "1D_ARRAY", 1, 2, true, false, false }, + { "2D_ARRAY", 2, 3, true, false, false }, + { "2D_MS_ARRAY", 2, 4, true, false, false }, + { "CUBE_ARRAY", 2, 4, true, true, false }, + { "1D_ARRAY_SHADOW", 1, 2, true, false, true }, + { "2D_ARRAY_SHADOW", 2, 3, true, false, true }, + { "RECT", 2, 2, false, false, false }, + { "RECT_SHADOW", 2, 2, false, false, true }, + { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true }, + { "BUFFER", 1, 1, false, false, false }, +}; + +void +TexInstruction::setIndirectR(Value *v) +{ + int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc; + if (p >= 0) { + tex.rIndirectSrc = p; + setSrc(p, v); + srcs[p].usedAsPtr = !!v; + } +} + +void +TexInstruction::setIndirectS(Value *v) +{ + int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc; + if (p >= 0) { + tex.sIndirectSrc = p; + setSrc(p, v); + srcs[p].usedAsPtr = !!v; + } +} + +CmpInstruction::CmpInstruction(Function *fn, operation op) + : Instruction(fn, op, TYPE_F32) +{ + setCond = CC_ALWAYS; +} + +CmpInstruction * +CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const +{ + CmpInstruction *cmp = (i ? 
static_cast<CmpInstruction *>(i) : + new_CmpInstruction(pol.context(), op)); + cmp->dType = dType; + Instruction::clone(pol, cmp); + cmp->setCond = setCond; + return cmp; +} + +FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ) + : Instruction(fn, op, TYPE_NONE) +{ + if (op == OP_CALL) + target.fn = reinterpret_cast<Function *>(targ); + else + target.bb = reinterpret_cast<BasicBlock *>(targ); + + if (op == OP_BRA || + op == OP_CONT || op == OP_BREAK || + op == OP_RET || op == OP_EXIT) + terminator = 1; + else + if (op == OP_JOIN) + terminator = targ ? 1 : 0; + + allWarp = absolute = limit = builtin = indirect = 0; +} + +FlowInstruction * +FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const +{ + FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) : + new_FlowInstruction(pol.context(), op, NULL)); + + Instruction::clone(pol, flow); + flow->allWarp = allWarp; + flow->absolute = absolute; + flow->limit = limit; + flow->builtin = builtin; + + if (builtin) + flow->target.builtin = target.builtin; + else + if (op == OP_CALL) + flow->target.fn = target.fn; + else + if (target.bb) + flow->target.bb = pol.get<BasicBlock>(target.bb); + + return flow; +} + +Program::Program(Type type, Target *arch) + : progType(type), + target(arch), + mem_Instruction(sizeof(Instruction), 6), + mem_CmpInstruction(sizeof(CmpInstruction), 4), + mem_TexInstruction(sizeof(TexInstruction), 4), + mem_FlowInstruction(sizeof(FlowInstruction), 4), + mem_LValue(sizeof(LValue), 8), + mem_Symbol(sizeof(Symbol), 7), + mem_ImmediateValue(sizeof(ImmediateValue), 7) +{ + code = NULL; + binSize = 0; + + maxGPR = -1; + + main = new Function(this, "MAIN", ~0); + calls.insert(&main->call); + + dbgFlags = 0; + optLevel = 0; + + targetPriv = NULL; +} + +Program::~Program() +{ + for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next()) + delete reinterpret_cast<Function *>(it.get()); + + for (ArrayList::Iterator it = allRValues.iterator(); 
!it.end(); it.next()) + releaseValue(reinterpret_cast<Value *>(it.get())); +} + +void Program::releaseInstruction(Instruction *insn) +{ + // TODO: make this not suck so much + + insn->~Instruction(); + + if (insn->asCmp()) + mem_CmpInstruction.release(insn); + else + if (insn->asTex()) + mem_TexInstruction.release(insn); + else + if (insn->asFlow()) + mem_FlowInstruction.release(insn); + else + mem_Instruction.release(insn); +} + +void Program::releaseValue(Value *value) +{ + value->~Value(); + + if (value->asLValue()) + mem_LValue.release(value); + else + if (value->asImm()) + mem_ImmediateValue.release(value); + else + if (value->asSym()) + mem_Symbol.release(value); +} + + +} // namespace nv50_ir + +extern "C" { + +static void +nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) +{ +#if defined(PIPE_SHADER_HULL) && defined(PIPE_SHADER_DOMAIN) + if (info->type == PIPE_SHADER_HULL || info->type == PIPE_SHADER_DOMAIN) { + info->prop.tp.domain = PIPE_PRIM_MAX; + info->prop.tp.outputPrim = PIPE_PRIM_MAX; + } +#endif + if (info->type == PIPE_SHADER_GEOMETRY) { + info->prop.gp.instanceCount = 1; + info->prop.gp.maxVertices = 1; + } + info->io.clipDistance = 0xff; + info->io.pointSize = 0xff; + info->io.instanceId = 0xff; + info->io.vertexId = 0xff; + info->io.edgeFlagIn = 0xff; + info->io.edgeFlagOut = 0xff; + info->io.fragDepth = 0xff; + info->io.sampleMask = 0xff; + info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff; +} + +int +nv50_ir_generate_code(struct nv50_ir_prog_info *info) +{ + int ret = 0; + + nv50_ir::Program::Type type; + + nv50_ir_init_prog_info(info); + +#define PROG_TYPE_CASE(a, b) \ + case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break + + switch (info->type) { + PROG_TYPE_CASE(VERTEX, VERTEX); +// PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL); +// PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL); + PROG_TYPE_CASE(GEOMETRY, GEOMETRY); + PROG_TYPE_CASE(FRAGMENT, FRAGMENT); + PROG_TYPE_CASE(COMPUTE, COMPUTE); + default: + type = 
nv50_ir::Program::TYPE_COMPUTE; + break; + } + INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type); + + nv50_ir::Target *targ = nv50_ir::Target::create(info->target); + if (!targ) + return -1; + + nv50_ir::Program *prog = new nv50_ir::Program(type, targ); + if (!prog) + return -1; + prog->driver = info; + prog->dbgFlags = info->dbgFlags; + prog->optLevel = info->optLevel; + + switch (info->bin.sourceRep) { +#if 0 + case PIPE_IR_LLVM: + case PIPE_IR_GLSL: + return -1; + case PIPE_IR_SM4: + ret = prog->makeFromSM4(info) ? 0 : -2; + break; + case PIPE_IR_TGSI: +#endif + default: + ret = prog->makeFromTGSI(info) ? 0 : -2; + break; + } + if (ret < 0) + goto out; + if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) + prog->print(); + + targ->parseDriverInfo(info); + prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); + + prog->convertToSSA(); + + if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) + prog->print(); + + prog->optimizeSSA(info->optLevel); + prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA); + + if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) + prog->print(); + + if (!prog->registerAllocation()) { + ret = -4; + goto out; + } + prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA); + + prog->optimizePostRA(info->optLevel); + + if (!prog->emitBinary(info)) { + ret = -5; + goto out; + } + +out: + INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret); + + info->bin.maxGPR = prog->maxGPR; + info->bin.code = prog->code; + info->bin.codeSize = prog->binSize; + info->bin.tlsSpace = prog->tlsSize; + + delete prog; + nv50_ir::Target::destroy(targ); + + return ret; +} + +} // extern "C" diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h new file mode 100644 index 00000000000..68c76e5a9cb --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -0,0 +1,1197 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby 
granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NV50_IR_H__ +#define __NV50_IR_H__ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <deque> +#include <list> +#include <vector> + +#include "codegen/nv50_ir_util.h" +#include "codegen/nv50_ir_graph.h" + +#include "codegen/nv50_ir_driver.h" + +namespace nv50_ir { + +enum operation +{ + OP_NOP = 0, + OP_PHI, + OP_UNION, // unify a new definition and several source values + OP_SPLIT, // $r0d -> { $r0, $r1 } ($r0d and $r0/$r1 will be coalesced) + OP_MERGE, // opposite of split, e.g. 
combine 2 32 bit into a 64 bit value + OP_CONSTRAINT, // copy values into consecutive registers + OP_MOV, // simple copy, no modifiers allowed + OP_LOAD, + OP_STORE, + OP_ADD, // NOTE: add u64 + u32 is legal for targets w/o 64-bit integer adds + OP_SUB, + OP_MUL, + OP_DIV, + OP_MOD, + OP_MAD, + OP_FMA, + OP_SAD, // abs(src0 - src1) + src2 + OP_ABS, + OP_NEG, + OP_NOT, + OP_AND, + OP_OR, + OP_XOR, + OP_SHL, + OP_SHR, + OP_MAX, + OP_MIN, + OP_SAT, // CLAMP(f32, 0.0, 1.0) + OP_CEIL, + OP_FLOOR, + OP_TRUNC, + OP_CVT, + OP_SET_AND, // dst = (src0 CMP src1) & src2 + OP_SET_OR, + OP_SET_XOR, + OP_SET, + OP_SELP, // dst = src2 ? src0 : src1 + OP_SLCT, // dst = (src2 CMP 0) ? src0 : src1 + OP_RCP, + OP_RSQ, + OP_LG2, + OP_SIN, + OP_COS, + OP_EX2, + OP_EXP, // exponential (base M_E) + OP_LOG, // natural logarithm + OP_PRESIN, + OP_PREEX2, + OP_SQRT, + OP_POW, + OP_BRA, + OP_CALL, + OP_RET, + OP_CONT, + OP_BREAK, + OP_PRERET, + OP_PRECONT, + OP_PREBREAK, + OP_BRKPT, // breakpoint (not related to loops) + OP_JOINAT, // push control flow convergence point + OP_JOIN, // converge + OP_DISCARD, + OP_EXIT, + OP_MEMBAR, // memory barrier (mfence, lfence, sfence) + OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base + OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1] + OP_EXPORT, + OP_LINTERP, + OP_PINTERP, + OP_EMIT, // emit vertex + OP_RESTART, // restart primitive + OP_TEX, + OP_TXB, // texture bias + OP_TXL, // texure lod + OP_TXF, // texel fetch + OP_TXQ, // texture size query + OP_TXD, // texture derivatives + OP_TXG, // texture gather + OP_TEXCSAA, // texture op for coverage sampling + OP_TEXPREP, // turn cube map array into 2d array coordinates + OP_SULDB, // surface load (raw) + OP_SULDP, // surface load (formatted) + OP_SUSTB, // surface store (raw) + OP_SUSTP, // surface store (formatted) + OP_SUREDB, + OP_SUREDP, // surface reduction (atomic op) + OP_SULEA, // surface load effective address + OP_SUBFM, // surface bitfield 
manipulation + OP_SUCLAMP, // clamp surface coordinates + OP_SUEAU, // surface effective address + OP_MADSP, // special integer multiply-add + OP_TEXBAR, // texture dependency barrier + OP_DFDX, + OP_DFDY, + OP_RDSV, // read system value + OP_WRSV, // write system value + OP_QUADOP, + OP_QUADON, + OP_QUADPOP, + OP_POPCNT, // bitcount(src0 & src1) + OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7] + OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK + OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order) + OP_ATOM, + OP_BAR, // execution barrier, sources = { id, thread count, predicate } + OP_VADD, // byte/word vector operations + OP_VAVG, + OP_VMIN, + OP_VMAX, + OP_VSAD, + OP_VSET, + OP_VSHR, + OP_VSHL, + OP_VSEL, + OP_CCTL, // cache control + OP_LAST +}; + +// various instruction-specific modifier definitions Instruction::subOp +// MOV_FINAL marks a MOV originating from an EXPORT (used for placing TEXBARs) +#define NV50_IR_SUBOP_MUL_HIGH 1 +#define NV50_IR_SUBOP_EMIT_RESTART 1 +#define NV50_IR_SUBOP_LDC_IL 1 +#define NV50_IR_SUBOP_LDC_IS 2 +#define NV50_IR_SUBOP_LDC_ISL 3 +#define NV50_IR_SUBOP_SHIFT_WRAP 1 +#define NV50_IR_SUBOP_EMU_PRERET 1 +#define NV50_IR_SUBOP_TEXBAR(n) n +#define NV50_IR_SUBOP_MOV_FINAL 1 +#define NV50_IR_SUBOP_EXTBF_REV 1 +#define NV50_IR_SUBOP_PERMT_F4E 1 +#define NV50_IR_SUBOP_PERMT_B4E 2 +#define NV50_IR_SUBOP_PERMT_RC8 3 +#define NV50_IR_SUBOP_PERMT_ECL 4 +#define NV50_IR_SUBOP_PERMT_ECR 5 +#define NV50_IR_SUBOP_PERMT_RC16 6 +#define NV50_IR_SUBOP_BAR_SYNC 0 +#define NV50_IR_SUBOP_BAR_ARRIVE 1 +#define NV50_IR_SUBOP_BAR_RED_AND 2 +#define NV50_IR_SUBOP_BAR_RED_OR 3 +#define NV50_IR_SUBOP_BAR_RED_POPC 4 +#define NV50_IR_SUBOP_MEMBAR_L 1 +#define NV50_IR_SUBOP_MEMBAR_S 2 +#define NV50_IR_SUBOP_MEMBAR_M 3 +#define NV50_IR_SUBOP_MEMBAR_CTA (0 << 2) +#define NV50_IR_SUBOP_MEMBAR_GL (1 << 2) +#define NV50_IR_SUBOP_MEMBAR_SYS (2 << 2) +#define NV50_IR_SUBOP_MEMBAR_DIR(m) ((m) & 0x3) 
+#define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3) +#define NV50_IR_SUBOP_MEMBAR(d,s) \ + (NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s) +#define NV50_IR_SUBOP_ATOM_ADD 0 +#define NV50_IR_SUBOP_ATOM_MIN 1 +#define NV50_IR_SUBOP_ATOM_MAX 2 +#define NV50_IR_SUBOP_ATOM_INC 3 +#define NV50_IR_SUBOP_ATOM_DEC 4 +#define NV50_IR_SUBOP_ATOM_AND 5 +#define NV50_IR_SUBOP_ATOM_OR 6 +#define NV50_IR_SUBOP_ATOM_XOR 7 +#define NV50_IR_SUBOP_ATOM_CAS 8 +#define NV50_IR_SUBOP_ATOM_EXCH 9 +#define NV50_IR_SUBOP_CCTL_IV 5 +#define NV50_IR_SUBOP_CCTL_IVALL 6 +#define NV50_IR_SUBOP_SUST_IGN 0 +#define NV50_IR_SUBOP_SUST_TRAP 1 +#define NV50_IR_SUBOP_SUST_SDCL 3 +#define NV50_IR_SUBOP_SULD_ZERO 0 +#define NV50_IR_SUBOP_SULD_TRAP 1 +#define NV50_IR_SUBOP_SULD_SDCL 3 +#define NV50_IR_SUBOP_SUBFM_3D 1 +#define NV50_IR_SUBOP_SUCLAMP_2D 0x10 +#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | ((d == 2) ? 0x10 : 0)) +#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | ((d == 2) ? 0x10 : 0)) +#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | ((d == 2) ? 0x10 : 0)) +#define NV50_IR_SUBOP_MADSP_SD 0xffff +// Yes, we could represent those with DataType. +// Or put the type into operation and have a couple 1000 values in that enum. +// This will have to do for now. +// The bitfields are supposed to correspond to nve4 ISA. 
+#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a)) +#define NV50_IR_SUBOP_V1(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x0000) +#define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000) +#define NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000) +#define NV50_IR_SUBOP_Vn(n) ((n) >> 14) + +enum DataType +{ + TYPE_NONE, + TYPE_U8, + TYPE_S8, + TYPE_U16, + TYPE_S16, + TYPE_U32, + TYPE_S32, + TYPE_U64, // 64 bit operations are only lowered after register allocation + TYPE_S64, + TYPE_F16, + TYPE_F32, + TYPE_F64, + TYPE_B96, + TYPE_B128 +}; + +enum CondCode +{ + CC_FL = 0, + CC_NEVER = CC_FL, // when used with FILE_FLAGS + CC_LT = 1, + CC_EQ = 2, + CC_NOT_P = CC_EQ, // when used with FILE_PREDICATE + CC_LE = 3, + CC_GT = 4, + CC_NE = 5, + CC_P = CC_NE, + CC_GE = 6, + CC_TR = 7, + CC_ALWAYS = CC_TR, + CC_U = 8, + CC_LTU = 9, + CC_EQU = 10, + CC_LEU = 11, + CC_GTU = 12, + CC_NEU = 13, + CC_GEU = 14, + CC_NO = 0x10, + CC_NC = 0x11, + CC_NS = 0x12, + CC_NA = 0x13, + CC_A = 0x14, + CC_S = 0x15, + CC_C = 0x16, + CC_O = 0x17 +}; + +enum RoundMode +{ + ROUND_N, // nearest + ROUND_M, // towards -inf + ROUND_Z, // towards 0 + ROUND_P, // towards +inf + ROUND_NI, // nearest integer + ROUND_MI, // to integer towards -inf + ROUND_ZI, // to integer towards 0 + ROUND_PI, // to integer towards +inf +}; + +enum CacheMode +{ + CACHE_CA, // cache at all levels + CACHE_WB = CACHE_CA, // cache write back + CACHE_CG, // cache at global level + CACHE_CS, // cache streaming + CACHE_CV, // cache as volatile + CACHE_WT = CACHE_CV // cache write-through +}; + +enum DataFile +{ + FILE_NULL = 0, + FILE_GPR, + FILE_PREDICATE, // boolean predicate + FILE_FLAGS, // zero/sign/carry/overflow bits + FILE_ADDRESS, + LAST_REGISTER_FILE = FILE_ADDRESS, + FILE_IMMEDIATE, + FILE_MEMORY_CONST, + FILE_SHADER_INPUT, + FILE_SHADER_OUTPUT, + FILE_MEMORY_GLOBAL, + FILE_MEMORY_SHARED, + FILE_MEMORY_LOCAL, + FILE_SYSTEM_VALUE, + DATA_FILE_COUNT +}; + +enum TexTarget +{ + 
TEX_TARGET_1D, + TEX_TARGET_2D, + TEX_TARGET_2D_MS, + TEX_TARGET_3D, + TEX_TARGET_CUBE, + TEX_TARGET_1D_SHADOW, + TEX_TARGET_2D_SHADOW, + TEX_TARGET_CUBE_SHADOW, + TEX_TARGET_1D_ARRAY, + TEX_TARGET_2D_ARRAY, + TEX_TARGET_2D_MS_ARRAY, + TEX_TARGET_CUBE_ARRAY, + TEX_TARGET_1D_ARRAY_SHADOW, + TEX_TARGET_2D_ARRAY_SHADOW, + TEX_TARGET_RECT, + TEX_TARGET_RECT_SHADOW, + TEX_TARGET_CUBE_ARRAY_SHADOW, + TEX_TARGET_BUFFER, + TEX_TARGET_COUNT +}; + +enum SVSemantic +{ + SV_POSITION, // WPOS + SV_VERTEX_ID, + SV_INSTANCE_ID, + SV_INVOCATION_ID, + SV_PRIMITIVE_ID, + SV_VERTEX_COUNT, // gl_PatchVerticesIn + SV_LAYER, + SV_VIEWPORT_INDEX, + SV_YDIR, + SV_FACE, + SV_POINT_SIZE, + SV_POINT_COORD, + SV_CLIP_DISTANCE, + SV_SAMPLE_INDEX, + SV_TESS_FACTOR, + SV_TESS_COORD, + SV_TID, + SV_CTAID, + SV_NTID, + SV_GRIDID, + SV_NCTAID, + SV_LANEID, + SV_PHYSID, + SV_NPHYSID, + SV_CLOCK, + SV_LBASE, + SV_SBASE, + SV_UNDEFINED, + SV_LAST +}; + +class Program; +class Function; +class BasicBlock; + +class Target; + +class Instruction; +class CmpInstruction; +class TexInstruction; +class FlowInstruction; + +class Value; +class LValue; +class Symbol; +class ImmediateValue; + +struct Storage +{ + DataFile file; + int8_t fileIndex; // signed, may be indirect for CONST[] + uint8_t size; // this should match the Instruction type's size + DataType type; // mainly for pretty printing + union { + uint64_t u64; // immediate values + uint32_t u32; + uint16_t u16; + uint8_t u8; + int64_t s64; + int32_t s32; + int16_t s16; + int8_t s8; + float f32; + double f64; + int32_t offset; // offset from 0 (base of address space) + int32_t id; // register id (< 0 if virtual/unassigned, in units <= 4) + struct { + SVSemantic sv; + int index; + } sv; + } data; +}; + +// precedence: NOT after SAT after NEG after ABS +#define NV50_IR_MOD_ABS (1 << 0) +#define NV50_IR_MOD_NEG (1 << 1) +#define NV50_IR_MOD_SAT (1 << 2) +#define NV50_IR_MOD_NOT (1 << 3) +#define NV50_IR_MOD_NEG_ABS (NV50_IR_MOD_NEG | NV50_IR_MOD_ABS) + 
+#define NV50_IR_INTERP_MODE_MASK 0x3 +#define NV50_IR_INTERP_LINEAR (0 << 0) +#define NV50_IR_INTERP_PERSPECTIVE (1 << 0) +#define NV50_IR_INTERP_FLAT (2 << 0) +#define NV50_IR_INTERP_SC (3 << 0) // what exactly is that ? +#define NV50_IR_INTERP_SAMPLE_MASK 0xc +#define NV50_IR_INTERP_DEFAULT (0 << 2) +#define NV50_IR_INTERP_CENTROID (1 << 2) +#define NV50_IR_INTERP_OFFSET (2 << 2) +#define NV50_IR_INTERP_SAMPLEID (3 << 2) + +// do we really want this to be a class ? +class Modifier +{ +public: + Modifier() : bits(0) { } + Modifier(unsigned int m) : bits(m) { } + Modifier(operation op); + + // @return new Modifier applying a after b (asserts if unrepresentable) + Modifier operator*(const Modifier) const; + Modifier operator*=(const Modifier m) { *this = *this * m; return *this; } + Modifier operator==(const Modifier m) const { return m.bits == bits; } + Modifier operator!=(const Modifier m) const { return m.bits != bits; } + + inline Modifier operator&(const Modifier m) const { return bits & m.bits; } + inline Modifier operator|(const Modifier m) const { return bits | m.bits; } + inline Modifier operator^(const Modifier m) const { return bits ^ m.bits; } + + operation getOp() const; + + inline int neg() const { return (bits & NV50_IR_MOD_NEG) ? 1 : 0; } + inline int abs() const { return (bits & NV50_IR_MOD_ABS) ? 1 : 0; } + + inline operator bool() const { return bits ? 
true : false; } + + void applyTo(ImmediateValue &imm) const; + + int print(char *buf, size_t size) const; + +private: + uint8_t bits; +}; + +class ValueRef +{ +public: + ValueRef(Value * = NULL); + ValueRef(const ValueRef&); + ~ValueRef(); + + inline bool exists() const { return value != NULL; } + + void set(Value *); + void set(const ValueRef&); + inline Value *get() const { return value; } + inline Value *rep() const; + + inline Instruction *getInsn() const { return insn; } + inline void setInsn(Instruction *inst) { insn = inst; } + + inline bool isIndirect(int dim) const { return indirect[dim] >= 0; } + inline const ValueRef *getIndirect(int dim) const; + + inline DataFile getFile() const; + inline unsigned getSize() const; + + // SSA: return eventual (traverse MOVs) literal value, if it exists + bool getImmediate(ImmediateValue&) const; + +public: + Modifier mod; + int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src(indirect[i]) + uint8_t swizzle; + + bool usedAsPtr; // for printing + +private: + Value *value; + Instruction *insn; +}; + +class ValueDef +{ +public: + ValueDef(Value * = NULL); + ValueDef(const ValueDef&); + ~ValueDef(); + + inline bool exists() const { return value != NULL; } + + inline Value *get() const { return value; } + inline Value *rep() const; + void set(Value *); + bool mayReplace(const ValueRef &); + void replace(const ValueRef &, bool doSet); // replace all uses of the old value + + inline Instruction *getInsn() const { return insn; } + inline void setInsn(Instruction *inst) { insn = inst; } + + inline DataFile getFile() const; + inline unsigned getSize() const; + + inline void setSSA(LValue *); + inline const LValue *preSSA() const; + +private: + Value *value; // should make this LValue * ... 
+ LValue *origin; // pre SSA value + Instruction *insn; +}; + +class Value +{ +public: + Value(); + virtual ~Value() { } + + virtual Value *clone(ClonePolicy<Function>&) const = 0; + + virtual int print(char *, size_t, DataType ty = TYPE_NONE) const = 0; + + virtual bool equals(const Value *, bool strict = false) const; + virtual bool interfers(const Value *) const; + virtual bool isUniform() const { return true; } + + inline Value *rep() const { return join; } + + inline Instruction *getUniqueInsn() const; + inline Instruction *getInsn() const; // use when uniqueness is certain + + inline int refCount() { return uses.size(); } + + inline LValue *asLValue(); + inline Symbol *asSym(); + inline ImmediateValue *asImm(); + inline const Symbol *asSym() const; + inline const ImmediateValue *asImm() const; + + inline bool inFile(DataFile f) { return reg.file == f; } + + static inline Value *get(Iterator&); + + std::list<ValueRef *> uses; + std::list<ValueDef *> defs; + typedef std::list<ValueRef *>::iterator UseIterator; + typedef std::list<ValueRef *>::const_iterator UseCIterator; + typedef std::list<ValueDef *>::iterator DefIterator; + typedef std::list<ValueDef *>::const_iterator DefCIterator; + + int id; + Storage reg; + + // TODO: these should be in LValue: + Interval livei; + Value *join; +}; + +class LValue : public Value +{ +public: + LValue(Function *, DataFile file); + LValue(Function *, LValue *); + ~LValue() { } + + virtual bool isUniform() const; + + virtual LValue *clone(ClonePolicy<Function>&) const; + + virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; + +public: + unsigned compMask : 8; // compound/component mask + unsigned compound : 1; // used by RA, value involved in split/merge + unsigned ssa : 1; + unsigned fixedReg : 1; // set & used by RA, earlier just use (id < 0) + unsigned noSpill : 1; // do not spill (e.g. 
if spill temporary already) +}; + +class Symbol : public Value +{ +public: + Symbol(Program *, DataFile file = FILE_MEMORY_CONST, ubyte fileIdx = 0); + ~Symbol() { } + + virtual Symbol *clone(ClonePolicy<Function>&) const; + + virtual bool equals(const Value *that, bool strict) const; + + virtual bool isUniform() const; + + virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; + + // print with indirect values + int print(char *, size_t, Value *, Value *, DataType ty = TYPE_NONE) const; + + inline void setFile(DataFile file, ubyte fileIndex = 0) + { + reg.file = file; + reg.fileIndex = fileIndex; + } + + inline void setOffset(int32_t offset); + inline void setAddress(Symbol *base, int32_t offset); + inline void setSV(SVSemantic sv, uint32_t idx = 0); + + inline const Symbol *getBase() const { return baseSym; } + +private: + Symbol *baseSym; // array base for Symbols representing array elements +}; + +class ImmediateValue : public Value +{ +public: + ImmediateValue() { } + ImmediateValue(Program *, uint32_t); + ImmediateValue(Program *, float); + ImmediateValue(Program *, double); + // NOTE: not added to program with + ImmediateValue(const ImmediateValue *, DataType ty); + ~ImmediateValue() { }; + + virtual ImmediateValue *clone(ClonePolicy<Function>&) const; + + virtual bool equals(const Value *that, bool strict) const; + + // these only work if 'type' is valid (we mostly use untyped literals): + bool isInteger(const int ival) const; // ival is cast to this' type + bool isNegative() const; + bool isPow2() const; + + void applyLog2(); + + // for constant folding: + ImmediateValue operator+(const ImmediateValue&) const; + ImmediateValue operator-(const ImmediateValue&) const; + ImmediateValue operator*(const ImmediateValue&) const; + ImmediateValue operator/(const ImmediateValue&) const; + + ImmediateValue& operator=(const ImmediateValue&); // only sets value ! 
+ + bool compare(CondCode cc, float fval) const; + + virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; +}; + +class Instruction +{ +public: + Instruction(); + Instruction(Function *, operation, DataType); + virtual ~Instruction(); + + virtual Instruction *clone(ClonePolicy<Function>&, + Instruction * = NULL) const; + + void setDef(int i, Value *); + void setSrc(int s, Value *); + void setSrc(int s, const ValueRef&); + void swapSources(int a, int b); + void moveSources(int s, int delta); + bool setIndirect(int s, int dim, Value *); + + inline ValueRef& src(int s) { return srcs[s]; } + inline ValueDef& def(int s) { return defs[s]; } + inline const ValueRef& src(int s) const { return srcs[s]; } + inline const ValueDef& def(int s) const { return defs[s]; } + + inline Value *getDef(int d) const { return defs[d].get(); } + inline Value *getSrc(int s) const { return srcs[s].get(); } + inline Value *getIndirect(int s, int dim) const; + + inline bool defExists(unsigned d) const + { + return d < defs.size() && defs[d].exists(); + } + inline bool srcExists(unsigned s) const + { + return s < srcs.size() && srcs[s].exists(); + } + + inline bool constrainedDefs() const; + + bool setPredicate(CondCode ccode, Value *); + inline Value *getPredicate() const; + bool writesPredicate() const; + inline bool isPredicated() const { return predSrc >= 0; } + + inline void setFlagsSrc(int s, Value *); + inline void setFlagsDef(int d, Value *); + inline bool usesFlags() const { return flagsSrc >= 0; } + + unsigned int defCount() const { return defs.size(); }; + unsigned int defCount(unsigned int mask, bool singleFile = false) const; + unsigned int srcCount() const { return srcs.size(); }; + unsigned int srcCount(unsigned int mask, bool singleFile = false) const; + + // save & remove / set indirect[0,1] and predicate source + void takeExtraSources(int s, Value *[3]); + void putExtraSources(int s, Value *[3]); + + inline void setType(DataType type) { dType = sType = type; } + + 
inline void setType(DataType dtype, DataType stype) + { + dType = dtype; + sType = stype; + } + + inline bool isPseudo() const { return op < OP_MOV; } + bool isDead() const; + bool isNop() const; + bool isCommutationLegal(const Instruction *) const; // must be adjacent ! + bool isActionEqual(const Instruction *) const; + bool isResultEqual(const Instruction *) const; + + void print() const; + + inline CmpInstruction *asCmp(); + inline TexInstruction *asTex(); + inline FlowInstruction *asFlow(); + inline const TexInstruction *asTex() const; + inline const CmpInstruction *asCmp() const; + inline const FlowInstruction *asFlow() const; + +public: + Instruction *next; + Instruction *prev; + int id; + int serial; // CFG order + + operation op; + DataType dType; // destination or defining type + DataType sType; // source or secondary type + CondCode cc; + RoundMode rnd; + CacheMode cache; + + uint16_t subOp; // quadop, 1 for mul-high, etc. + + unsigned encSize : 4; // encoding size in bytes + unsigned saturate : 1; // to [0.0f, 1.0f] + unsigned join : 1; // converge control flow (use OP_JOIN until end) + unsigned fixed : 1; // prevent dead code elimination + unsigned terminator : 1; // end of basic block + unsigned ftz : 1; // flush denormal to zero + unsigned dnz : 1; // denormals, NaN are zero + unsigned ipa : 4; // interpolation mode + unsigned lanes : 4; + unsigned perPatch : 1; + unsigned exit : 1; // terminate program after insn + unsigned mask : 4; // for vector ops + + int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor + + int8_t predSrc; + int8_t flagsDef; + int8_t flagsSrc; + + uint8_t sched; // scheduling data (NOTE: maybe move to separate storage) + + BasicBlock *bb; + +protected: + std::deque<ValueDef> defs; // no gaps ! + std::deque<ValueRef> srcs; // no gaps ! 
+ + // instruction specific methods: + // (don't want to subclass, would need more constructors and memory pools) +public: + inline void setInterpolate(unsigned int mode) { ipa = mode; } + + unsigned int getInterpMode() const { return ipa & 0x3; } + unsigned int getSampleMode() const { return ipa & 0xc; } + +private: + void init(); +}; + +enum TexQuery +{ + TXQ_DIMS, + TXQ_TYPE, + TXQ_SAMPLE_POSITION, + TXQ_FILTER, + TXQ_LOD, + TXQ_WRAP, + TXQ_BORDER_COLOUR +}; + +class TexInstruction : public Instruction +{ +public: + class Target + { + public: + Target(TexTarget targ = TEX_TARGET_2D) : target(targ) { } + + const char *getName() const { return descTable[target].name; } + unsigned int getArgCount() const { return descTable[target].argc; } + unsigned int getDim() const { return descTable[target].dim; } + int isArray() const { return descTable[target].array ? 1 : 0; } + int isCube() const { return descTable[target].cube ? 1 : 0; } + int isShadow() const { return descTable[target].shadow ? 
1 : 0; } + int isMS() const { + return target == TEX_TARGET_2D_MS || target == TEX_TARGET_2D_MS_ARRAY; } + + Target& operator=(TexTarget targ) + { + assert(targ < TEX_TARGET_COUNT); + target = targ; + return *this; + } + + inline bool operator==(TexTarget targ) const { return target == targ; } + inline bool operator!=(TexTarget targ) const { return target != targ; } + + enum TexTarget getEnum() const { return target; } + + private: + struct Desc + { + char name[19]; + uint8_t dim; + uint8_t argc; + bool array; + bool cube; + bool shadow; + }; + + static const struct Desc descTable[TEX_TARGET_COUNT]; + + private: + enum TexTarget target; + }; + +public: + TexInstruction(Function *, operation); + virtual ~TexInstruction(); + + virtual TexInstruction *clone(ClonePolicy<Function>&, + Instruction * = NULL) const; + + inline void setTexture(Target targ, uint8_t r, uint8_t s) + { + tex.r = r; + tex.s = s; + tex.target = targ; + } + + void setIndirectR(Value *); + void setIndirectS(Value *); + inline Value *getIndirectR() const; + inline Value *getIndirectS() const; + +public: + struct { + Target target; + + uint16_t r; + uint16_t s; + int8_t rIndirectSrc; + int8_t sIndirectSrc; + + uint8_t mask; + uint8_t gatherComp; + + bool liveOnly; // only execute on live pixels of a quad (optimization) + bool levelZero; + bool derivAll; + + int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets + int8_t offset[4][3]; + + enum TexQuery query; + } tex; + + ValueRef dPdx[3]; + ValueRef dPdy[3]; +}; + +class CmpInstruction : public Instruction +{ +public: + CmpInstruction(Function *, operation); + + virtual CmpInstruction *clone(ClonePolicy<Function>&, + Instruction * = NULL) const; + + void setCondition(CondCode cond) { setCond = cond; } + CondCode getCondition() const { return setCond; } + +public: + CondCode setCond; +}; + +class FlowInstruction : public Instruction +{ +public: + FlowInstruction(Function *, operation, void *target); + + virtual FlowInstruction 
*clone(ClonePolicy<Function>&, + Instruction * = NULL) const; + +public: + unsigned allWarp : 1; + unsigned absolute : 1; + unsigned limit : 1; + unsigned builtin : 1; // true for calls to emulation code + unsigned indirect : 1; // target in src(0) + + union { + BasicBlock *bb; + int builtin; + Function *fn; + } target; +}; + +class BasicBlock +{ +public: + BasicBlock(Function *); + ~BasicBlock(); + + BasicBlock *clone(ClonePolicy<Function>&) const; + + inline int getId() const { return id; } + inline unsigned int getInsnCount() const { return numInsns; } + inline bool isTerminated() const { return exit && exit->terminator; } + + bool dominatedBy(BasicBlock *bb); + inline bool reachableBy(const BasicBlock *by, const BasicBlock *term); + + // returns mask of conditional out blocks + // e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF + unsigned int initiatesSimpleConditional() const; + +public: + Function *getFunction() const { return func; } + Program *getProgram() const { return program; } + + Instruction *getEntry() const { return entry; } // first non-phi instruction + Instruction *getPhi() const { return phi; } + Instruction *getFirst() const { return phi ? 
phi : entry; } + Instruction *getExit() const { return exit; } + + void insertHead(Instruction *); + void insertTail(Instruction *); + void insertBefore(Instruction *, Instruction *); + void insertAfter(Instruction *, Instruction *); + void remove(Instruction *); + void permuteAdjacent(Instruction *, Instruction *); + + BasicBlock *idom() const; + + // NOTE: currently does not rebuild the dominator tree + BasicBlock *splitBefore(Instruction *, bool attach = true); + BasicBlock *splitAfter(Instruction *, bool attach = true); + + DLList& getDF() { return df; } + DLList::Iterator iterDF() { return df.iterator(); } + + static inline BasicBlock *get(Iterator&); + static inline BasicBlock *get(Graph::Node *); + +public: + Graph::Node cfg; // first edge is branch *taken* (the ELSE branch) + Graph::Node dom; + + BitSet liveSet; + BitSet defSet; + + uint32_t binPos; + uint32_t binSize; + + Instruction *joinAt; // for quick reference + + bool explicitCont; // loop headers: true if loop contains continue stmts + +private: + int id; + DLList df; + + Instruction *phi; + Instruction *entry; + Instruction *exit; + + unsigned int numInsns; + +private: + Function *func; + Program *program; + + void splitCommon(Instruction *, BasicBlock *, bool attach); +}; + +class Function +{ +public: + Function(Program *, const char *name, uint32_t label); + ~Function(); + + static inline Function *get(Graph::Node *node); + + inline Program *getProgram() const { return prog; } + inline const char *getName() const { return name; } + inline int getId() const { return id; } + inline uint32_t getLabel() const { return label; } + + void print(); + void printLiveIntervals() const; + void printCFGraph(const char *filePath); + + bool setEntry(BasicBlock *); + bool setExit(BasicBlock *); + + unsigned int orderInstructions(ArrayList&); + + inline void add(BasicBlock *bb, int& id) { allBBlocks.insert(bb, id); } + inline void add(Instruction *insn, int& id) { allInsns.insert(insn, id); } + inline void 
add(LValue *lval, int& id) { allLValues.insert(lval, id); } + + inline LValue *getLValue(int id); + + void buildLiveSets(); + void buildDefSets(); + bool convertToSSA(); + +public: + std::deque<ValueDef> ins; + std::deque<ValueRef> outs; + std::deque<Value *> clobbers; + + Graph cfg; + Graph::Node *cfgExit; + Graph *domTree; + Graph::Node call; // node in the call graph + + BasicBlock **bbArray; // BBs in emission order + int bbCount; + + unsigned int loopNestingBound; + int regClobberMax; + + uint32_t binPos; + uint32_t binSize; + + Value *stackPtr; + + uint32_t tlsBase; // base address for l[] space (if no stack pointer is used) + uint32_t tlsSize; + + ArrayList allBBlocks; + ArrayList allInsns; + ArrayList allLValues; + +private: + void buildLiveSetsPreSSA(BasicBlock *, const int sequence); + void buildDefSetsPreSSA(BasicBlock *bb, const int seq); + +private: + uint32_t label; + int id; + const char *const name; + Program *prog; +}; + +enum CGStage +{ + CG_STAGE_PRE_SSA, + CG_STAGE_SSA, // expected directly before register allocation + CG_STAGE_POST_RA +}; + +class Program +{ +public: + enum Type + { + TYPE_VERTEX, + TYPE_TESSELLATION_CONTROL, + TYPE_TESSELLATION_EVAL, + TYPE_GEOMETRY, + TYPE_FRAGMENT, + TYPE_COMPUTE + }; + + Program(Type type, Target *targ); + ~Program(); + + void print(); + + Type getType() const { return progType; } + + inline void add(Function *fn, int& id) { allFuncs.insert(fn, id); } + inline void del(Function *fn, int& id) { allFuncs.remove(id); } + inline void add(Value *rval, int& id) { allRValues.insert(rval, id); } + + bool makeFromTGSI(struct nv50_ir_prog_info *); + bool makeFromSM4(struct nv50_ir_prog_info *); + bool convertToSSA(); + bool optimizeSSA(int level); + bool optimizePostRA(int level); + bool registerAllocation(); + bool emitBinary(struct nv50_ir_prog_info *); + + const Target *getTarget() const { return target; } + +private: + void emitSymbolTable(struct nv50_ir_prog_info *); + + Type progType; + Target *target; + 
+public: + Function *main; + Graph calls; + + ArrayList allFuncs; + ArrayList allRValues; + + uint32_t *code; + uint32_t binSize; + uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL + + int maxGPR; + + MemoryPool mem_Instruction; + MemoryPool mem_CmpInstruction; + MemoryPool mem_TexInstruction; + MemoryPool mem_FlowInstruction; + MemoryPool mem_LValue; + MemoryPool mem_Symbol; + MemoryPool mem_ImmediateValue; + + uint32_t dbgFlags; + uint8_t optLevel; + + void *targetPriv; // e.g. to carry information between passes + + const struct nv50_ir_prog_info *driver; // for driver configuration + + void releaseInstruction(Instruction *); + void releaseValue(Value *); +}; + +// TODO: add const version +class Pass +{ +public: + bool run(Program *, bool ordered = false, bool skipPhi = false); + bool run(Function *, bool ordered = false, bool skipPhi = false); + +private: + // return false to continue with next entity on next higher level + virtual bool visit(Function *) { return true; } + virtual bool visit(BasicBlock *) { return true; } + virtual bool visit(Instruction *) { return false; } + + bool doRun(Program *, bool ordered, bool skipPhi); + bool doRun(Function *, bool ordered, bool skipPhi); + +protected: + bool err; + Function *func; + Program *prog; +}; + +// ============================================================================= + +#include "codegen/nv50_ir_inlines.h" + +} // namespace nv50_ir + +#endif // __NV50_IR_H__ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp new file mode 100644 index 00000000000..51b9225156b --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp @@ -0,0 +1,550 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the 
rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "codegen/nv50_ir.h" + +namespace nv50_ir { + +Function::Function(Program *p, const char *fnName, uint32_t label) + : call(this), + label(label), + name(fnName), + prog(p) +{ + cfgExit = NULL; + domTree = NULL; + + bbArray = NULL; + bbCount = 0; + loopNestingBound = 0; + regClobberMax = 0; + + binPos = 0; + binSize = 0; + + stackPtr = NULL; + tlsBase = 0; + tlsSize = 0; + + prog->add(this, id); +} + +Function::~Function() +{ + prog->del(this, id); + + if (domTree) + delete domTree; + if (bbArray) + delete[] bbArray; + + // clear value refs and defs + ins.clear(); + outs.clear(); + + for (ArrayList::Iterator it = allInsns.iterator(); !it.end(); it.next()) + delete_Instruction(prog, reinterpret_cast<Instruction *>(it.get())); + + for (ArrayList::Iterator it = allLValues.iterator(); !it.end(); it.next()) + delete_Value(prog, reinterpret_cast<LValue *>(it.get())); + + for (ArrayList::Iterator BBs = allBBlocks.iterator(); !BBs.end(); BBs.next()) + delete reinterpret_cast<BasicBlock *>(BBs.get()); +} + +BasicBlock::BasicBlock(Function *fn) : cfg(this), dom(this), func(fn) +{ + program = func->getProgram(); + + joinAt = phi 
= entry = exit = NULL; + + numInsns = 0; + binPos = 0; + binSize = 0; + + explicitCont = false; + + func->add(this, this->id); +} + +BasicBlock::~BasicBlock() +{ + // nothing yet +} + +BasicBlock * +BasicBlock::clone(ClonePolicy<Function>& pol) const +{ + BasicBlock *bb = new BasicBlock(pol.context()); + + pol.set(this, bb); + + for (Instruction *i = getFirst(); i; i = i->next) + bb->insertTail(i->clone(pol)); + + pol.context()->cfg.insert(&bb->cfg); + + for (Graph::EdgeIterator it = cfg.outgoing(); !it.end(); it.next()) { + BasicBlock *obb = BasicBlock::get(it.getNode()); + bb->cfg.attach(&pol.get(obb)->cfg, it.getType()); + } + + return bb; +} + +BasicBlock * +BasicBlock::idom() const +{ + Graph::Node *dn = dom.parent(); + return dn ? BasicBlock::get(dn) : NULL; +} + +void +BasicBlock::insertHead(Instruction *inst) +{ + assert(inst->next == 0 && inst->prev == 0); + + if (inst->op == OP_PHI) { + if (phi) { + insertBefore(phi, inst); + } else { + if (entry) { + insertBefore(entry, inst); + } else { + assert(!exit); + phi = exit = inst; + inst->bb = this; + ++numInsns; + } + } + } else { + if (entry) { + insertBefore(entry, inst); + } else { + if (phi) { + insertAfter(exit, inst); // after last phi + } else { + assert(!exit); + entry = exit = inst; + inst->bb = this; + ++numInsns; + } + } + } +} + +void +BasicBlock::insertTail(Instruction *inst) +{ + assert(inst->next == 0 && inst->prev == 0); + + if (inst->op == OP_PHI) { + if (entry) { + insertBefore(entry, inst); + } else + if (exit) { + assert(phi); + insertAfter(exit, inst); + } else { + assert(!phi); + phi = exit = inst; + inst->bb = this; + ++numInsns; + } + } else { + if (exit) { + insertAfter(exit, inst); + } else { + assert(!phi); + entry = exit = inst; + inst->bb = this; + ++numInsns; + } + } +} + +void +BasicBlock::insertBefore(Instruction *q, Instruction *p) +{ + assert(p && q); + + assert(p->next == 0 && p->prev == 0); + + if (q == entry) { + if (p->op == OP_PHI) { + if (!phi) + phi = p; + } else { + 
entry = p; + } + } else + if (q == phi) { + assert(p->op == OP_PHI); + phi = p; + } + + p->next = q; + p->prev = q->prev; + if (p->prev) + p->prev->next = p; + q->prev = p; + + p->bb = this; + ++numInsns; +} + +void +BasicBlock::insertAfter(Instruction *p, Instruction *q) +{ + assert(p && q); + assert(q->op != OP_PHI || p->op == OP_PHI); + + assert(q->next == 0 && q->prev == 0); + + if (p == exit) + exit = q; + if (p->op == OP_PHI && q->op != OP_PHI) + entry = q; + + q->prev = p; + q->next = p->next; + if (q->next) + q->next->prev = q; + p->next = q; + + q->bb = this; + ++numInsns; +} + +void +BasicBlock::remove(Instruction *insn) +{ + assert(insn->bb == this); + + if (insn->prev) + insn->prev->next = insn->next; + + if (insn->next) + insn->next->prev = insn->prev; + else + exit = insn->prev; + + if (insn == entry) { + if (insn->next) + entry = insn->next; + else + if (insn->prev && insn->prev->op != OP_PHI) + entry = insn->prev; + else + entry = NULL; + } + + if (insn == phi) + phi = (insn->next && insn->next->op == OP_PHI) ? 
insn->next : 0; + + --numInsns; + insn->bb = NULL; + insn->next = + insn->prev = NULL; +} + +void BasicBlock::permuteAdjacent(Instruction *a, Instruction *b) +{ + assert(a->bb == b->bb); + + if (a->next != b) { + Instruction *i = a; + a = b; + b = i; + } + assert(a->next == b); + assert(a->op != OP_PHI && b->op != OP_PHI); + + if (b == exit) + exit = a; + if (a == entry) + entry = b; + + b->prev = a->prev; + a->next = b->next; + b->next = a; + a->prev = b; + + if (b->prev) + b->prev->next = b; + if (a->prev) + a->next->prev = a; +} + +void +BasicBlock::splitCommon(Instruction *insn, BasicBlock *bb, bool attach) +{ + bb->entry = insn; + + if (insn) { + exit = insn->prev; + insn->prev = NULL; + } + + if (exit) + exit->next = NULL; + else + entry = NULL; + + while (!cfg.outgoing(true).end()) { + Graph::Edge *e = cfg.outgoing(true).getEdge(); + bb->cfg.attach(e->getTarget(), e->getType()); + this->cfg.detach(e->getTarget()); + } + + for (; insn; insn = insn->next) { + this->numInsns--; + bb->numInsns++; + insn->bb = bb; + bb->exit = insn; + } + if (attach) + this->cfg.attach(&bb->cfg, Graph::Edge::TREE); +} + +BasicBlock * +BasicBlock::splitBefore(Instruction *insn, bool attach) +{ + BasicBlock *bb = new BasicBlock(func); + assert(!insn || insn->op != OP_PHI); + + splitCommon(insn, bb, attach); + return bb; +} + +BasicBlock * +BasicBlock::splitAfter(Instruction *insn, bool attach) +{ + BasicBlock *bb = new BasicBlock(func); + assert(!insn || insn->op != OP_PHI); + + bb->joinAt = joinAt; + joinAt = NULL; + + splitCommon(insn ? 
insn->next : NULL, bb, attach); + return bb; +} + +bool +BasicBlock::dominatedBy(BasicBlock *that) +{ + Graph::Node *bn = &that->dom; + Graph::Node *dn = &this->dom; + + while (dn && dn != bn) + dn = dn->parent(); + + return dn != NULL; +} + +unsigned int +BasicBlock::initiatesSimpleConditional() const +{ + Graph::Node *out[2]; + int n; + Graph::Edge::Type eR; + + if (cfg.outgoingCount() != 2) // -> if and -> else/endif + return false; + + n = 0; + for (Graph::EdgeIterator ei = cfg.outgoing(); !ei.end(); ei.next()) + out[n++] = ei.getNode(); + eR = out[1]->outgoing().getType(); + + // IF block is out edge to the right + if (eR == Graph::Edge::CROSS || eR == Graph::Edge::BACK) + return 0x2; + + if (out[1]->outgoingCount() != 1) // 0 is IF { RET; }, >1 is more divergence + return 0x0; + // do they reconverge immediately ? + if (out[1]->outgoing().getNode() == out[0]) + return 0x1; + if (out[0]->outgoingCount() == 1) + if (out[0]->outgoing().getNode() == out[1]->outgoing().getNode()) + return 0x3; + + return 0x0; +} + +bool +Function::setEntry(BasicBlock *bb) +{ + if (cfg.getRoot()) + return false; + cfg.insert(&bb->cfg); + return true; +} + +bool +Function::setExit(BasicBlock *bb) +{ + if (cfgExit) + return false; + cfgExit = &bb->cfg; + return true; +} + +unsigned int +Function::orderInstructions(ArrayList &result) +{ + result.clear(); + + for (IteratorRef it = cfg.iteratorCFG(); !it->end(); it->next()) { + BasicBlock *bb = + BasicBlock::get(reinterpret_cast<Graph::Node *>(it->get())); + + for (Instruction *insn = bb->getFirst(); insn; insn = insn->next) + result.insert(insn, insn->serial); + } + + return result.getSize(); +} + +void +Function::buildLiveSets() +{ + for (unsigned i = 0; i <= loopNestingBound; ++i) + buildLiveSetsPreSSA(BasicBlock::get(cfg.getRoot()), cfg.nextSequence()); + + for (ArrayList::Iterator bi = allBBlocks.iterator(); !bi.end(); bi.next()) + BasicBlock::get(bi)->liveSet.marker = false; +} + +void +Function::buildDefSets() +{ + for (unsigned 
i = 0; i <= loopNestingBound; ++i) + buildDefSetsPreSSA(BasicBlock::get(cfgExit), cfg.nextSequence()); + + for (ArrayList::Iterator bi = allBBlocks.iterator(); !bi.end(); bi.next()) + BasicBlock::get(bi)->liveSet.marker = false; +} + +bool +Pass::run(Program *prog, bool ordered, bool skipPhi) +{ + this->prog = prog; + err = false; + return doRun(prog, ordered, skipPhi); +} + +bool +Pass::doRun(Program *prog, bool ordered, bool skipPhi) +{ + for (IteratorRef it = prog->calls.iteratorDFS(false); + !it->end(); it->next()) { + Graph::Node *n = reinterpret_cast<Graph::Node *>(it->get()); + if (!doRun(Function::get(n), ordered, skipPhi)) + return false; + } + return !err; +} + +bool +Pass::run(Function *func, bool ordered, bool skipPhi) +{ + prog = func->getProgram(); + err = false; + return doRun(func, ordered, skipPhi); +} + +bool +Pass::doRun(Function *func, bool ordered, bool skipPhi) +{ + IteratorRef bbIter; + BasicBlock *bb; + Instruction *insn, *next; + + this->func = func; + if (!visit(func)) + return false; + + bbIter = ordered ? func->cfg.iteratorCFG() : func->cfg.iteratorDFS(); + + for (; !bbIter->end(); bbIter->next()) { + bb = BasicBlock::get(reinterpret_cast<Graph::Node *>(bbIter->get())); + if (!visit(bb)) + break; + for (insn = skipPhi ? 
bb->getEntry() : bb->getFirst(); insn != NULL; + insn = next) { + next = insn->next; + if (!visit(insn)) + break; + } + } + + return !err; +} + +void +Function::printCFGraph(const char *filePath) +{ + FILE *out = fopen(filePath, "a"); + if (!out) { + ERROR("failed to open file: %s\n", filePath); + return; + } + INFO("printing control flow graph to: %s\n", filePath); + + fprintf(out, "digraph G {\n"); + + for (IteratorRef it = cfg.iteratorDFS(); !it->end(); it->next()) { + BasicBlock *bb = BasicBlock::get( + reinterpret_cast<Graph::Node *>(it->get())); + int idA = bb->getId(); + for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { + int idB = BasicBlock::get(ei.getNode())->getId(); + switch (ei.getType()) { + case Graph::Edge::TREE: + fprintf(out, "\t%i -> %i;\n", idA, idB); + break; + case Graph::Edge::FORWARD: + fprintf(out, "\t%i -> %i [color=green];\n", idA, idB); + break; + case Graph::Edge::CROSS: + fprintf(out, "\t%i -> %i [color=red];\n", idA, idB); + break; + case Graph::Edge::BACK: + fprintf(out, "\t%i -> %i;\n", idA, idB); + break; + case Graph::Edge::DUMMY: + fprintf(out, "\t%i -> %i [style=dotted];\n", idA, idB); + break; + default: + assert(0); + break; + } + } + } + + fprintf(out, "}\n"); + fclose(out); +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp new file mode 100644 index 00000000000..70e5e226bed --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp @@ -0,0 +1,614 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the 
+ * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_build_util.h" + +namespace nv50_ir { + +BuildUtil::BuildUtil() +{ + init(NULL); +} + +BuildUtil::BuildUtil(Program *prog) +{ + init(prog); +} + +void +BuildUtil::init(Program *prog) +{ + this->prog = prog; + + func = NULL; + bb = NULL; + pos = NULL; + + memset(imms, 0, sizeof(imms)); + immCount = 0; +} + +void +BuildUtil::addImmediate(ImmediateValue *imm) +{ + if (immCount > (NV50_IR_BUILD_IMM_HT_SIZE * 3) / 4) + return; + + unsigned int pos = u32Hash(imm->reg.data.u32); + + while (imms[pos]) + pos = (pos + 1) % NV50_IR_BUILD_IMM_HT_SIZE; + imms[pos] = imm; + immCount++; +} + +Instruction * +BuildUtil::mkOp1(operation op, DataType ty, Value *dst, Value *src) +{ + Instruction *insn = new_Instruction(func, op, ty); + + insn->setDef(0, dst); + insn->setSrc(0, src); + + insert(insn); + return insn; +} + +Instruction * +BuildUtil::mkOp2(operation op, DataType ty, Value *dst, + Value *src0, Value *src1) +{ + Instruction *insn = new_Instruction(func, op, ty); + + insn->setDef(0, dst); + insn->setSrc(0, src0); + insn->setSrc(1, src1); + + insert(insn); + return insn; +} + +Instruction * +BuildUtil::mkOp3(operation op, DataType ty, Value *dst, + Value *src0, Value *src1, Value *src2) +{ + Instruction *insn = 
new_Instruction(func, op, ty); + + insn->setDef(0, dst); + insn->setSrc(0, src0); + insn->setSrc(1, src1); + insn->setSrc(2, src2); + + insert(insn); + return insn; +} + +Instruction * +BuildUtil::mkLoad(DataType ty, Value *dst, Symbol *mem, Value *ptr) +{ + Instruction *insn = new_Instruction(func, OP_LOAD, ty); + + insn->setDef(0, dst); + insn->setSrc(0, mem); + if (ptr) + insn->setIndirect(0, 0, ptr); + + insert(insn); + return insn; +} + +Instruction * +BuildUtil::mkStore(operation op, DataType ty, Symbol *mem, Value *ptr, + Value *stVal) +{ + Instruction *insn = new_Instruction(func, op, ty); + + insn->setSrc(0, mem); + insn->setSrc(1, stVal); + if (ptr) + insn->setIndirect(0, 0, ptr); + + insert(insn); + return insn; +} + +Instruction * +BuildUtil::mkFetch(Value *dst, DataType ty, DataFile file, int32_t offset, + Value *attrRel, Value *primRel) +{ + Symbol *sym = mkSymbol(file, 0, ty, offset); + + Instruction *insn = mkOp1(OP_VFETCH, ty, dst, sym); + + insn->setIndirect(0, 0, attrRel); + insn->setIndirect(0, 1, primRel); + + // already inserted + return insn; +} + +Instruction * +BuildUtil::mkInterp(unsigned mode, Value *dst, int32_t offset, Value *rel) +{ + operation op = OP_LINTERP; + DataType ty = TYPE_F32; + + if ((mode & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_FLAT) + ty = TYPE_U32; + else + if ((mode & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_PERSPECTIVE) + op = OP_PINTERP; + + Symbol *sym = mkSymbol(FILE_SHADER_INPUT, 0, ty, offset); + + Instruction *insn = mkOp1(op, ty, dst, sym); + insn->setIndirect(0, 0, rel); + return insn; +} + +Instruction * +BuildUtil::mkMov(Value *dst, Value *src, DataType ty) +{ + Instruction *insn = new_Instruction(func, OP_MOV, ty); + + insn->setDef(0, dst); + insn->setSrc(0, src); + + insert(insn); + return insn; +} + +Instruction * +BuildUtil::mkMovToReg(int id, Value *src) +{ + Instruction *insn = new_Instruction(func, OP_MOV, typeOfSize(src->reg.size)); + + insn->setDef(0, new_LValue(func, FILE_GPR)); + 
insn->getDef(0)->reg.data.id = id; + insn->setSrc(0, src); + + insert(insn); + return insn; +} + +Instruction * +BuildUtil::mkMovFromReg(Value *dst, int id) +{ + Instruction *insn = new_Instruction(func, OP_MOV, typeOfSize(dst->reg.size)); + + insn->setDef(0, dst); + insn->setSrc(0, new_LValue(func, FILE_GPR)); + insn->getSrc(0)->reg.data.id = id; + + insert(insn); + return insn; +} + +Instruction * +BuildUtil::mkCvt(operation op, + DataType dstTy, Value *dst, DataType srcTy, Value *src) +{ + Instruction *insn = new_Instruction(func, op, dstTy); + + insn->setType(dstTy, srcTy); + insn->setDef(0, dst); + insn->setSrc(0, src); + + insert(insn); + return insn; +} + +CmpInstruction * +BuildUtil::mkCmp(operation op, CondCode cc, DataType ty, Value *dst, + Value *src0, Value *src1, Value *src2) +{ + CmpInstruction *insn = new_CmpInstruction(func, op); + + insn->setType((dst->reg.file == FILE_PREDICATE || + dst->reg.file == FILE_FLAGS) ? TYPE_U8 : ty, ty); + insn->setCondition(cc); + insn->setDef(0, dst); + insn->setSrc(0, src0); + insn->setSrc(1, src1); + if (src2) + insn->setSrc(2, src2); + + if (dst->reg.file == FILE_FLAGS) + insn->flagsDef = 0; + + insert(insn); + return insn; +} + +TexInstruction * +BuildUtil::mkTex(operation op, TexTarget targ, + uint16_t tic, uint16_t tsc, + const std::vector<Value *> &def, + const std::vector<Value *> &src) +{ + TexInstruction *tex = new_TexInstruction(func, op); + + for (size_t d = 0; d < def.size() && def[d]; ++d) + tex->setDef(d, def[d]); + for (size_t s = 0; s < src.size() && src[s]; ++s) + tex->setSrc(s, src[s]); + + tex->setTexture(targ, tic, tsc); + + insert(tex); + return tex; +} + +Instruction * +BuildUtil::mkQuadop(uint8_t q, Value *def, uint8_t l, Value *src0, Value *src1) +{ + Instruction *quadop = mkOp2(OP_QUADOP, TYPE_F32, def, src0, src1); + quadop->subOp = q; + quadop->lanes = l; + return quadop; +} + +Instruction * +BuildUtil::mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc) +{ + LValue *def0 = 
getSSA(); + LValue *def1 = getSSA(); + + mkMov(def0, trSrc)->setPredicate(CC_P, pred); + mkMov(def1, flSrc)->setPredicate(CC_NOT_P, pred); + + return mkOp2(OP_UNION, typeOfSize(dst->reg.size), dst, def0, def1); +} + +Instruction * +BuildUtil::mkSplit(Value *h[2], uint8_t halfSize, Value *val) +{ + Instruction *insn = NULL; + + const DataType fTy = typeOfSize(halfSize * 2); + + if (val->reg.file == FILE_IMMEDIATE) + val = mkMov(getSSA(halfSize * 2), val, fTy)->getDef(0); + + if (isMemoryFile(val->reg.file)) { + h[0] = cloneShallow(getFunction(), val); + h[1] = cloneShallow(getFunction(), val); + h[0]->reg.size = halfSize; + h[1]->reg.size = halfSize; + h[1]->reg.data.offset += halfSize; + } else { + h[0] = getSSA(halfSize, val->reg.file); + h[1] = getSSA(halfSize, val->reg.file); + insn = mkOp1(OP_SPLIT, fTy, h[0], val); + insn->setDef(1, h[1]); + } + return insn; +} + +FlowInstruction * +BuildUtil::mkFlow(operation op, void *targ, CondCode cc, Value *pred) +{ + FlowInstruction *insn = new_FlowInstruction(func, op, targ); + + if (pred) + insn->setPredicate(cc, pred); + + insert(insn); + return insn; +} + +void +BuildUtil::mkClobber(DataFile f, uint32_t rMask, int unit) +{ + static const uint16_t baseSize2[16] = + { + 0x0000, 0x0010, 0x0011, 0x0020, 0x0012, 0x1210, 0x1211, 0x1220, + 0x0013, 0x1310, 0x1311, 0x1320, 0x0022, 0x2210, 0x2211, 0x0040, + }; + + int base = 0; + + for (; rMask; rMask >>= 4, base += 4) { + const uint32_t mask = rMask & 0xf; + if (!mask) + continue; + int base1 = (baseSize2[mask] >> 0) & 0xf; + int size1 = (baseSize2[mask] >> 4) & 0xf; + int base2 = (baseSize2[mask] >> 8) & 0xf; + int size2 = (baseSize2[mask] >> 12) & 0xf; + Instruction *insn = mkOp(OP_NOP, TYPE_NONE, NULL); + if (1) { // size1 can't be 0 + LValue *reg = new_LValue(func, f); + reg->reg.size = size1 << unit; + reg->reg.data.id = base + base1; + insn->setDef(0, reg); + } + if (size2) { + LValue *reg = new_LValue(func, f); + reg->reg.size = size2 << unit; + reg->reg.data.id = base 
+ base2; + insn->setDef(1, reg); + } + } +} + +ImmediateValue * +BuildUtil::mkImm(uint32_t u) +{ + unsigned int pos = u32Hash(u); + + while (imms[pos] && imms[pos]->reg.data.u32 != u) + pos = (pos + 1) % NV50_IR_BUILD_IMM_HT_SIZE; + + ImmediateValue *imm = imms[pos]; + if (!imm) { + imm = new_ImmediateValue(prog, u); + addImmediate(imm); + } + return imm; +} + +ImmediateValue * +BuildUtil::mkImm(uint64_t u) +{ + ImmediateValue *imm = new_ImmediateValue(prog, (uint32_t)0); + + imm->reg.size = 8; + imm->reg.type = TYPE_U64; + imm->reg.data.u64 = u; + + return imm; +} + +ImmediateValue * +BuildUtil::mkImm(float f) +{ + union { + float f32; + uint32_t u32; + } u; + u.f32 = f; + return mkImm(u.u32); +} + +Value * +BuildUtil::loadImm(Value *dst, float f) +{ + return mkOp1v(OP_MOV, TYPE_F32, dst ? dst : getScratch(), mkImm(f)); +} + +Value * +BuildUtil::loadImm(Value *dst, uint32_t u) +{ + return mkOp1v(OP_MOV, TYPE_U32, dst ? dst : getScratch(), mkImm(u)); +} + +Value * +BuildUtil::loadImm(Value *dst, uint64_t u) +{ + return mkOp1v(OP_MOV, TYPE_U64, dst ? 
dst : getScratch(8), mkImm(u)); +} + +Symbol * +BuildUtil::mkSymbol(DataFile file, int8_t fileIndex, DataType ty, + uint32_t baseAddr) +{ + Symbol *sym = new_Symbol(prog, file, fileIndex); + + sym->setOffset(baseAddr); + sym->reg.type = ty; + sym->reg.size = typeSizeof(ty); + + return sym; +} + +Symbol * +BuildUtil::mkSysVal(SVSemantic svName, uint32_t svIndex) +{ + Symbol *sym = new_Symbol(prog, FILE_SYSTEM_VALUE, 0); + + assert(svIndex < 4 || + (svName == SV_CLIP_DISTANCE || svName == SV_TESS_FACTOR)); + + switch (svName) { + case SV_POSITION: + case SV_FACE: + case SV_YDIR: + case SV_POINT_SIZE: + case SV_POINT_COORD: + case SV_CLIP_DISTANCE: + case SV_TESS_FACTOR: + sym->reg.type = TYPE_F32; + break; + default: + sym->reg.type = TYPE_U32; + break; + } + sym->reg.size = typeSizeof(sym->reg.type); + + sym->reg.data.sv.sv = svName; + sym->reg.data.sv.index = svIndex; + + return sym; +} + +void +BuildUtil::DataArray::setup(unsigned array, unsigned arrayIdx, + uint32_t base, int len, int vecDim, int eltSize, + DataFile file, int8_t fileIdx) +{ + this->array = array; + this->arrayIdx = arrayIdx; + this->baseAddr = base; + this->arrayLen = len; + this->vecDim = vecDim; + this->eltSize = eltSize; + this->file = file; + this->regOnly = !isMemoryFile(file); + + if (!regOnly) { + baseSym = new_Symbol(up->getProgram(), file, fileIdx); + baseSym->setOffset(baseAddr); + baseSym->reg.size = eltSize; + } else { + baseSym = NULL; + } +} + +Value * +BuildUtil::DataArray::acquire(ValueMap &m, int i, int c) +{ + if (regOnly) { + Value *v = lookup(m, i, c); + if (!v) + v = insert(m, i, c, new_LValue(up->getFunction(), file)); + + return v; + } else { + return up->getScratch(); + } +} + +Value * +BuildUtil::DataArray::load(ValueMap &m, int i, int c, Value *ptr) +{ + if (regOnly) { + Value *v = lookup(m, i, c); + if (!v) + v = insert(m, i, c, new_LValue(up->getFunction(), file)); + + return v; + } else { + Value *sym = lookup(m, i, c); + if (!sym) + sym = insert(m, i, c, mkSymbol(i, 
c)); + + return up->mkLoadv(typeOfSize(eltSize), static_cast<Symbol *>(sym), ptr); + } +} + +void +BuildUtil::DataArray::store(ValueMap &m, int i, int c, Value *ptr, Value *value) +{ + if (regOnly) { + assert(!ptr); + if (!lookup(m, i, c)) + insert(m, i, c, value); + + assert(lookup(m, i, c) == value); + } else { + Value *sym = lookup(m, i, c); + if (!sym) + sym = insert(m, i, c, mkSymbol(i, c)); + + const DataType stTy = typeOfSize(value->reg.size); + + up->mkStore(OP_STORE, stTy, static_cast<Symbol *>(sym), ptr, value); + } +} + +Symbol * +BuildUtil::DataArray::mkSymbol(int i, int c) +{ + const unsigned int idx = i * vecDim + c; + Symbol *sym = new_Symbol(up->getProgram(), file, 0); + + assert(baseSym || (idx < arrayLen && c < vecDim)); + + sym->reg.size = eltSize; + sym->reg.type = typeOfSize(eltSize); + sym->setAddress(baseSym, baseAddr + idx * eltSize); + return sym; +} + + +Instruction * +BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i, + Value *zero, + Value *carry) +{ + DataType hTy; + int srcNr; + + switch (i->dType) { + case TYPE_U64: hTy = TYPE_U32; break; + case TYPE_S64: hTy = TYPE_S32; break; + default: + return NULL; + } + + switch (i->op) { + case OP_MOV: srcNr = 1; break; + case OP_ADD: + case OP_SUB: + if (!carry) + return NULL; + srcNr = 2; + break; + default: + // TODO when needed + return NULL; + } + + i->setType(hTy); + i->setDef(0, cloneShallow(fn, i->getDef(0))); + i->getDef(0)->reg.size = 4; + Instruction *lo = i; + Instruction *hi = cloneForward(fn, i); + lo->bb->insertAfter(lo, hi); + + hi->getDef(0)->reg.data.id++; + + for (int s = 0; s < srcNr; ++s) { + if (lo->getSrc(s)->reg.size < 8) { + hi->setSrc(s, zero); + } else { + if (lo->getSrc(s)->refCount() > 1) + lo->setSrc(s, cloneShallow(fn, lo->getSrc(s))); + lo->getSrc(s)->reg.size /= 2; + hi->setSrc(s, cloneShallow(fn, lo->getSrc(s))); + + switch (hi->src(s).getFile()) { + case FILE_IMMEDIATE: + hi->getSrc(s)->reg.data.u64 >>= 32; + break; + case FILE_MEMORY_CONST: + case 
FILE_MEMORY_SHARED: + case FILE_SHADER_INPUT: + hi->getSrc(s)->reg.data.offset += 4; + break; + default: + assert(hi->src(s).getFile() == FILE_GPR); + hi->getSrc(s)->reg.data.id++; + break; + } + } + } + if (srcNr == 2) { + lo->setDef(1, carry); + hi->setFlagsSrc(hi->srcCount(), carry); + } + return hi; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h new file mode 100644 index 00000000000..2305a275d0d --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h @@ -0,0 +1,324 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __NV50_IR_BUILD_UTIL__ +#define __NV50_IR_BUILD_UTIL__ + +namespace nv50_ir { + +class BuildUtil +{ +public: + BuildUtil(); + BuildUtil(Program *); + + inline void setProgram(Program *); + inline Program *getProgram() const { return prog; } + inline Function *getFunction() const { return func; } + + // keeps inserting at head/tail of block + inline void setPosition(BasicBlock *, bool tail); + // position advances only if @after is true + inline void setPosition(Instruction *, bool after); + + inline BasicBlock *getBB() { return bb; } + + inline void insert(Instruction *); + inline void remove(Instruction *i) { assert(i->bb == bb); bb->remove(i); } + + inline LValue *getScratch(int size = 4, DataFile = FILE_GPR); + // scratch value for a single assignment: + inline LValue *getSSA(int size = 4, DataFile = FILE_GPR); + + inline Instruction *mkOp(operation, DataType, Value *); + Instruction *mkOp1(operation, DataType, Value *, Value *); + Instruction *mkOp2(operation, DataType, Value *, Value *, Value *); + Instruction *mkOp3(operation, DataType, Value *, Value *, Value *, Value *); + + LValue *mkOp1v(operation, DataType, Value *, Value *); + LValue *mkOp2v(operation, DataType, Value *, Value *, Value *); + LValue *mkOp3v(operation, DataType, Value *, Value *, Value *, Value *); + + Instruction *mkLoad(DataType, Value *dst, Symbol *, Value *ptr); + Instruction *mkStore(operation, DataType, Symbol *, Value *ptr, Value *val); + + LValue *mkLoadv(DataType, Symbol *, Value *ptr); + + Instruction *mkMov(Value *, Value *, DataType = TYPE_U32); + Instruction *mkMovToReg(int id, Value *); + Instruction *mkMovFromReg(Value *, int id); + + Instruction *mkInterp(unsigned mode, Value *, int32_t offset, Value *rel); + Instruction *mkFetch(Value *, DataType, DataFile, int32_t offset, + Value *attrRel, Value *primRel); + + Instruction *mkCvt(operation, DataType, Value *, DataType, Value *); + CmpInstruction *mkCmp(operation, CondCode, DataType, + Value *, + Value *, 
Value *, Value * = NULL); + TexInstruction *mkTex(operation, TexTarget, + uint16_t tic, uint16_t tsc, + const std::vector<Value *> &def, + const std::vector<Value *> &src); + Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *); + + FlowInstruction *mkFlow(operation, void *target, CondCode, Value *pred); + + Instruction *mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc); + + Instruction *mkSplit(Value *half[2], uint8_t halfSize, Value *); + + void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2); + + ImmediateValue *mkImm(float); + ImmediateValue *mkImm(uint32_t); + ImmediateValue *mkImm(uint64_t); + + ImmediateValue *mkImm(int i) { return mkImm((uint32_t)i); } + + Value *loadImm(Value *dst, float); + Value *loadImm(Value *dst, uint32_t); + Value *loadImm(Value *dst, uint64_t); + + Value *loadImm(Value *dst, int i) { return loadImm(dst, (uint32_t)i); } + + // returns high part of the operation + static Instruction *split64BitOpPostRA(Function *, Instruction *, + Value *zero, Value *carry); + + struct Location + { + Location(unsigned array, unsigned arrayIdx, unsigned i, unsigned c) + : array(array), arrayIdx(arrayIdx), i(i), c(c) { } + Location(const Location &l) + : array(l.array), arrayIdx(l.arrayIdx), i(l.i), c(l.c) { } + + bool operator==(const Location &l) const + { + return + array == l.array && arrayIdx == l.arrayIdx && i == l.i && c == l.c; + } + + bool operator<(const Location &l) const + { + return array != l.array ? array < l.array : + arrayIdx != l.arrayIdx ? arrayIdx < l.arrayIdx : + i != l.i ? i < l.i : + c != l.c ? 
c < l.c : + false; + } + + unsigned array, arrayIdx, i, c; + }; + + typedef bimap<Location, Value *> ValueMap; + + class DataArray + { + public: + DataArray(BuildUtil *bld) : up(bld) { } + + void setup(unsigned array, unsigned arrayIdx, + uint32_t base, int len, int vecDim, int eltSize, + DataFile file, int8_t fileIdx); + + inline bool exists(ValueMap&, unsigned int i, unsigned int c); + + Value *load(ValueMap&, int i, int c, Value *ptr); + void store(ValueMap&, int i, int c, Value *ptr, Value *value); + Value *acquire(ValueMap&, int i, int c); + + private: + inline Value *lookup(ValueMap&, unsigned i, unsigned c); + inline Value *insert(ValueMap&, unsigned i, unsigned c, Value *v); + + Symbol *mkSymbol(int i, int c); + + private: + BuildUtil *up; + unsigned array, arrayIdx; + + uint32_t baseAddr; + uint32_t arrayLen; + Symbol *baseSym; + + uint8_t vecDim; + uint8_t eltSize; // in bytes + + DataFile file; + bool regOnly; + }; + + Symbol *mkSymbol(DataFile file, int8_t fileIndex, + DataType ty, uint32_t baseAddress); + + Symbol *mkSysVal(SVSemantic svName, uint32_t svIndex); + +private: + void init(Program *); + void addImmediate(ImmediateValue *); + inline unsigned int u32Hash(uint32_t); + +protected: + Program *prog; + Function *func; + Instruction *pos; + BasicBlock *bb; + bool tail; + +#define NV50_IR_BUILD_IMM_HT_SIZE 256 + + ImmediateValue *imms[NV50_IR_BUILD_IMM_HT_SIZE]; + unsigned int immCount; +}; + +unsigned int BuildUtil::u32Hash(uint32_t u) +{ + return (u % 273) % NV50_IR_BUILD_IMM_HT_SIZE; +} + +void BuildUtil::setProgram(Program *program) +{ + prog = program; +} + +void +BuildUtil::setPosition(BasicBlock *block, bool atTail) +{ + bb = block; + prog = bb->getProgram(); + func = bb->getFunction(); + pos = NULL; + tail = atTail; +} + +void +BuildUtil::setPosition(Instruction *i, bool after) +{ + bb = i->bb; + prog = bb->getProgram(); + func = bb->getFunction(); + pos = i; + tail = after; + assert(bb); +} + +LValue * +BuildUtil::getScratch(int size, 
DataFile f) +{ + LValue *lval = new_LValue(func, f); + lval->reg.size = size; + return lval; +} + +LValue * +BuildUtil::getSSA(int size, DataFile f) +{ + LValue *lval = new_LValue(func, f); + lval->ssa = 1; + lval->reg.size = size; + return lval; +} + +void BuildUtil::insert(Instruction *i) +{ + if (!pos) { + tail ? bb->insertTail(i) : bb->insertHead(i); + } else { + if (tail) { + bb->insertAfter(pos, i); + pos = i; + } else { + bb->insertBefore(pos, i); + } + } +} + +Instruction * +BuildUtil::mkOp(operation op, DataType ty, Value *dst) +{ + Instruction *insn = new_Instruction(func, op, ty); + insn->setDef(0, dst); + insert(insn); + if (op == OP_DISCARD || op == OP_EXIT || + op == OP_JOIN || + op == OP_QUADON || op == OP_QUADPOP || + op == OP_EMIT || op == OP_RESTART) + insn->fixed = 1; + return insn; +} + +inline LValue * +BuildUtil::mkOp1v(operation op, DataType ty, Value *dst, Value *src) +{ + mkOp1(op, ty, dst, src); + return dst->asLValue(); +} + +inline LValue * +BuildUtil::mkOp2v(operation op, DataType ty, Value *dst, + Value *src0, Value *src1) +{ + mkOp2(op, ty, dst, src0, src1); + return dst->asLValue(); +} + +inline LValue * +BuildUtil::mkOp3v(operation op, DataType ty, Value *dst, + Value *src0, Value *src1, Value *src2) +{ + mkOp3(op, ty, dst, src0, src1, src2); + return dst->asLValue(); +} + +inline LValue * +BuildUtil::mkLoadv(DataType ty, Symbol *mem, Value *ptr) +{ + LValue *dst = getScratch(); + mkLoad(ty, dst, mem, ptr); + return dst; +} + +bool +BuildUtil::DataArray::exists(ValueMap &m, unsigned int i, unsigned int c) +{ + assert(i < arrayLen && c < vecDim); + return !regOnly || m.r.count(Location(array, arrayIdx, i, c)); +} + +Value * +BuildUtil::DataArray::lookup(ValueMap &m, unsigned i, unsigned c) +{ + ValueMap::r_iterator it = m.r.find(Location(array, arrayIdx, i, c)); + return it != m.r.end() ? 
it->second : NULL; +} + +Value * +BuildUtil::DataArray::insert(ValueMap &m, unsigned i, unsigned c, Value *v) +{ + m.insert(Location(array, arrayIdx, i, c), v); + return v; +} + +} // namespace nv50_ir + +#endif // __NV50_IR_BUILD_UTIL_H__ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h new file mode 100644 index 00000000000..752bad37941 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -0,0 +1,220 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NV50_IR_DRIVER_H__ +#define __NV50_IR_DRIVER_H__ + +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" + +/* + * This struct constitutes linkage information in TGSI terminology. 
+ * + * It is created by the code generator and handed to the pipe driver + * for input/output slot assignment. + */ +struct nv50_ir_varying +{ + uint8_t slot[4]; /* native slots for xyzw (addresses in 32-bit words) */ + + unsigned mask : 4; /* vec4 mask */ + unsigned linear : 1; /* linearly interpolated if true (and not flat) */ + unsigned flat : 1; + unsigned sc : 1; /* special colour interpolation mode (SHADE_MODEL) */ + unsigned centroid : 1; + unsigned patch : 1; /* patch constant value */ + unsigned regular : 1; /* driver-specific meaning (e.g. input in sreg) */ + unsigned input : 1; /* indicates direction of system values */ + unsigned oread : 1; /* true if output is read from parallel TCP */ + + ubyte id; /* TGSI register index */ + ubyte sn; /* TGSI semantic name */ + ubyte si; /* TGSI semantic index */ +}; + +#define NV50_PROGRAM_IR_TGSI 0 +#define NV50_PROGRAM_IR_SM4 1 +#define NV50_PROGRAM_IR_GLSL 2 +#define NV50_PROGRAM_IR_LLVM 3 + +#ifdef DEBUG +# define NV50_IR_DEBUG_BASIC (1 << 0) +# define NV50_IR_DEBUG_VERBOSE (2 << 0) +# define NV50_IR_DEBUG_REG_ALLOC (1 << 2) +#else +# define NV50_IR_DEBUG_BASIC 0 +# define NV50_IR_DEBUG_VERBOSE 0 +# define NV50_IR_DEBUG_REG_ALLOC 0 +#endif + +#define NV50_SEMANTIC_CLIPDISTANCE (TGSI_SEMANTIC_COUNT + 0) +#define NV50_SEMANTIC_VIEWPORTINDEX (TGSI_SEMANTIC_COUNT + 4) +#define NV50_SEMANTIC_LAYER (TGSI_SEMANTIC_COUNT + 5) +#define NV50_SEMANTIC_INVOCATIONID (TGSI_SEMANTIC_COUNT + 6) +#define NV50_SEMANTIC_TESSFACTOR (TGSI_SEMANTIC_COUNT + 7) +#define NV50_SEMANTIC_TESSCOORD (TGSI_SEMANTIC_COUNT + 8) +#define NV50_SEMANTIC_SAMPLEMASK (TGSI_SEMANTIC_COUNT + 9) +#define NV50_SEMANTIC_COUNT (TGSI_SEMANTIC_COUNT + 10) + +#define NV50_TESS_PART_FRACT_ODD 0 +#define NV50_TESS_PART_FRACT_EVEN 1 +#define NV50_TESS_PART_POW2 2 +#define NV50_TESS_PART_INTEGER 3 + +#define NV50_PRIM_PATCHES PIPE_PRIM_MAX + +struct nv50_ir_prog_symbol +{ + uint32_t label; + uint32_t offset; +}; + +#define NVISA_GF100_CHIPSET_C0 0xc0 +#define 
NVISA_GF100_CHIPSET_D0 0xd0 +#define NVISA_GK104_CHIPSET 0xe0 +#define NVISA_GK110_CHIPSET 0xf0 + +struct nv50_ir_prog_info +{ + uint16_t target; /* chipset (0x50, 0x84, 0xc0, ...) */ + + uint8_t type; /* PIPE_SHADER */ + + uint8_t optLevel; /* optimization level (0 to 3) */ + uint8_t dbgFlags; + + struct { + int16_t maxGPR; /* may be -1 if none used */ + int16_t maxOutput; + uint32_t tlsSpace; /* required local memory per thread */ + uint32_t *code; + uint32_t codeSize; + uint8_t sourceRep; /* NV50_PROGRAM_IR */ + const void *source; + void *relocData; + struct nv50_ir_prog_symbol *syms; + uint16_t numSyms; + } bin; + + struct nv50_ir_varying sv[PIPE_MAX_SHADER_INPUTS]; + struct nv50_ir_varying in[PIPE_MAX_SHADER_INPUTS]; + struct nv50_ir_varying out[PIPE_MAX_SHADER_OUTPUTS]; + uint8_t numInputs; + uint8_t numOutputs; + uint8_t numPatchConstants; /* also included in numInputs/numOutputs */ + uint8_t numSysVals; + + struct { + uint32_t *buf; /* for IMMEDIATE_ARRAY */ + uint16_t bufSize; /* size of immediate array */ + uint16_t count; /* count of inline immediates */ + uint32_t *data; /* inline immediate data */ + uint8_t *type; /* for each vec4 (128 bit) */ + } immd; + + union { + struct { + uint32_t inputMask[4]; /* mask of attributes read (1 bit per scalar) */ + } vp; + struct { + uint8_t inputPatchSize; + uint8_t outputPatchSize; + uint8_t partitioning; /* PIPE_TESS_PART */ + int8_t winding; /* +1 (clockwise) / -1 (counter-clockwise) */ + uint8_t domain; /* PIPE_PRIM_{QUADS,TRIANGLES,LINES} */ + uint8_t outputPrim; /* PIPE_PRIM_{TRIANGLES,LINES,POINTS} */ + } tp; + struct { + uint8_t inputPrim; + uint8_t outputPrim; + unsigned instanceCount; + unsigned maxVertices; + } gp; + struct { + unsigned numColourResults; + boolean writesDepth; + boolean earlyFragTests; + boolean separateFragData; + boolean usesDiscard; + } fp; + struct { + uint32_t inputOffset; /* base address for user args */ + uint32_t sharedOffset; /* reserved space in s[] */ + uint32_t gridInfoBase; 
/* base address for NTID,NCTAID */ + } cp; + } prop; + + uint8_t numBarriers; + + struct { + uint8_t clipDistance; /* index of first clip distance output */ + uint8_t clipDistanceMask; /* mask of clip distances defined */ + uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */ + int8_t genUserClip; /* request user clip planes for ClipVertex */ + uint16_t ucpBase; /* base address for UCPs */ + uint8_t ucpCBSlot; /* constant buffer index of UCP data */ + uint8_t pointSize; /* output index for PointSize */ + uint8_t instanceId; /* system value index of InstanceID */ + uint8_t vertexId; /* system value index of VertexID */ + uint8_t edgeFlagIn; + uint8_t edgeFlagOut; + uint8_t fragDepth; /* output index of FragDepth */ + uint8_t sampleMask; /* output index of SampleMask */ + uint8_t backFaceColor[2]; /* input/output indices of back face colour */ + uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */ + boolean nv50styleSurfaces; /* generate gX[] access for raw buffers */ + uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */ + uint16_t texBindBase; /* base address for tex handles (nve4) */ + uint16_t suInfoBase; /* base address for surface info (nve4) */ + uint8_t msInfoCBSlot; /* cX[] used for multisample info */ + uint16_t msInfoBase; /* base address for multisample info */ + } io; + + /* driver callback to assign input/output locations */ + int (*assignSlots)(struct nv50_ir_prog_info *); + + void *driverPriv; +}; + +#ifdef __cplusplus +extern "C" { +#endif + +extern int nv50_ir_generate_code(struct nv50_ir_prog_info *); + +extern void nv50_ir_relocate_code(void *relocData, uint32_t *code, + uint32_t codePos, + uint32_t libPos, + uint32_t dataPos); + +/* obtain code that will be shared among programs */ +extern void nv50_ir_get_target_library(uint32_t chipset, + const uint32_t **code, uint32_t *size); + +#ifdef __cplusplus +} +#endif + +#endif // __NV50_IR_DRIVER_H__ diff --git 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp new file mode 100644 index 00000000000..ac59187130c --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -0,0 +1,1682 @@ +/* + * Copyright 2012 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "codegen/nv50_ir_target_nvc0.h" + +// CodeEmitter for GK110 encoding of the Fermi/Kepler ISA. 
+ +namespace nv50_ir { + +class CodeEmitterGK110 : public CodeEmitter +{ +public: + CodeEmitterGK110(const TargetNVC0 *); + + virtual bool emitInstruction(Instruction *); + virtual uint32_t getMinEncodingSize(const Instruction *) const; + virtual void prepareEmission(Function *); + + inline void setProgramType(Program::Type pType) { progType = pType; } + +private: + const TargetNVC0 *targNVC0; + + Program::Type progType; + + const bool writeIssueDelays; + +private: + void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1); + void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg); + void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier); + + void emitPredicate(const Instruction *); + + void setCAddress14(const ValueRef&); + void setShortImmediate(const Instruction *, const int s); + void setImmediate32(const Instruction *, const int s, Modifier); + + void modNegAbsF32_3b(const Instruction *, const int s); + + void emitCondCode(CondCode cc, int pos, uint8_t mask); + void emitInterpMode(const Instruction *); + void emitLoadStoreType(DataType ty, const int pos); + void emitCachingMode(CacheMode c, const int pos); + + inline uint8_t getSRegEncoding(const ValueRef&); + + void emitRoundMode(RoundMode, const int pos, const int rintPos); + void emitRoundModeF(RoundMode, const int pos); + void emitRoundModeI(RoundMode, const int pos); + + void emitNegAbs12(const Instruction *); + + void emitNOP(const Instruction *); + + void emitLOAD(const Instruction *); + void emitSTORE(const Instruction *); + void emitMOV(const Instruction *); + + void emitINTERP(const Instruction *); + void emitPFETCH(const Instruction *); + void emitVFETCH(const Instruction *); + void emitEXPORT(const Instruction *); + void emitOUT(const Instruction *); + + void emitUADD(const Instruction *); + void emitFADD(const Instruction *); + void emitIMUL(const Instruction *); + void emitFMUL(const Instruction *); + void emitIMAD(const Instruction *); + void emitISAD(const 
Instruction *); + void emitFMAD(const Instruction *); + + void emitNOT(const Instruction *); + void emitLogicOp(const Instruction *, uint8_t subOp); + void emitPOPC(const Instruction *); + void emitINSBF(const Instruction *); + void emitShift(const Instruction *); + + void emitSFnOp(const Instruction *, uint8_t subOp); + + void emitCVT(const Instruction *); + void emitMINMAX(const Instruction *); + void emitPreOp(const Instruction *); + + void emitSET(const CmpInstruction *); + void emitSLCT(const CmpInstruction *); + void emitSELP(const Instruction *); + + void emitTEXBAR(const Instruction *); + void emitTEX(const TexInstruction *); + void emitTEXCSAA(const TexInstruction *); + void emitTXQ(const TexInstruction *); + + void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask); + + void emitFlow(const Instruction *); + + inline void defId(const ValueDef&, const int pos); + inline void srcId(const ValueRef&, const int pos); + inline void srcId(const ValueRef *, const int pos); + inline void srcId(const Instruction *, int s, const int pos); + + inline void srcAddr32(const ValueRef&, const int pos); // address / 4 + + inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false); +}; + +#define GK110_GPR_ZERO 255 + +#define NEG_(b, s) \ + if (i->src(s).mod.neg()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) +#define ABS_(b, s) \ + if (i->src(s).mod.abs()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) + +#define NOT_(b, s) if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) \ + code[(0x##b) / 32] |= 1 << ((0x##b) % 32) + +#define FTZ_(b) if (i->ftz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) + +#define SAT_(b) if (i->saturate) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) + +#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b) + +#define SDATA(a) ((a).rep()->reg.data) +#define DDATA(a) ((a).rep()->reg.data) + +void CodeEmitterGK110::srcId(const ValueRef& src, const int pos) +{ + code[pos / 32] |= (src.get() ? 
SDATA(src).id : GK110_GPR_ZERO) << (pos % 32); +} + +void CodeEmitterGK110::srcId(const ValueRef *src, const int pos) +{ + code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32); +} + +void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos) +{ + int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO; + code[pos / 32] |= r << (pos % 32); +} + +void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos) +{ + code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32); +} + +void CodeEmitterGK110::defId(const ValueDef& def, const int pos) +{ + code[pos / 32] |= (def.get() ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32); +} + +bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod) +{ + const ImmediateValue *imm = ref.get()->asImm(); + + return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000)); +} + +void +CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos) +{ + bool rint = false; + uint8_t n; + + switch (rnd) { + case ROUND_MI: rint = true; /* fall through */ case ROUND_M: n = 1; break; + case ROUND_PI: rint = true; /* fall through */ case ROUND_P: n = 2; break; + case ROUND_ZI: rint = true; /* fall through */ case ROUND_Z: n = 3; break; + default: + rint = rnd == ROUND_NI; + n = 0; + assert(rnd == ROUND_N || rnd == ROUND_NI); + break; + } + code[pos / 32] |= n << (pos % 32); + if (rint && rintPos >= 0) + code[rintPos / 32] |= 1 << (rintPos % 32); +} + +void +CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos) +{ + uint8_t n; + + switch (rnd) { + case ROUND_M: n = 1; break; + case ROUND_P: n = 2; break; + case ROUND_Z: n = 3; break; + default: + n = 0; + assert(rnd == ROUND_N); + break; + } + code[pos / 32] |= n << (pos % 32); +} + +void +CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos) +{ + uint8_t n; + + switch (rnd) { + case ROUND_MI: n = 1; break; + case ROUND_PI: n = 2; break; + case ROUND_ZI: n = 3; break; + default: + n = 
0; + assert(rnd == ROUND_NI); + break; + } + code[pos / 32] |= n << (pos % 32); +} + +void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask) +{ + uint8_t n; + + switch (cc) { + case CC_FL: n = 0x00; break; + case CC_LT: n = 0x01; break; + case CC_EQ: n = 0x02; break; + case CC_LE: n = 0x03; break; + case CC_GT: n = 0x04; break; + case CC_NE: n = 0x05; break; + case CC_GE: n = 0x06; break; + case CC_LTU: n = 0x09; break; + case CC_EQU: n = 0x0a; break; + case CC_LEU: n = 0x0b; break; + case CC_GTU: n = 0x0c; break; + case CC_NEU: n = 0x0d; break; + case CC_GEU: n = 0x0e; break; + case CC_TR: n = 0x0f; break; + case CC_NO: n = 0x10; break; + case CC_NC: n = 0x11; break; + case CC_NS: n = 0x12; break; + case CC_NA: n = 0x13; break; + case CC_A: n = 0x14; break; + case CC_S: n = 0x15; break; + case CC_C: n = 0x16; break; + case CC_O: n = 0x17; break; + default: + n = 0; + assert(!"invalid condition code"); + break; + } + code[pos / 32] |= (n & mask) << (pos % 32); +} + +void +CodeEmitterGK110::emitPredicate(const Instruction *i) +{ + if (i->predSrc >= 0) { + srcId(i->src(i->predSrc), 18); + if (i->cc == CC_NOT_P) + code[0] |= 8 << 18; // negate + assert(i->getPredicate()->reg.file == FILE_PREDICATE); + } else { + code[0] |= 7 << 18; + } +} + +void +CodeEmitterGK110::setCAddress14(const ValueRef& src) +{ + const int32_t addr = src.get()->asSym()->reg.data.offset / 4; + + code[0] |= (addr & 0x01ff) << 23; + code[1] |= (addr & 0x3e00) >> 9; +} + +void +CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s) +{ + const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32; + const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64; + + if (i->sType == TYPE_F32) { + assert(!(u32 & 0x00000fff)); + code[0] |= ((u32 & 0x001ff000) >> 12) << 23; + code[1] |= ((u32 & 0x7fe00000) >> 21); + code[1] |= ((u32 & 0x80000000) >> 4); + } else + if (i->sType == TYPE_F64) { + assert(!(u64 & 0x00000fffffffffffULL)); + code[0] |= ((u64 & 0x001ff00000000000ULL) >> 
44) << 23; + code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53); + code[1] |= ((u64 & 0x8000000000000000ULL) >> 36); + } else { + assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000); + code[0] |= (u32 & 0x001ff) << 23; + code[1] |= (u32 & 0x7fe00) >> 9; + code[1] |= (u32 & 0x80000) << 8; + } +} + +void +CodeEmitterGK110::setImmediate32(const Instruction *i, const int s, + Modifier mod) +{ + uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32; + + if (mod) { + ImmediateValue imm(i->getSrc(s)->asImm(), i->sType); + mod.applyTo(imm); + u32 = imm.reg.data.u32; + } + + code[0] |= u32 << 23; + code[1] |= u32 >> 9; +} + +void +CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg, + Modifier mod) +{ + code[0] = ctg; + code[1] = opc << 20; + + emitPredicate(i); + + defId(i->def(0), 2); + + for (int s = 0; s < 3 && i->srcExists(s); ++s) { + switch (i->src(s).getFile()) { + case FILE_GPR: + srcId(i->src(s), s ? 42 : 10); + break; + case FILE_IMMEDIATE: + setImmediate32(i, s, mod); + break; + default: + break; + } + } +} + + +void +CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg) +{ + code[0] = ctg; + code[1] = opc << 20; + + emitPredicate(i); + + defId(i->def(0), 2); + + switch (i->src(0).getFile()) { + case FILE_MEMORY_CONST: + code[1] |= 0x4 << 28; + setCAddress14(i->src(0)); + break; + case FILE_GPR: + code[1] |= 0xc << 28; + srcId(i->src(0), 23); + break; + default: + assert(0); + break; + } +} + +// 0x2 for GPR, c[] and 0x1 for short immediate +void +CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2, + uint32_t opc1) +{ + const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE; + + int s1 = 23; + if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST) + s1 = 42; + + if (imm) { + code[0] = 0x1; + code[1] = opc1 << 20; + } else { + code[0] = 0x2; + code[1] = (0xc << 28) | (opc2 << 20); + } + + emitPredicate(i); + + defId(i->def(0), 2); + + for (int s = 0; s < 3 && 
i->srcExists(s); ++s) { + switch (i->src(s).getFile()) { + case FILE_MEMORY_CONST: + code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28); + setCAddress14(i->src(s)); + code[1] |= i->getSrc(s)->reg.fileIndex << 5; + break; + case FILE_IMMEDIATE: + setShortImmediate(i, s); + break; + case FILE_GPR: + srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10); + break; + default: + // ignore here, can be predicate or flags, but must not be address + break; + } + } + // 0x0 = invalid + // 0xc = rrr + // 0x8 = rrc + // 0x4 = rcr + assert(imm || (code[1] & (0xc << 28))); +} + +inline void +CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s) +{ + if (i->src(s).mod.abs()) code[1] &= ~(1 << 27); + if (i->src(s).mod.neg()) code[1] ^= (1 << 27); +} + +void +CodeEmitterGK110::emitNOP(const Instruction *i) +{ + code[0] = 0x00003c02; + code[1] = 0x85800000; + + if (i) + emitPredicate(i); + else + code[0] = 0x001c3c02; +} + +void +CodeEmitterGK110::emitFMAD(const Instruction *i) +{ + assert(!isLIMM(i->src(1), TYPE_F32)); + + emitForm_21(i, 0x0c0, 0x940); + + NEG_(34, 2); + SAT_(35); + RND_(36, F); + FTZ_(38); + + bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); + + if (code[0] & 0x1) { + if (neg1) + code[1] ^= 1 << 27; + } else + if (neg1) { + code[1] |= 1 << 19; + } +} + +void +CodeEmitterGK110::emitFMUL(const Instruction *i) +{ + bool neg = (i->src(0).mod ^ i->src(1).mod).neg(); + + assert(i->postFactor >= -3 && i->postFactor <= 3); + + if (isLIMM(i->src(1), TYPE_F32)) { + emitForm_L(i, 0x200, 0x2, Modifier(0)); + + FTZ_(38); + SAT_(3a); + if (neg) + code[1] ^= 1 << 22; + + assert(i->postFactor == 0); + } else { + emitForm_21(i, 0x234, 0xc34); + + RND_(2a, F); + FTZ_(2f); + SAT_(35); + + if (code[0] & 0x1) { + if (neg) + code[1] ^= 1 << 27; + } else + if (neg) { + code[1] |= 1 << 19; + } + } +} + +void +CodeEmitterGK110::emitIMUL(const Instruction *i) +{ + assert(!i->src(0).mod.neg() && !i->src(1).mod.neg()); + assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); + + if 
(isLIMM(i->src(1), TYPE_S32)) { + emitForm_L(i, 0x280, 2, Modifier(0)); + + assert(i->subOp != NV50_IR_SUBOP_MUL_HIGH); + + if (i->sType == TYPE_S32) + code[1] |= 3 << 25; + } else { + emitForm_21(i, 0x21c, 0xc1c); + + if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) + code[1] |= 1 << 10; + if (i->sType == TYPE_S32) + code[1] |= 3 << 11; + } +} + +void +CodeEmitterGK110::emitFADD(const Instruction *i) +{ + if (isLIMM(i->src(1), TYPE_F32)) { + assert(i->rnd == ROUND_N); + assert(!i->saturate); + + emitForm_L(i, 0x400, 0, i->src(1).mod); + + FTZ_(3a); + NEG_(3b, 0); + ABS_(39, 0); + } else { + emitForm_21(i, 0x22c, 0xc2c); + + FTZ_(2f); + RND_(2a, F); + ABS_(31, 0); + NEG_(33, 0); + + if (code[0] & 0x1) { + modNegAbsF32_3b(i, 1); + } else { + ABS_(34, 1); + NEG_(30, 1); + } + } +} + +void +CodeEmitterGK110::emitUADD(const Instruction *i) +{ + uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg(); + + if (i->op == OP_SUB) + addOp ^= 1; + + assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); + + if (isLIMM(i->src(1), TYPE_S32)) { + emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? 
NV50_IR_MOD_NEG : 0)); + + if (addOp & 2) + code[1] |= 1 << 27; + + assert(!i->defExists(1)); + assert(i->flagsSrc < 0); + + SAT_(39); + } else { + emitForm_21(i, 0x208, 0xc08); + + assert(addOp != 3); // would be add-plus-one + + code[1] |= addOp << 19; + + if (i->defExists(1)) + code[1] |= 1 << 18; // write carry + if (i->flagsSrc >= 0) + code[1] |= 1 << 14; // add carry + + SAT_(35); + } +} + +// TODO: shl-add +void +CodeEmitterGK110::emitIMAD(const Instruction *i) +{ + uint8_t addOp = + (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg()); + + emitForm_21(i, 0x100, 0xa00); + + assert(addOp != 3); + code[1] |= addOp << 26; + + if (i->sType == TYPE_S32) + code[1] |= (1 << 19) | (1 << 24); + + if (code[0] & 0x1) { + assert(!i->subOp); + SAT_(39); + } else { + if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) + code[1] |= 1 << 25; + SAT_(35); + } +} + +void +CodeEmitterGK110::emitISAD(const Instruction *i) +{ + assert(i->dType == TYPE_S32 || i->dType == TYPE_U32); + + emitForm_21(i, 0x1fc, 0xb74); + + if (i->dType == TYPE_S32) + code[1] |= 1 << 19; +} + +void +CodeEmitterGK110::emitNOT(const Instruction *i) +{ + code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src + code[1] = 0x22003800; + + emitPredicate(i); + + defId(i->def(0), 2); + + switch (i->src(0).getFile()) { + case FILE_GPR: + code[1] |= 0xc << 28; + srcId(i->src(0), 23); + break; + case FILE_MEMORY_CONST: + code[1] |= 0x4 << 28; + setCAddress14(i->src(1)); + break; + default: + assert(0); + break; + } +} + +void +CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp) +{ + assert(!(i->src(0).mod & Modifier(NV50_IR_MOD_NOT))); // XXX: find me + + if (isLIMM(i->src(1), TYPE_S32)) { + emitForm_L(i, 0x200, 0, i->src(1).mod); + code[1] |= subOp << 24; + } else { + emitForm_21(i, 0x220, 0xc20); + code[1] |= subOp << 12; + NOT_(2b, 1); + } + assert(!(code[0] & 0x1) || !(i->src(1).mod & Modifier(NV50_IR_MOD_NOT))); +} + +void +CodeEmitterGK110::emitPOPC(const Instruction *i) +{ + 
assert(!isLIMM(i->src(1), TYPE_S32, true)); + + emitForm_21(i, 0x204, 0xc04); + + NOT_(2a, 0); + if (!(code[0] & 0x1)) + NOT_(2b, 1); +} + +void +CodeEmitterGK110::emitINSBF(const Instruction *i) +{ + emitForm_21(i, 0x1f8, 0xb78); +} + +void +CodeEmitterGK110::emitShift(const Instruction *i) +{ + const bool sar = i->op == OP_SHR && isSignedType(i->sType); + + if (sar) { + emitForm_21(i, 0x214, 0x014); + code[1] |= 1 << 19; + } else + if (i->op == OP_SHR) { + // this is actually RSHF + emitForm_21(i, 0x27c, 0x87c); + code[1] |= GK110_GPR_ZERO << 10; + } else { + // this is actually LSHF + emitForm_21(i, 0x1fc, 0xb7c); + code[1] |= GK110_GPR_ZERO << 10; + } + + if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP) { + if (!sar) + code[1] |= 1 << 21; + // XXX: find wrap modifier for SHR S32 + } +} + +void +CodeEmitterGK110::emitPreOp(const Instruction *i) +{ + emitForm_21(i, 0x248, -1); + + if (i->op == OP_PREEX2) + code[1] |= 1 << 10; + + NEG_(30, 0); + ABS_(34, 0); +} + +void +CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp) +{ + code[0] = 0x00000002 | (subOp << 23); + code[1] = 0x84000000; + + emitPredicate(i); + + defId(i->def(0), 2); + srcId(i->src(0), 10); + + NEG_(33, 0); + ABS_(31, 0); + + // XXX: find saturate +} + +void +CodeEmitterGK110::emitMINMAX(const Instruction *i) +{ + uint32_t op2, op1; + + switch (i->dType) { + case TYPE_U32: + case TYPE_S32: + op2 = 0x210; + op1 = 0xc10; + break; + case TYPE_F32: + op2 = 0x230; + op1 = 0xc30; + break; + case TYPE_F64: + op2 = 0x228; + op1 = 0xc28; + break; + default: + assert(0); + op2 = 0; + op1 = 0; + break; + } + emitForm_21(i, op2, op1); + + if (i->dType == TYPE_S32) + code[1] |= 1 << 19; + code[1] |= (i->op == OP_MIN) ? 
0x1c00 : 0x3c00; // [!]pt + + FTZ_(2f); + ABS_(31, 0); + NEG_(33, 0); + if (code[0] & 0x1) { + modNegAbsF32_3b(i, 1); + } else { + ABS_(34, 1); + NEG_(30, 1); + } +} + +void +CodeEmitterGK110::emitCVT(const Instruction *i) +{ + const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); + const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType); + const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType); + + bool sat = i->saturate; + bool abs = i->src(0).mod.abs(); + bool neg = i->src(0).mod.neg(); + + RoundMode rnd = i->rnd; + + switch (i->op) { + case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break; + case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break; + case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break; + case OP_SAT: sat = true; break; + case OP_NEG: neg = !neg; break; + case OP_ABS: abs = true; neg = false; break; + default: + break; + } + + uint32_t op; + + if (f2f) op = 0x254; + else if (f2i) op = 0x258; + else if (i2f) op = 0x25c; + else op = 0x260; + + emitForm_C(i, op, 0x2); + + FTZ_(2f); + if (neg) code[1] |= 1 << 16; + if (abs) code[1] |= 1 << 20; + if (sat) code[1] |= 1 << 21; + + emitRoundMode(rnd, 32 + 10, f2f ? 
(32 + 13) : -1); + + code[0] |= typeSizeofLog2(i->dType) << 10; + code[0] |= typeSizeofLog2(i->sType) << 12; + + if (isSignedIntType(i->dType)) + code[0] |= 0x4000; + if (isSignedIntType(i->sType)) + code[0] |= 0x8000; +} + +void +CodeEmitterGK110::emitSET(const CmpInstruction *i) +{ + uint16_t op1, op2; + + if (i->def(0).getFile() == FILE_PREDICATE) { + switch (i->sType) { + case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break; + case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break; + default: + op2 = 0x1b0; + op1 = 0xb30; + break; + } + emitForm_21(i, op2, op1); + + NEG_(2e, 0); + ABS_(9, 0); + if (!(code[0] & 0x1)) { + NEG_(8, 1); + ABS_(2f, 1); + } else { + modNegAbsF32_3b(i, 1); + } + FTZ_(32); + + // normal DST field is negated predicate result + code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0); + if (i->defExists(1)) + defId(i->def(1), 2); + else + code[0] |= 0x1c; + } else { + switch (i->sType) { + case TYPE_F32: op2 = 0x000; op1 = 0x820; break; + case TYPE_F64: op2 = 0x080; op1 = 0x900; break; + default: + op2 = 0x1a8; + op1 = 0xb28; + break; + } + emitForm_21(i, op2, op1); + + NEG_(2e, 0); + ABS_(39, 0); + if (!(code[0] & 0x1)) { + NEG_(38, 1); + ABS_(2f, 1); + } else { + modNegAbsF32_3b(i, 1); + } + FTZ_(3a); + } + if (i->sType == TYPE_S32) + code[1] |= 1 << 19; + + if (i->op != OP_SET) { + switch (i->op) { + case OP_SET_AND: code[1] |= 0x0 << 16; break; + case OP_SET_OR: code[1] |= 0x1 << 16; break; + case OP_SET_XOR: code[1] |= 0x2 << 16; break; + default: + assert(0); + break; + } + srcId(i->src(2), 0x2a); + } else { + code[1] |= 0x7 << 10; + } + emitCondCode(i->setCond, + isFloatType(i->sType) ? 0x33 : 0x34, + isFloatType(i->sType) ? 
0xf : 0x7); +} + +void +CodeEmitterGK110::emitSLCT(const CmpInstruction *i) +{ + CondCode cc = i->setCond; + if (i->src(2).mod.neg()) + cc = reverseCondCode(cc); + + if (i->dType == TYPE_F32) { + emitForm_21(i, 0x1d0, 0xb50); + FTZ_(32); + emitCondCode(cc, 0x33, 0xf); + } else { + emitForm_21(i, 0x1a4, 0xb20); + emitCondCode(cc, 0x34, 0x7); + } +} + +void CodeEmitterGK110::emitSELP(const Instruction *i) +{ + emitForm_21(i, 0x250, 0x050); + + if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT))) + code[1] |= 1 << 13; +} + +void CodeEmitterGK110::emitTEXBAR(const Instruction *i) +{ + code[0] = 0x00000002 | (i->subOp << 23); + code[1] = 0x77000000; + + emitPredicate(i); +} + +void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i) +{ + emitNOP(i); // TODO +} + +static inline bool +isNextIndependentTex(const TexInstruction *i) +{ + if (!i->next || !isTextureOp(i->next->op)) + return false; + if (i->getDef(0)->interfers(i->next->getSrc(0))) + return false; + return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1)); +} + +void +CodeEmitterGK110::emitTEX(const TexInstruction *i) +{ + const bool ind = i->tex.rIndirectSrc >= 0; + + if (ind) { + code[0] = 0x00000002; + switch (i->op) { + case OP_TXD: + code[1] = 0x7e000000; + break; + default: + code[1] = 0x7d800000; + break; + } + } else { + switch (i->op) { + case OP_TXD: + code[0] = 0x00000002; + code[1] = 0x76000000; + break; + default: + code[0] = 0x00000001; + code[1] = 0x60000000; + break; + } + code[1] |= i->tex.r << 15; + } + + code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode + + // if (i->tex.liveOnly) + // ? 
+ + switch (i->op) { + case OP_TEX: break; + case OP_TXB: code[1] |= 0x2000; break; + case OP_TXL: code[1] |= 0x3000; break; + case OP_TXF: break; // XXX + case OP_TXG: break; // XXX + case OP_TXD: break; + default: + assert(!"invalid texture op"); + break; + } + /* + if (i->op == OP_TXF) { + if (!i->tex.levelZero) + code[1] |= 0x02000000; + } else */ + if (i->tex.levelZero) { + code[1] |= 0x1000; + } + + // if (i->op != OP_TXD && i->tex.derivAll) + // code[1] |= 1 << 13; + + emitPredicate(i); + + code[1] |= i->tex.mask << 2; + + const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) + + defId(i->def(0), 2); + srcId(i->src(0), 10); + srcId(i, src1, 23); + + // if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5; + + // texture target: + code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7; + if (i->tex.target.isArray()) + code[1] |= 0x40; + // if (i->tex.target.isShadow()) + // ? + // if (i->tex.target == TEX_TARGET_2D_MS || + // i->tex.target == TEX_TARGET_2D_MS_ARRAY) + // ? + + if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) { + // ? + } + + // if (i->tex.useOffsets) + // ? +} + +void +CodeEmitterGK110::emitTXQ(const TexInstruction *i) +{ + emitNOP(i); // TODO +} + +void +CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) +{ + emitNOP(i); // TODO +} + +void +CodeEmitterGK110::emitFlow(const Instruction *i) +{ + const FlowInstruction *f = i->asFlow(); + + unsigned mask; // bit 0: predicate, bit 1: target + + code[0] = 0x00000000; + + switch (i->op) { + case OP_BRA: + code[1] = f->absolute ? 0x00000 : 0x12000000; // XXX + // if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) + // code[0] |= 0x4000; + mask = 3; + break; + case OP_CALL: + code[1] = f->absolute ? 
0x00000 : 0x13000000; // XXX + // if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) + // code[0] |= 0x4000; + mask = 2; + break; + + case OP_EXIT: code[1] = 0x18000000; mask = 1; break; + case OP_RET: code[1] = 0x19000000; mask = 1; break; + case OP_DISCARD: code[1] = 0x19800000; mask = 1; break; // XXX: guess + case OP_BREAK: code[1] = 0x1a800000; mask = 1; break; // XXX: guess + case OP_CONT: code[1] = 0x1b000000; mask = 1; break; // XXX: guess + + case OP_JOINAT: code[1] = 0x14800000; mask = 2; break; + case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break; // XXX: guess + case OP_PRECONT: code[1] = 0x15800000; mask = 2; break; // XXX: guess + case OP_PRERET: code[1] = 0x16000000; mask = 2; break; // XXX: guess + + case OP_QUADON: code[1] = 0x1c000000; mask = 0; break; // XXX: guess + case OP_QUADPOP: code[1] = 0x1c800000; mask = 0; break; // XXX: guess + case OP_BRKPT: code[1] = 0x1d000000; mask = 0; break; // XXX: guess + default: + assert(!"invalid flow operation"); + return; + } + + if (mask & 1) { + emitPredicate(i); + if (i->flagsSrc < 0) + code[0] |= 0x3c; + } + + if (!f) + return; + + // TODO + /* + if (f->allWarp) + code[0] |= 1 << 15; + if (f->limit) + code[0] |= 1 << 16; + */ + + if (f->op == OP_CALL) { + if (f->builtin) { + assert(f->absolute); + uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin); + addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23); + addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9); + } else { + assert(!f->absolute); + int32_t pcRel = f->target.fn->binPos - (codeSize + 8); + code[0] |= (pcRel & 0x1ff) << 23; + code[1] |= (pcRel >> 9) & 0x7fff; + } + } else + if (mask & 2) { + int32_t pcRel = f->target.bb->binPos - (codeSize + 8); + // currently we don't want absolute branches + assert(!f->absolute); + code[0] |= (pcRel & 0x1ff) << 23; + code[1] |= (pcRel >> 9) & 0x7fff; + } +} + +void +CodeEmitterGK110::emitPFETCH(const Instruction *i) +{ + emitNOP(i); // TODO +} + +void 
+CodeEmitterGK110::emitVFETCH(const Instruction *i) +{ + uint32_t offset = i->src(0).get()->reg.data.offset; + + code[0] = 0x00000002 | (offset << 23); + code[1] = 0x7ec00000 | (offset >> 9); + +#if 0 + if (i->perPatch) + code[0] |= 0x100; + if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT) + code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads +#endif + + emitPredicate(i); + + defId(i->def(0), 2); + srcId(i->src(0).getIndirect(0), 10); + srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address +} + +void +CodeEmitterGK110::emitEXPORT(const Instruction *i) +{ + uint32_t offset = i->src(0).get()->reg.data.offset; + + code[0] = 0x00000002 | (offset << 23); + code[1] = 0x7f000000 | (offset >> 9); + +#if 0 + if (i->perPatch) + code[0] |= 0x100; +#endif + + emitPredicate(i); + + assert(i->src(1).getFile() == FILE_GPR); + + srcId(i->src(0).getIndirect(0), 10); + srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address + srcId(i->src(1), 2); +} + +void +CodeEmitterGK110::emitOUT(const Instruction *i) +{ + emitNOP(i); // TODO +} + +void +CodeEmitterGK110::emitInterpMode(const Instruction *i) +{ + code[1] |= i->ipa << 21; // TODO: INTERP_SAMPLEID +} + +void +CodeEmitterGK110::emitINTERP(const Instruction *i) +{ + const uint32_t base = i->getSrc(0)->reg.data.offset; + + code[0] = 0x00000002 | (base << 31); + code[1] = 0x74800000 | (base >> 1); + + if (i->saturate) + code[1] |= 1 << 18; + + if (i->op == OP_PINTERP) + srcId(i->src(1), 23); + else + code[0] |= 0xff << 23; + + srcId(i->src(0).getIndirect(0), 10); + emitInterpMode(i); + + emitPredicate(i); + defId(i->def(0), 2); + + if (i->getSampleMode() == NV50_IR_INTERP_OFFSET) + srcId(i->src(i->op == OP_PINTERP ? 
2 : 1), 32 + 10); + else + code[1] |= 0xff << 10; +} + +void +CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos) +{ + uint8_t n; + + switch (ty) { + case TYPE_U8: + n = 0; + break; + case TYPE_S8: + n = 1; + break; + case TYPE_U16: + n = 2; + break; + case TYPE_S16: + n = 3; + break; + case TYPE_F32: + case TYPE_U32: + case TYPE_S32: + n = 4; + break; + case TYPE_F64: + case TYPE_U64: + case TYPE_S64: + n = 5; + break; + case TYPE_B128: + n = 6; + break; + default: + n = 0; + assert(!"invalid ld/st type"); + break; + } + code[pos / 32] |= n << (pos % 32); +} + +void +CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos) +{ + uint8_t n; + + switch (c) { + case CACHE_CA: +// case CACHE_WB: + n = 0; + break; + case CACHE_CG: + n = 1; + break; + case CACHE_CS: + n = 2; + break; + case CACHE_CV: +// case CACHE_WT: + n = 3; + break; + default: + n = 0; + assert(!"invalid caching mode"); + break; + } + code[pos / 32] |= n << (pos % 32); +} + +void +CodeEmitterGK110::emitSTORE(const Instruction *i) +{ + int32_t offset = SDATA(i->src(0)).offset; + + switch (i->src(0).getFile()) { + case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break; + case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break; + case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break; + default: + assert(!"invalid memory file"); + break; + } + + if (i->src(0).getFile() != FILE_MEMORY_GLOBAL) + offset &= 0xffffff; + + if (code[0] & 0x2) { + emitLoadStoreType(i->dType, 0x33); + if (i->src(0).getFile() == FILE_MEMORY_LOCAL) + emitCachingMode(i->cache, 0x2f); + } else { + emitLoadStoreType(i->dType, 0x38); + emitCachingMode(i->cache, 0x3b); + } + code[0] |= offset << 23; + code[1] |= offset >> 9; + + emitPredicate(i); + + srcId(i->src(1), 2); + srcId(i->src(0).getIndirect(0), 10); +} + +void +CodeEmitterGK110::emitLOAD(const Instruction *i) +{ + int32_t offset = SDATA(i->src(0)).offset; + + switch (i->src(0).getFile()) { + case 
FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break; + case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break; + case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break; + case FILE_MEMORY_CONST: + if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) { + emitMOV(i); + return; + } + offset &= 0xffff; + code[0] = 0x00000002; + code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7); + break; + default: + assert(!"invalid memory file"); + break; + } + + if (code[0] & 0x2) { + offset &= 0xffffff; + emitLoadStoreType(i->dType, 0x33); + if (i->src(0).getFile() == FILE_MEMORY_LOCAL) + emitCachingMode(i->cache, 0x2f); + } else { + emitLoadStoreType(i->dType, 0x38); + emitCachingMode(i->cache, 0x3b); + } + code[0] |= offset << 23; + code[1] |= offset >> 9; + + emitPredicate(i); + + defId(i->def(0), 2); + srcId(i->src(0).getIndirect(0), 10); +} + +uint8_t +CodeEmitterGK110::getSRegEncoding(const ValueRef& ref) +{ + switch (SDATA(ref).sv.sv) { + case SV_LANEID: return 0x00; + case SV_PHYSID: return 0x03; + case SV_VERTEX_COUNT: return 0x10; + case SV_INVOCATION_ID: return 0x11; + case SV_YDIR: return 0x12; + case SV_TID: return 0x21 + SDATA(ref).sv.index; + case SV_CTAID: return 0x25 + SDATA(ref).sv.index; + case SV_NTID: return 0x29 + SDATA(ref).sv.index; + case SV_GRIDID: return 0x2c; + case SV_NCTAID: return 0x2d + SDATA(ref).sv.index; + case SV_LBASE: return 0x34; + case SV_SBASE: return 0x30; + case SV_CLOCK: return 0x50 + SDATA(ref).sv.index; + default: + assert(!"no sreg for system value"); + return 0; + } +} + +void +CodeEmitterGK110::emitMOV(const Instruction *i) +{ + if (i->src(0).getFile() == FILE_SYSTEM_VALUE) { + code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23); + code[1] = 0x86400000; + emitPredicate(i); + defId(i->def(0), 2); + } else + if (i->src(0).getFile() == FILE_IMMEDIATE) { + code[0] = 0x00000002 | (i->lanes << 14); + code[1] = 0x74000000; + emitPredicate(i); + defId(i->def(0), 
2); + setImmediate32(i, 0, Modifier(0)); + } else + if (i->src(0).getFile() == FILE_PREDICATE) { + // TODO + } else { + emitForm_C(i, 0x24c, 2); + code[1] |= i->lanes << 10; + } +} + +bool +CodeEmitterGK110::emitInstruction(Instruction *insn) +{ + const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8; + + if (insn->encSize != 8) { + ERROR("skipping unencodable instruction: "); + insn->print(); + return false; + } else + if (codeSize + size > codeSizeLimit) { + ERROR("code emitter output buffer too small\n"); + return false; + } + + if (writeIssueDelays) { + int id = (codeSize & 0x3f) / 8 - 1; + if (id < 0) { + id += 1; + code[0] = 0x00000000; // cf issue delay "instruction" + code[1] = 0x08000000; + code += 2; + codeSize += 8; + } + uint32_t *data = code - (id * 2 + 2); + + switch (id) { + case 0: data[0] |= insn->sched << 2; break; + case 1: data[0] |= insn->sched << 10; break; + case 2: data[0] |= insn->sched << 18; break; + case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break; + case 4: data[1] |= insn->sched << 2; + case 5: data[1] |= insn->sched << 10; break; + case 6: data[1] |= insn->sched << 18; break; + default: + assert(0); + break; + } + } + + // assert that instructions with multiple defs don't corrupt registers + for (int d = 0; insn->defExists(d); ++d) + assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0); + + switch (insn->op) { + case OP_MOV: + case OP_RDSV: + emitMOV(insn); + break; + case OP_NOP: + break; + case OP_LOAD: + emitLOAD(insn); + break; + case OP_STORE: + emitSTORE(insn); + break; + case OP_LINTERP: + case OP_PINTERP: + emitINTERP(insn); + break; + case OP_VFETCH: + emitVFETCH(insn); + break; + case OP_EXPORT: + emitEXPORT(insn); + break; + case OP_PFETCH: + emitPFETCH(insn); + break; + case OP_EMIT: + case OP_RESTART: + emitOUT(insn); + break; + case OP_ADD: + case OP_SUB: + if (isFloatType(insn->dType)) + emitFADD(insn); + else + emitUADD(insn); + break; + case OP_MUL: + if 
(isFloatType(insn->dType)) + emitFMUL(insn); + else + emitIMUL(insn); + break; + case OP_MAD: + case OP_FMA: + if (isFloatType(insn->dType)) + emitFMAD(insn); + else + emitIMAD(insn); + break; + case OP_SAD: + emitISAD(insn); + break; + case OP_NOT: + emitNOT(insn); + break; + case OP_AND: + emitLogicOp(insn, 0); + break; + case OP_OR: + emitLogicOp(insn, 1); + break; + case OP_XOR: + emitLogicOp(insn, 2); + break; + case OP_SHL: + case OP_SHR: + emitShift(insn); + break; + case OP_SET: + case OP_SET_AND: + case OP_SET_OR: + case OP_SET_XOR: + emitSET(insn->asCmp()); + break; + case OP_SELP: + emitSELP(insn); + break; + case OP_SLCT: + emitSLCT(insn->asCmp()); + break; + case OP_MIN: + case OP_MAX: + emitMINMAX(insn); + break; + case OP_ABS: + case OP_NEG: + case OP_CEIL: + case OP_FLOOR: + case OP_TRUNC: + case OP_CVT: + case OP_SAT: + emitCVT(insn); + break; + case OP_RSQ: + emitSFnOp(insn, 5); + break; + case OP_RCP: + emitSFnOp(insn, 4); + break; + case OP_LG2: + emitSFnOp(insn, 3); + break; + case OP_EX2: + emitSFnOp(insn, 2); + break; + case OP_SIN: + emitSFnOp(insn, 1); + break; + case OP_COS: + emitSFnOp(insn, 0); + break; + case OP_PRESIN: + case OP_PREEX2: + emitPreOp(insn); + break; + case OP_TEX: + case OP_TXB: + case OP_TXL: + case OP_TXD: + case OP_TXF: + emitTEX(insn->asTex()); + break; + case OP_TXQ: + emitTXQ(insn->asTex()); + break; + case OP_TEXBAR: + emitTEXBAR(insn); + break; + case OP_BRA: + case OP_CALL: + case OP_PRERET: + case OP_RET: + case OP_DISCARD: + case OP_EXIT: + case OP_PRECONT: + case OP_CONT: + case OP_PREBREAK: + case OP_BREAK: + case OP_JOINAT: + case OP_BRKPT: + case OP_QUADON: + case OP_QUADPOP: + emitFlow(insn); + break; + case OP_QUADOP: + emitQUADOP(insn, insn->subOp, insn->lanes); + break; + case OP_DFDX: + emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4); + break; + case OP_DFDY: + emitQUADOP(insn, insn->src(0).mod.neg() ? 
0x5a : 0xa5, 0x5); + break; + case OP_POPCNT: + emitPOPC(insn); + break; + case OP_JOIN: + emitNOP(insn); + insn->join = 1; + break; + case OP_PHI: + case OP_UNION: + case OP_CONSTRAINT: + ERROR("operation should have been eliminated"); + return false; + case OP_EXP: + case OP_LOG: + case OP_SQRT: + case OP_POW: + ERROR("operation should have been lowered\n"); + return false; + default: + ERROR("unknow op\n"); + return false; + } + + if (insn->join) + code[0] |= 1 << 22; + + code += 2; + codeSize += 8; + return true; +} + +uint32_t +CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const +{ + // No more short instruction encodings. + return 8; +} + +void +CodeEmitterGK110::prepareEmission(Function *func) +{ + const Target *targ = func->getProgram()->getTarget(); + + CodeEmitter::prepareEmission(func); + + if (targ->hasSWSched) + calculateSchedDataNVC0(targ, func); +} + +CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target) + : CodeEmitter(target), + targNVC0(target), + writeIssueDelays(target->hasSWSched) +{ + code = NULL; + codeSize = codeSizeLimit = 0; + relocInfo = NULL; +} + +CodeEmitter * +TargetNVC0::createCodeEmitterGK110(Program::Type type) +{ + CodeEmitterGK110 *emit = new CodeEmitterGK110(this); + emit->setProgramType(type); + return emit; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp new file mode 100644 index 00000000000..3eca27d0bbc --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -0,0 +1,1962 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to 
permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_target_nv50.h" + +namespace nv50_ir { + +#define NV50_OP_ENC_LONG 0 +#define NV50_OP_ENC_SHORT 1 +#define NV50_OP_ENC_IMM 2 +#define NV50_OP_ENC_LONG_ALT 3 + +class CodeEmitterNV50 : public CodeEmitter +{ +public: + CodeEmitterNV50(const TargetNV50 *); + + virtual bool emitInstruction(Instruction *); + + virtual uint32_t getMinEncodingSize(const Instruction *) const; + + inline void setProgramType(Program::Type pType) { progType = pType; } + + virtual void prepareEmission(Function *); + +private: + Program::Type progType; + + const TargetNV50 *targNV50; + +private: + inline void defId(const ValueDef&, const int pos); + inline void srcId(const ValueRef&, const int pos); + inline void srcId(const ValueRef *, const int pos); + + inline void srcAddr16(const ValueRef&, bool adj, const int pos); + inline void srcAddr8(const ValueRef&, const int pos); + + void emitFlagsRd(const Instruction *); + void emitFlagsWr(const Instruction *); + + void emitCondCode(CondCode cc, DataType ty, int pos); + + inline void setARegBits(unsigned int); + + void setAReg16(const Instruction *, int s); + void setImmediate(const Instruction *, int s); + + void setDst(const Value *); + void setDst(const Instruction 
*, int d); + void setSrcFileBits(const Instruction *, int enc); + void setSrc(const Instruction *, unsigned int s, int slot); + + void emitForm_MAD(const Instruction *); + void emitForm_ADD(const Instruction *); + void emitForm_MUL(const Instruction *); + void emitForm_IMM(const Instruction *); + + void emitLoadStoreSizeLG(DataType ty, int pos); + void emitLoadStoreSizeCS(DataType ty); + + void roundMode_MAD(const Instruction *); + void roundMode_CVT(RoundMode); + + void emitMNeg12(const Instruction *); + + void emitLOAD(const Instruction *); + void emitSTORE(const Instruction *); + void emitMOV(const Instruction *); + void emitNOP(); + void emitINTERP(const Instruction *); + void emitPFETCH(const Instruction *); + void emitOUT(const Instruction *); + + void emitUADD(const Instruction *); + void emitAADD(const Instruction *); + void emitFADD(const Instruction *); + void emitIMUL(const Instruction *); + void emitFMUL(const Instruction *); + void emitFMAD(const Instruction *); + void emitIMAD(const Instruction *); + void emitISAD(const Instruction *); + + void emitMINMAX(const Instruction *); + + void emitPreOp(const Instruction *); + void emitSFnOp(const Instruction *, uint8_t subOp); + + void emitShift(const Instruction *); + void emitARL(const Instruction *, unsigned int shl); + void emitLogicOp(const Instruction *); + void emitNOT(const Instruction *); + + void emitCVT(const Instruction *); + void emitSET(const Instruction *); + + void emitTEX(const TexInstruction *); + void emitTXQ(const TexInstruction *); + void emitTEXPREP(const TexInstruction *); + + void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp); + + void emitFlow(const Instruction *, uint8_t flowOp); + void emitPRERETEmu(const FlowInstruction *); + void emitBAR(const Instruction *); + + void emitATOM(const Instruction *); +}; + +#define SDATA(a) ((a).rep()->reg.data) +#define DDATA(a) ((a).rep()->reg.data) + +void CodeEmitterNV50::srcId(const ValueRef& src, const int pos) +{ + 
assert(src.get()); + code[pos / 32] |= SDATA(src).id << (pos % 32); +} + +void CodeEmitterNV50::srcId(const ValueRef *src, const int pos) +{ + assert(src->get()); + code[pos / 32] |= SDATA(*src).id << (pos % 32); +} + +void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos) +{ + assert(src.get()); + + int32_t offset = SDATA(src).offset; + + assert(!adj || src.get()->reg.size <= 4); + if (adj) + offset /= src.get()->reg.size; + + assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16); + + if (offset < 0) + offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff; + + code[pos / 32] |= offset << (pos % 32); +} + +void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos) +{ + assert(src.get()); + + uint32_t offset = SDATA(src).offset; + + assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3)); + + code[pos / 32] |= (offset >> 2) << (pos % 32); +} + +void CodeEmitterNV50::defId(const ValueDef& def, const int pos) +{ + assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT); + + code[pos / 32] |= DDATA(def).id << (pos % 32); +} + +void +CodeEmitterNV50::roundMode_MAD(const Instruction *insn) +{ + switch (insn->rnd) { + case ROUND_M: code[1] |= 1 << 22; break; + case ROUND_P: code[1] |= 2 << 22; break; + case ROUND_Z: code[1] |= 3 << 22; break; + default: + assert(insn->rnd == ROUND_N); + break; + } +} + +void +CodeEmitterNV50::emitMNeg12(const Instruction *i) +{ + code[1] |= i->src(0).mod.neg() << 26; + code[1] |= i->src(1).mod.neg() << 27; +} + +void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos) +{ + uint8_t enc; + + assert(pos >= 32 || pos <= 27); + + switch (cc) { + case CC_LT: enc = 0x1; break; + case CC_LTU: enc = 0x9; break; + case CC_EQ: enc = 0x2; break; + case CC_EQU: enc = 0xa; break; + case CC_LE: enc = 0x3; break; + case CC_LEU: enc = 0xb; break; + case CC_GT: enc = 0x4; break; + case CC_GTU: enc = 0xc; break; + case CC_NE: enc = 0x5; break; + case CC_NEU: enc = 0xd; 
break; + case CC_GE: enc = 0x6; break; + case CC_GEU: enc = 0xe; break; + case CC_TR: enc = 0xf; break; + case CC_FL: enc = 0x0; break; + + case CC_O: enc = 0x10; break; + case CC_C: enc = 0x11; break; + case CC_A: enc = 0x12; break; + case CC_S: enc = 0x13; break; + case CC_NS: enc = 0x1c; break; + case CC_NA: enc = 0x1d; break; + case CC_NC: enc = 0x1e; break; + case CC_NO: enc = 0x1f; break; + + default: + enc = 0; + assert(!"invalid condition code"); + break; + } + if (ty != TYPE_NONE && !isFloatType(ty)) + enc &= ~0x8; // unordered only exists for float types + + code[pos / 32] |= enc << (pos % 32); +} + +void +CodeEmitterNV50::emitFlagsRd(const Instruction *i) +{ + int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc; + + assert(!(code[1] & 0x00003f80)); + + if (s >= 0) { + assert(i->getSrc(s)->reg.file == FILE_FLAGS); + emitCondCode(i->cc, TYPE_NONE, 32 + 7); + srcId(i->src(s), 32 + 12); + } else { + code[1] |= 0x0780; + } +} + +void +CodeEmitterNV50::emitFlagsWr(const Instruction *i) +{ + assert(!(code[1] & 0x70)); + + int flagsDef = i->flagsDef; + + // find flags definition and check that it is the last def + if (flagsDef < 0) { + for (int d = 0; i->defExists(d); ++d) + if (i->def(d).getFile() == FILE_FLAGS) + flagsDef = d; + if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point + WARN("Instruction::flagsDef was not set properly\n"); + } + if (flagsDef == 0 && i->defExists(1)) + WARN("flags def should not be the primary definition\n"); + + if (flagsDef >= 0) + code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40; + +} + +void +CodeEmitterNV50::setARegBits(unsigned int u) +{ + code[0] |= (u & 3) << 26; + code[1] |= (u & 4); +} + +void +CodeEmitterNV50::setAReg16(const Instruction *i, int s) +{ + if (i->srcExists(s)) { + s = i->src(s).indirect[0]; + if (s >= 0) + setARegBits(SDATA(i->src(s)).id + 1); + } +} + +void +CodeEmitterNV50::setImmediate(const Instruction *i, int s) +{ + const ImmediateValue *imm = i->src(s).get()->asImm(); + 
assert(imm); + + uint32_t u = imm->reg.data.u32; + + if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) + u = ~u; + + code[1] |= 3; + code[0] |= (u & 0x3f) << 16; + code[1] |= (u >> 6) << 2; +} + +void +CodeEmitterNV50::setDst(const Value *dst) +{ + const Storage *reg = &dst->join->reg; + + assert(reg->file != FILE_ADDRESS); + + if (reg->data.id < 0 || reg->file == FILE_FLAGS) { + code[0] |= (127 << 2) | 1; + code[1] |= 8; + } else { + int id; + if (reg->file == FILE_SHADER_OUTPUT) { + code[1] |= 8; + id = reg->data.offset / 4; + } else { + id = reg->data.id; + } + code[0] |= id << 2; + } +} + +void +CodeEmitterNV50::setDst(const Instruction *i, int d) +{ + if (i->defExists(d)) { + setDst(i->getDef(d)); + } else + if (!d) { + code[0] |= 0x01fc; // bit bucket + code[1] |= 0x0008; + } +} + +// 3 * 2 bits: +// 0: r +// 1: a/s +// 2: c +// 3: i +void +CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc) +{ + uint8_t mode = 0; + + for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) { + switch (i->src(s).getFile()) { + case FILE_GPR: + break; + case FILE_MEMORY_SHARED: + case FILE_SHADER_INPUT: + mode |= 1 << (s * 2); + break; + case FILE_MEMORY_CONST: + mode |= 2 << (s * 2); + break; + case FILE_IMMEDIATE: + mode |= 3 << (s * 2); + break; + default: + ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile()); + assert(0); + break; + } + } + switch (mode) { + case 0x00: // rrr + break; + case 0x01: // arr/grr + if (progType == Program::TYPE_GEOMETRY) { + code[0] |= 0x01800000; + if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT) + code[1] |= 0x00200000; + } else { + if (enc == NV50_OP_ENC_SHORT) + code[0] |= 0x01000000; + else + code[1] |= 0x00200000; + } + break; + case 0x03: // irr + assert(i->op == OP_MOV); + return; + case 0x0c: // rir + break; + case 0x0d: // gir + code[0] |= 0x01000000; + assert(progType == Program::TYPE_GEOMETRY || + progType == Program::TYPE_COMPUTE); + break; + case 0x08: // rcr + code[0] |= (enc == 
NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000; + code[1] |= (i->getSrc(1)->reg.fileIndex << 22); + break; + case 0x09: // acr/gcr + if (progType == Program::TYPE_GEOMETRY) { + code[0] |= 0x01800000; + } else { + code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000; + code[1] |= 0x00200000; + } + code[1] |= (i->getSrc(1)->reg.fileIndex << 22); + break; + case 0x20: // rrc + code[0] |= 0x01000000; + code[1] |= (i->getSrc(2)->reg.fileIndex << 22); + break; + case 0x21: // arc + code[0] |= 0x01000000; + code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22); + assert(progType != Program::TYPE_GEOMETRY); + break; + default: + ERROR("not encodable: %x\n", mode); + assert(0); + break; + } + if (progType != Program::TYPE_COMPUTE) + return; + + if ((mode & 3) == 1) { + const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14; + + switch (i->getSrc(0)->reg.type) { + case TYPE_U8: + break; + case TYPE_U16: + code[0] |= 1 << pos; + break; + case TYPE_S16: + code[0] |= 2 << pos; + break; + default: + code[0] |= 3 << pos; + assert(i->getSrc(0)->reg.size == 4); + break; + } + } +} + +void +CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot) +{ + if (Target::operationSrcNr[i->op] <= s) + return; + const Storage *reg = &i->src(s).rep()->reg; + + unsigned int id = (reg->file == FILE_GPR) ? 
+ reg->data.id : + reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here + + switch (slot) { + case 0: code[0] |= id << 9; break; + case 1: code[0] |= id << 16; break; + case 2: code[1] |= id << 14; break; + default: + assert(0); + break; + } +} + +// the default form: +// - long instruction +// - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr) +// - address & flags +void +CodeEmitterNV50::emitForm_MAD(const Instruction *i) +{ + assert(i->encSize == 8); + code[0] |= 1; + + emitFlagsRd(i); + emitFlagsWr(i); + + setDst(i, 0); + + setSrcFileBits(i, NV50_OP_ENC_LONG); + setSrc(i, 0, 0); + setSrc(i, 1, 1); + setSrc(i, 2, 2); + + setAReg16(i, 1); +} + +// like default form, but 2nd source in slot 2, and no 3rd source +void +CodeEmitterNV50::emitForm_ADD(const Instruction *i) +{ + assert(i->encSize == 8); + code[0] |= 1; + + emitFlagsRd(i); + emitFlagsWr(i); + + setDst(i, 0); + + setSrcFileBits(i, NV50_OP_ENC_LONG_ALT); + setSrc(i, 0, 0); + setSrc(i, 1, 2); + + setAReg16(i, 1); +} + +// default short form (rr, ar, rc, gr) +void +CodeEmitterNV50::emitForm_MUL(const Instruction *i) +{ + assert(i->encSize == 4 && !(code[0] & 1)); + assert(i->defExists(0)); + assert(!i->getPredicate()); + + setDst(i, 0); + + setSrcFileBits(i, NV50_OP_ENC_SHORT); + setSrc(i, 0, 0); + setSrc(i, 1, 1); +} + +// usual immediate form +// - 1 to 3 sources where last is immediate (rir, gir) +// - no address or predicate possible +void +CodeEmitterNV50::emitForm_IMM(const Instruction *i) +{ + assert(i->encSize == 8); + code[0] |= 1; + + assert(i->defExists(0) && i->srcExists(0)); + + setDst(i, 0); + + setSrcFileBits(i, NV50_OP_ENC_IMM); + if (Target::operationSrcNr[i->op] > 1) { + setSrc(i, 0, 0); + setImmediate(i, 1); + setSrc(i, 2, 1); + } else { + setImmediate(i, 0); + } +} + +void +CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos) +{ + uint8_t enc; + + switch (ty) { + case TYPE_F32: // fall through + case TYPE_S32: // fall through + case TYPE_U32: enc 
= 0x6; break; + case TYPE_B128: enc = 0x5; break; + case TYPE_F64: // fall through + case TYPE_S64: // fall through + case TYPE_U64: enc = 0x4; break; + case TYPE_S16: enc = 0x3; break; + case TYPE_U16: enc = 0x2; break; + case TYPE_S8: enc = 0x1; break; + case TYPE_U8: enc = 0x0; break; + default: + enc = 0; + assert(!"invalid load/store type"); + break; + } + code[pos / 32] |= enc << (pos % 32); +} + +void +CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty) +{ + switch (ty) { + case TYPE_U8: break; + case TYPE_U16: code[1] |= 0x4000; break; + case TYPE_S16: code[1] |= 0x8000; break; + case TYPE_F32: + case TYPE_S32: + case TYPE_U32: code[1] |= 0xc000; break; + default: + assert(0); + break; + } +} + +void +CodeEmitterNV50::emitLOAD(const Instruction *i) +{ + DataFile sf = i->src(0).getFile(); + int32_t offset = i->getSrc(0)->reg.data.offset; + + switch (sf) { + case FILE_SHADER_INPUT: + // use 'mov' where we can + code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001; + code[1] = 0x00200000 | (i->lanes << 14); + if (typeSizeof(i->dType) == 4) + code[1] |= 0x04000000; + break; + case FILE_MEMORY_SHARED: + if (targ->getChipset() >= 0x84) { + assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType))); + code[0] = 0x10000001; + code[1] = 0x40000000; + + if (typeSizeof(i->dType) == 4) + code[1] |= 0x04000000; + + emitLoadStoreSizeCS(i->sType); + } else { + assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType))); + code[0] = 0x10000001; + code[1] = 0x00200000 | (i->lanes << 14); + emitLoadStoreSizeCS(i->sType); + } + break; + case FILE_MEMORY_CONST: + code[0] = 0x10000001; + code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22); + if (typeSizeof(i->dType) == 4) + code[1] |= 0x04000000; + emitLoadStoreSizeCS(i->sType); + break; + case FILE_MEMORY_LOCAL: + code[0] = 0xd0000001; + code[1] = 0x40000000; + break; + case FILE_MEMORY_GLOBAL: + code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); + code[1] = 0x80000000; + break; + default: + assert(!"invalid 
load source file"); + break; + } + if (sf == FILE_MEMORY_LOCAL || + sf == FILE_MEMORY_GLOBAL) + emitLoadStoreSizeLG(i->sType, 21 + 32); + + setDst(i, 0); + + emitFlagsRd(i); + emitFlagsWr(i); + + if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { + srcId(*i->src(0).getIndirect(0), 9); + } else { + setAReg16(i, 0); + srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9); + } +} + +void +CodeEmitterNV50::emitSTORE(const Instruction *i) +{ + DataFile f = i->getSrc(0)->reg.file; + int32_t offset = i->getSrc(0)->reg.data.offset; + + switch (f) { + case FILE_SHADER_OUTPUT: + code[0] = 0x00000001 | ((offset >> 2) << 9); + code[1] = 0x80c00000; + srcId(i->src(1), 32 + 14); + break; + case FILE_MEMORY_GLOBAL: + code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); + code[1] = 0xa0000000; + emitLoadStoreSizeLG(i->dType, 21 + 32); + srcId(i->src(1), 2); + break; + case FILE_MEMORY_LOCAL: + code[0] = 0xd0000001; + code[1] = 0x60000000; + emitLoadStoreSizeLG(i->dType, 21 + 32); + srcId(i->src(1), 2); + break; + case FILE_MEMORY_SHARED: + code[0] = 0x00000001; + code[1] = 0xe0000000; + switch (typeSizeof(i->dType)) { + case 1: + code[0] |= offset << 9; + code[1] |= 0x00400000; + break; + case 2: + code[0] |= (offset >> 1) << 9; + break; + case 4: + code[0] |= (offset >> 2) << 9; + code[1] |= 0x04200000; + break; + default: + assert(0); + break; + } + srcId(i->src(1), 32 + 14); + break; + default: + assert(!"invalid store destination file"); + break; + } + + if (f == FILE_MEMORY_GLOBAL) + srcId(*i->src(0).getIndirect(0), 9); + else + setAReg16(i, 0); + + if (f == FILE_MEMORY_LOCAL) + srcAddr16(i->src(0), false, 9); + + emitFlagsRd(i); +} + +void +CodeEmitterNV50::emitMOV(const Instruction *i) +{ + DataFile sf = i->getSrc(0)->reg.file; + DataFile df = i->getDef(0)->reg.file; + + assert(sf == FILE_GPR || df == FILE_GPR); + + if (sf == FILE_FLAGS) { + code[0] = 0x00000001; + code[1] = 0x20000000; + defId(i->def(0), 2); + srcId(i->src(0), 12); + emitFlagsRd(i); + } 
else + if (sf == FILE_ADDRESS) { + code[0] = 0x00000001; + code[1] = 0x40000000; + defId(i->def(0), 2); + setARegBits(SDATA(i->src(0)).id + 1); + emitFlagsRd(i); + } else + if (df == FILE_FLAGS) { + code[0] = 0x00000001; + code[1] = 0xa0000000; + defId(i->def(0), 4); + srcId(i->src(0), 9); + emitFlagsRd(i); + } else + if (sf == FILE_IMMEDIATE) { + code[0] = 0x10008001; + code[1] = 0x00000003; + emitForm_IMM(i); + } else { + if (i->encSize == 4) { + code[0] = 0x10008000; + } else { + code[0] = 0x10000001; + code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000; + code[1] |= (i->lanes << 14); + emitFlagsRd(i); + } + defId(i->def(0), 2); + srcId(i->src(0), 9); + } + if (df == FILE_SHADER_OUTPUT) { + assert(i->encSize == 8); + code[1] |= 0x8; + } +} + +void +CodeEmitterNV50::emitNOP() +{ + code[0] = 0xf0000001; + code[1] = 0xe0000000; +} + +void +CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp) +{ + code[0] = 0xc0000000 | (lane << 16); + code[1] = 0x80000000; + + code[0] |= (quOp & 0x03) << 20; + code[1] |= (quOp & 0xfc) << 20; + + emitForm_ADD(i); + + if (!i->srcExists(1)) + srcId(i->src(0), 32 + 14); +} + +void +CodeEmitterNV50::emitPFETCH(const Instruction *i) +{ + code[0] = 0x11800001; + code[1] = 0x04200000 | (0xf << 14); + + defId(i->def(0), 2); + srcAddr8(i->src(0), 9); + setAReg16(i, 0); +} + +void +CodeEmitterNV50::emitINTERP(const Instruction *i) +{ + code[0] = 0x80000000; + + defId(i->def(0), 2); + srcAddr8(i->src(0), 16); + + if (i->getInterpMode() == NV50_IR_INTERP_FLAT) { + code[0] |= 1 << 8; + } else { + if (i->op == OP_PINTERP) { + code[0] |= 1 << 25; + srcId(i->src(1), 9); + } + if (i->getSampleMode() == NV50_IR_INTERP_CENTROID) + code[0] |= 1 << 24; + } + + if (i->encSize == 8) { + code[1] = + (code[0] & (3 << 24)) >> (24 - 16) | + (code[0] & (1 << 8)) << (18 - 8); + code[0] &= ~0x03000100; + code[0] |= 1; + emitFlagsRd(i); + } +} + +void +CodeEmitterNV50::emitMINMAX(const Instruction *i) +{ + if (i->dType == TYPE_F64) { 
+ code[0] = 0xe0000000; + code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000; + } else { + code[0] = 0x30000000; + code[1] = 0x80000000; + if (i->op == OP_MIN) + code[1] |= 0x20000000; + + switch (i->dType) { + case TYPE_F32: code[0] |= 0x80000000; break; + case TYPE_S32: code[1] |= 0x8c000000; break; + case TYPE_U32: code[1] |= 0x84000000; break; + case TYPE_S16: code[1] |= 0x80000000; break; + case TYPE_U16: break; + default: + assert(0); + break; + } + code[1] |= i->src(0).mod.abs() << 20; + code[1] |= i->src(1).mod.abs() << 19; + } + emitForm_MAD(i); +} + +void +CodeEmitterNV50::emitFMAD(const Instruction *i) +{ + const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg(); + const int neg_add = i->src(2).mod.neg(); + + code[0] = 0xe0000000; + + if (i->encSize == 4) { + emitForm_MUL(i); + assert(!neg_mul && !neg_add); + } else { + code[1] = neg_mul << 26; + code[1] |= neg_add << 27; + if (i->saturate) + code[1] |= 1 << 29; + emitForm_MAD(i); + } +} + +void +CodeEmitterNV50::emitFADD(const Instruction *i) +{ + const int neg0 = i->src(0).mod.neg(); + const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0); + + code[0] = 0xb0000000; + + assert(!(i->src(0).mod | i->src(1).mod).abs()); + + if (i->src(1).getFile() == FILE_IMMEDIATE) { + code[1] = 0; + emitForm_IMM(i); + code[0] |= neg0 << 15; + code[0] |= neg1 << 22; + if (i->saturate) + code[0] |= 1 << 8; + } else + if (i->encSize == 8) { + code[1] = 0; + emitForm_ADD(i); + code[1] |= neg0 << 26; + code[1] |= neg1 << 27; + if (i->saturate) + code[1] |= 1 << 29; + } else { + emitForm_MUL(i); + code[0] |= neg0 << 15; + code[0] |= neg1 << 22; + if (i->saturate) + code[0] |= 1 << 8; + } +} + +void +CodeEmitterNV50::emitUADD(const Instruction *i) +{ + const int neg0 = i->src(0).mod.neg(); + const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 
1 : 0); + + code[0] = 0x20008000; + + if (i->src(1).getFile() == FILE_IMMEDIATE) { + code[1] = 0; + emitForm_IMM(i); + } else + if (i->encSize == 8) { + code[0] = 0x20000000; + code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000; + emitForm_ADD(i); + } else { + emitForm_MUL(i); + } + assert(!(neg0 && neg1)); + code[0] |= neg0 << 28; + code[0] |= neg1 << 22; + + if (i->flagsSrc >= 0) { + // addc == sub | subr + assert(!(code[0] & 0x10400000) && !i->getPredicate()); + code[0] |= 0x10400000; + srcId(i->src(i->flagsSrc), 32 + 12); + } +} + +void +CodeEmitterNV50::emitAADD(const Instruction *i) +{ + const int s = (i->op == OP_MOV) ? 0 : 1; + + code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9); + code[1] = 0x20000000; + + code[0] |= (DDATA(i->def(0)).id + 1) << 2; + + emitFlagsRd(i); + + if (s && i->srcExists(0)) + setARegBits(SDATA(i->src(0)).id + 1); +} + +void +CodeEmitterNV50::emitIMUL(const Instruction *i) +{ + code[0] = 0x40000000; + + if (i->encSize == 8) { + code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000; + emitForm_MAD(i); + } else { + if (i->sType == TYPE_S16) + code[0] |= 0x8100; + emitForm_MUL(i); + } +} + +void +CodeEmitterNV50::emitFMUL(const Instruction *i) +{ + const int neg = (i->src(0).mod ^ i->src(1).mod).neg(); + + code[0] = 0xc0000000; + + if (i->src(1).getFile() == FILE_IMMEDIATE) { + code[1] = 0; + emitForm_IMM(i); + if (neg) + code[0] |= 0x8000; + } else + if (i->encSize == 8) { + code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0; + if (neg) + code[1] |= 0x08000000; + emitForm_MAD(i); + } else { + emitForm_MUL(i); + if (neg) + code[0] |= 0x8000; + } +} + +void +CodeEmitterNV50::emitIMAD(const Instruction *i) +{ + code[0] = 0x60000000; + if (isSignedType(i->sType)) + code[1] = i->saturate ? 
0x40000000 : 0x20000000; + else + code[1] = 0x00000000; + + int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg(); + int neg2 = i->src(2).mod.neg(); + + assert(!(neg1 & neg2)); + code[1] |= neg1 << 27; + code[1] |= neg2 << 26; + + emitForm_MAD(i); + + if (i->flagsSrc >= 0) { + // add with carry from $cX + assert(!(code[1] & 0x0c000000) && !i->getPredicate()); + code[1] |= 0xc << 24; + srcId(i->src(i->flagsSrc), 32 + 12); + } +} + +void +CodeEmitterNV50::emitISAD(const Instruction *i) +{ + if (i->encSize == 8) { + code[0] = 0x50000000; + switch (i->sType) { + case TYPE_U32: code[1] = 0x04000000; break; + case TYPE_S32: code[1] = 0x0c000000; break; + case TYPE_U16: code[1] = 0x00000000; break; + case TYPE_S16: code[1] = 0x08000000; break; + default: + assert(0); + break; + } + emitForm_MAD(i); + } else { + switch (i->sType) { + case TYPE_U32: code[0] = 0x50008000; break; + case TYPE_S32: code[0] = 0x50008100; break; + case TYPE_U16: code[0] = 0x50000000; break; + case TYPE_S16: code[0] = 0x50000100; break; + default: + assert(0); + break; + } + emitForm_MUL(i); + } +} + +void +CodeEmitterNV50::emitSET(const Instruction *i) +{ + code[0] = 0x30000000; + code[1] = 0x60000000; + + emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14); + + switch (i->sType) { + case TYPE_F32: code[0] |= 0x80000000; break; + case TYPE_S32: code[1] |= 0x0c000000; break; + case TYPE_U32: code[1] |= 0x04000000; break; + case TYPE_S16: code[1] |= 0x08000000; break; + case TYPE_U16: break; + default: + assert(0); + break; + } + if (i->src(0).mod.neg()) code[1] |= 0x04000000; + if (i->src(1).mod.neg()) code[1] |= 0x08000000; + if (i->src(0).mod.abs()) code[1] |= 0x00100000; + if (i->src(1).mod.abs()) code[1] |= 0x00080000; + + emitForm_MAD(i); +} + +void +CodeEmitterNV50::roundMode_CVT(RoundMode rnd) +{ + switch (rnd) { + case ROUND_NI: code[1] |= 0x08000000; break; + case ROUND_M: code[1] |= 0x00020000; break; + case ROUND_MI: code[1] |= 0x08020000; break; + case ROUND_P: code[1] |= 0x00040000; 
break; + case ROUND_PI: code[1] |= 0x08040000; break; + case ROUND_Z: code[1] |= 0x00060000; break; + case ROUND_ZI: code[1] |= 0x08060000; break; + default: + assert(rnd == ROUND_N); + break; + } +} + +void +CodeEmitterNV50::emitCVT(const Instruction *i) +{ + const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); + RoundMode rnd; + + switch (i->op) { + case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break; + case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break; + case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break; + default: + rnd = i->rnd; + break; + } + + code[0] = 0xa0000000; + + switch (i->dType) { + case TYPE_F64: + switch (i->sType) { + case TYPE_F64: code[1] = 0xc4404000; break; + case TYPE_S64: code[1] = 0x44414000; break; + case TYPE_U64: code[1] = 0x44404000; break; + case TYPE_F32: code[1] = 0xc4400000; break; + case TYPE_S32: code[1] = 0x44410000; break; + case TYPE_U32: code[1] = 0x44400000; break; + default: + assert(0); + break; + } + break; + case TYPE_S64: + switch (i->sType) { + case TYPE_F64: code[1] = 0x8c404000; break; + case TYPE_F32: code[1] = 0x8c400000; break; + default: + assert(0); + break; + } + break; + case TYPE_U64: + switch (i->sType) { + case TYPE_F64: code[1] = 0x84404000; break; + case TYPE_F32: code[1] = 0x84400000; break; + default: + assert(0); + break; + } + break; + case TYPE_F32: + switch (i->sType) { + case TYPE_F64: code[1] = 0xc0404000; break; + case TYPE_S64: code[1] = 0x40414000; break; + case TYPE_U64: code[1] = 0x40404000; break; + case TYPE_F32: code[1] = 0xc4004000; break; + case TYPE_S32: code[1] = 0x44014000; break; + case TYPE_U32: code[1] = 0x44004000; break; + case TYPE_F16: code[1] = 0xc4000000; break; + default: + assert(0); + break; + } + break; + case TYPE_S32: + switch (i->sType) { + case TYPE_F64: code[1] = 0x88404000; break; + case TYPE_F32: code[1] = 0x8c004000; break; + case TYPE_S32: code[1] = 0x0c014000; break; + case TYPE_U32: code[1] = 0x0c004000; break; + case TYPE_F16: code[1] = 
0x8c000000; break; + case TYPE_S16: code[1] = 0x0c010000; break; + case TYPE_U16: code[1] = 0x0c000000; break; + case TYPE_S8: code[1] = 0x0c018000; break; + case TYPE_U8: code[1] = 0x0c008000; break; + default: + assert(0); + break; + } + break; + case TYPE_U32: + switch (i->sType) { + case TYPE_F64: code[1] = 0x80404000; break; + case TYPE_F32: code[1] = 0x84004000; break; + case TYPE_S32: code[1] = 0x04014000; break; + case TYPE_U32: code[1] = 0x04004000; break; + case TYPE_F16: code[1] = 0x84000000; break; + case TYPE_S16: code[1] = 0x04010000; break; + case TYPE_U16: code[1] = 0x04000000; break; + case TYPE_S8: code[1] = 0x04018000; break; + case TYPE_U8: code[1] = 0x04008000; break; + default: + assert(0); + break; + } + break; + case TYPE_S16: + case TYPE_U16: + case TYPE_S8: + case TYPE_U8: + default: + assert(0); + break; + } + if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4) + code[1] |= 0x00004000; + + roundMode_CVT(rnd); + + switch (i->op) { + case OP_ABS: code[1] |= 1 << 20; break; + case OP_SAT: code[1] |= 1 << 19; break; + case OP_NEG: code[1] |= 1 << 29; break; + default: + break; + } + code[1] ^= i->src(0).mod.neg() << 29; + code[1] |= i->src(0).mod.abs() << 20; + if (i->saturate) + code[1] |= 1 << 19; + + assert(i->op != OP_ABS || !i->src(0).mod.neg()); + + emitForm_MAD(i); +} + +void +CodeEmitterNV50::emitPreOp(const Instruction *i) +{ + code[0] = 0xb0000000; + code[1] = (i->op == OP_PREEX2) ? 
0xc0004000 : 0xc0000000; + + code[1] |= i->src(0).mod.abs() << 20; + code[1] |= i->src(0).mod.neg() << 26; + + emitForm_MAD(i); +} + +void +CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp) +{ + code[0] = 0x90000000; + + if (i->encSize == 4) { + assert(i->op == OP_RCP); + code[0] |= i->src(0).mod.abs() << 15; + code[0] |= i->src(0).mod.neg() << 22; + emitForm_MUL(i); + } else { + code[1] = subOp << 29; + code[1] |= i->src(0).mod.abs() << 20; + code[1] |= i->src(0).mod.neg() << 26; + emitForm_MAD(i); + } +} + +void +CodeEmitterNV50::emitNOT(const Instruction *i) +{ + code[0] = 0xd0000000; + code[1] = 0x0002c000; + + switch (i->sType) { + case TYPE_U32: + case TYPE_S32: + code[1] |= 0x04000000; + break; + default: + break; + } + emitForm_MAD(i); + setSrc(i, 0, 1); +} + +void +CodeEmitterNV50::emitLogicOp(const Instruction *i) +{ + code[0] = 0xd0000000; + code[1] = 0; + + if (i->src(1).getFile() == FILE_IMMEDIATE) { + switch (i->op) { + case OP_OR: code[0] |= 0x0100; break; + case OP_XOR: code[0] |= 0x8000; break; + default: + assert(i->op == OP_AND); + break; + } + if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) + code[0] |= 1 << 22; + + emitForm_IMM(i); + } else { + switch (i->op) { + case OP_AND: code[1] = 0x04000000; break; + case OP_OR: code[1] = 0x04004000; break; + case OP_XOR: code[1] = 0x04008000; break; + default: + assert(0); + break; + } + if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) + code[1] |= 1 << 16; + if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) + code[1] |= 1 << 17; + + emitForm_MAD(i); + } +} + +void +CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl) +{ + code[0] = 0x00000001 | (shl << 16); + code[1] = 0xc0000000; + + code[0] |= (DDATA(i->def(0)).id + 1) << 2; + + setSrcFileBits(i, NV50_OP_ENC_IMM); + setSrc(i, 0, 0); + emitFlagsRd(i); +} + +void +CodeEmitterNV50::emitShift(const Instruction *i) +{ + if (i->def(0).getFile() == FILE_ADDRESS) { + assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE); + 
emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f); + } else { + code[0] = 0x30000001; + code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000; + if (i->op == OP_SHR && isSignedType(i->sType)) + code[1] |= 1 << 27; + + if (i->src(1).getFile() == FILE_IMMEDIATE) { + code[1] |= 1 << 20; + code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16; + defId(i->def(0), 2); + srcId(i->src(0), 9); + emitFlagsRd(i); + } else { + emitForm_MAD(i); + } + } +} + +void +CodeEmitterNV50::emitOUT(const Instruction *i) +{ + code[0] = (i->op == OP_EMIT) ? 0xf0000200 : 0xf0000400; + code[1] = 0xc0000001; + + emitFlagsRd(i); +} + +void +CodeEmitterNV50::emitTEX(const TexInstruction *i) +{ + code[0] = 0xf0000001; + code[1] = 0x00000000; + + switch (i->op) { + case OP_TXB: + code[1] = 0x20000000; + break; + case OP_TXL: + code[1] = 0x40000000; + break; + case OP_TXF: + code[0] |= 0x01000000; + break; + case OP_TXG: + code[0] = 0x01000000; + code[1] = 0x80000000; + break; + default: + assert(i->op == OP_TEX); + break; + } + + code[0] |= i->tex.r << 9; + code[0] |= i->tex.s << 17; + + int argc = i->tex.target.getArgCount(); + + if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF) + argc += 1; + if (i->tex.target.isShadow()) + argc += 1; + assert(argc <= 4); + + code[0] |= (argc - 1) << 22; + + if (i->tex.target.isCube()) { + code[0] |= 0x08000000; + } else + if (i->tex.useOffsets) { + code[1] |= (i->tex.offset[0][0] & 0xf) << 24; + code[1] |= (i->tex.offset[0][1] & 0xf) << 20; + code[1] |= (i->tex.offset[0][2] & 0xf) << 16; + } + + code[0] |= (i->tex.mask & 0x3) << 25; + code[1] |= (i->tex.mask & 0xc) << 12; + + if (i->tex.liveOnly) + code[1] |= 4; + + defId(i->def(0), 2); + + emitFlagsRd(i); +} + +void +CodeEmitterNV50::emitTXQ(const TexInstruction *i) +{ + assert(i->tex.query == TXQ_DIMS); + + code[0] = 0xf0000001; + code[1] = 0x60000000; + + code[0] |= i->tex.r << 9; + code[0] |= i->tex.s << 17; + + code[0] |= (i->tex.mask & 0x3) << 25; + code[1] |= (i->tex.mask & 0xc) << 12; + + 
defId(i->def(0), 2); + + emitFlagsRd(i); +} + +void +CodeEmitterNV50::emitTEXPREP(const TexInstruction *i) +{ + code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9); + code[1] = 0x60010000; + + code[0] |= (i->tex.mask & 0x3) << 25; + code[1] |= (i->tex.mask & 0xc) << 12; + defId(i->def(0), 2); + + emitFlagsRd(i); +} + +void +CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i) +{ + uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */ + + code[0] = 0x10000003; // bra + code[1] = 0x00000780; // always + + switch (i->subOp) { + case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call + break; + case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call + pos += 8; + break; + default: + assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2)); + code[0] = 0x20000003; // call + code[1] = 0x00000000; // no predicate + break; + } + addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9); + addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4); +} + +void +CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp) +{ + const FlowInstruction *f = i->asFlow(); + bool hasPred = false; + bool hasTarg = false; + + code[0] = 0x00000003 | (flowOp << 28); + code[1] = 0x00000000; + + switch (i->op) { + case OP_BRA: + hasPred = true; + hasTarg = true; + break; + case OP_BREAK: + case OP_BRKPT: + case OP_DISCARD: + case OP_RET: + hasPred = true; + break; + case OP_CALL: + case OP_PREBREAK: + case OP_JOINAT: + hasTarg = true; + break; + case OP_PRERET: + hasTarg = true; + if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) { + emitPRERETEmu(f); + return; + } + break; + default: + break; + } + + if (hasPred) + emitFlagsRd(i); + + if (hasTarg && f) { + uint32_t pos; + + if (f->op == OP_CALL) { + if (f->builtin) { + pos = targNV50->getBuiltinOffset(f->target.builtin); + } else { + pos = f->target.fn->binPos; + } + } else { + pos = f->target.bb->binPos; + } + + code[0] |= ((pos >> 2) & 0xffff) << 11; + code[1] |= ((pos >> 18) & 0x003f) << 14; + + RelocEntry::Type 
relocTy; + + relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE; + + addReloc(relocTy, 0, pos, 0x07fff800, 9); + addReloc(relocTy, 1, pos, 0x000fc000, -4); + } +} + +void +CodeEmitterNV50::emitBAR(const Instruction *i) +{ + ImmediateValue *barId = i->getSrc(0)->asImm(); + assert(barId); + + code[0] = 0x82000003 | (barId->reg.data.u32 << 21); + code[1] = 0x00004000; + + if (i->subOp == NV50_IR_SUBOP_BAR_SYNC) + code[0] |= 1 << 26; +} + +void +CodeEmitterNV50::emitATOM(const Instruction *i) +{ + uint8_t subOp; + switch (i->subOp) { + case NV50_IR_SUBOP_ATOM_ADD: subOp = 0x0; break; + case NV50_IR_SUBOP_ATOM_MIN: subOp = 0x7; break; + case NV50_IR_SUBOP_ATOM_MAX: subOp = 0x6; break; + case NV50_IR_SUBOP_ATOM_INC: subOp = 0x4; break; + case NV50_IR_SUBOP_ATOM_DEC: subOp = 0x5; break; + case NV50_IR_SUBOP_ATOM_AND: subOp = 0xa; break; + case NV50_IR_SUBOP_ATOM_OR: subOp = 0xb; break; + case NV50_IR_SUBOP_ATOM_XOR: subOp = 0xc; break; + case NV50_IR_SUBOP_ATOM_CAS: subOp = 0x2; break; + case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break; + default: + assert(!"invalid subop"); + return; + } + code[0] = 0xd0000001; + code[1] = 0xe0c00000 | (subOp << 2); + if (isSignedType(i->dType)) + code[1] |= 1 << 21; + + // args + emitFlagsRd(i); + setDst(i, 0); + setSrc(i, 1, 1); + if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) + setSrc(i, 2, 2); + + // g[] pointer + code[0] |= i->getSrc(0)->reg.fileIndex << 23; + srcId(i->getIndirect(0, 0), 9); +} + +bool +CodeEmitterNV50::emitInstruction(Instruction *insn) +{ + if (!insn->encSize) { + ERROR("skipping unencodable instruction: "); insn->print(); + return false; + } else + if (codeSize + insn->encSize > codeSizeLimit) { + ERROR("code emitter output buffer too small\n"); + return false; + } + + if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) { + INFO("EMIT: "); insn->print(); + } + + switch (insn->op) { + case OP_MOV: + emitMOV(insn); + break; + case OP_EXIT: + case OP_NOP: + case OP_JOIN: + emitNOP(); + break; + 
case OP_VFETCH: + case OP_LOAD: + emitLOAD(insn); + break; + case OP_EXPORT: + case OP_STORE: + emitSTORE(insn); + break; + case OP_PFETCH: + emitPFETCH(insn); + break; + case OP_LINTERP: + case OP_PINTERP: + emitINTERP(insn); + break; + case OP_ADD: + case OP_SUB: + if (isFloatType(insn->dType)) + emitFADD(insn); + else if (insn->getDef(0)->reg.file == FILE_ADDRESS) + emitAADD(insn); + else + emitUADD(insn); + break; + case OP_MUL: + if (isFloatType(insn->dType)) + emitFMUL(insn); + else + emitIMUL(insn); + break; + case OP_MAD: + case OP_FMA: + if (isFloatType(insn->dType)) + emitFMAD(insn); + else + emitIMAD(insn); + break; + case OP_SAD: + emitISAD(insn); + break; + case OP_NOT: + emitNOT(insn); + break; + case OP_AND: + case OP_OR: + case OP_XOR: + emitLogicOp(insn); + break; + case OP_SHL: + case OP_SHR: + emitShift(insn); + break; + case OP_SET: + emitSET(insn); + break; + case OP_MIN: + case OP_MAX: + emitMINMAX(insn); + break; + case OP_CEIL: + case OP_FLOOR: + case OP_TRUNC: + case OP_ABS: + case OP_NEG: + case OP_SAT: + emitCVT(insn); + break; + case OP_CVT: + if (insn->def(0).getFile() == FILE_ADDRESS) + emitARL(insn, 0); + else + if (insn->def(0).getFile() == FILE_FLAGS || + insn->src(0).getFile() == FILE_FLAGS || + insn->src(0).getFile() == FILE_ADDRESS) + emitMOV(insn); + else + emitCVT(insn); + break; + case OP_RCP: + emitSFnOp(insn, 0); + break; + case OP_RSQ: + emitSFnOp(insn, 2); + break; + case OP_LG2: + emitSFnOp(insn, 3); + break; + case OP_SIN: + emitSFnOp(insn, 4); + break; + case OP_COS: + emitSFnOp(insn, 5); + break; + case OP_EX2: + emitSFnOp(insn, 6); + break; + case OP_PRESIN: + case OP_PREEX2: + emitPreOp(insn); + break; + case OP_TEX: + case OP_TXB: + case OP_TXL: + case OP_TXF: + emitTEX(insn->asTex()); + break; + case OP_TXQ: + emitTXQ(insn->asTex()); + break; + case OP_TEXPREP: + emitTEXPREP(insn->asTex()); + break; + case OP_EMIT: + case OP_RESTART: + emitOUT(insn); + break; + case OP_DISCARD: + emitFlow(insn, 0x0); + break; + 
case OP_BRA: + emitFlow(insn, 0x1); + break; + case OP_CALL: + emitFlow(insn, 0x2); + break; + case OP_RET: + emitFlow(insn, 0x3); + break; + case OP_PREBREAK: + emitFlow(insn, 0x4); + break; + case OP_BREAK: + emitFlow(insn, 0x5); + break; + case OP_QUADON: + emitFlow(insn, 0x6); + break; + case OP_QUADPOP: + emitFlow(insn, 0x7); + break; + case OP_JOINAT: + emitFlow(insn, 0xa); + break; + case OP_PRERET: + emitFlow(insn, 0xd); + break; + case OP_QUADOP: + emitQUADOP(insn, insn->lanes, insn->subOp); + break; + case OP_DFDX: + emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99); + break; + case OP_DFDY: + emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5); + break; + case OP_ATOM: + emitATOM(insn); + break; + case OP_BAR: + emitBAR(insn); + break; + case OP_PHI: + case OP_UNION: + case OP_CONSTRAINT: + ERROR("operation should have been eliminated\n"); + return false; + case OP_EXP: + case OP_LOG: + case OP_SQRT: + case OP_POW: + case OP_SELP: + case OP_SLCT: + case OP_TXD: + case OP_PRECONT: + case OP_CONT: + case OP_POPCNT: + case OP_INSBF: + case OP_EXTBF: + ERROR("operation should have been lowered\n"); + return false; + default: + ERROR("unknown op: %u\n", insn->op); + return false; + } + if (insn->join || insn->op == OP_JOIN) + code[1] |= 0x2; + else + if (insn->exit || insn->op == OP_EXIT) + code[1] |= 0x1; + + assert((insn->encSize == 8) == (code[0] & 1)); + + code += insn->encSize / 4; + codeSize += insn->encSize; + return true; +} + +uint32_t +CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const +{ + const Target::OpInfo &info = targ->getOpInfo(i); + + if (info.minEncSize > 4) + return 8; + + // check constraints on dst and src operands + for (int d = 0; i->defExists(d); ++d) { + if (i->def(d).rep()->reg.data.id > 63 || + i->def(d).rep()->reg.file != FILE_GPR) + return 8; + } + + for (int s = 0; i->srcExists(s); ++s) { + DataFile sf = i->src(s).getFile(); + if (sf != FILE_GPR) + if (sf != FILE_SHADER_INPUT || progType != 
Program::TYPE_FRAGMENT) + return 8; + if (i->src(s).rep()->reg.data.id > 63) + return 8; + } + + // check modifiers & rounding + if (i->join || i->lanes != 0xf || i->exit) + return 8; + if (i->op == OP_MUL && i->rnd != ROUND_N) + return 8; + + if (i->asTex()) + return 8; // TODO: short tex encoding + + // check constraints on short MAD + if (info.srcNr >= 2 && i->srcExists(2)) { + if (i->saturate || i->src(2).mod) + return 8; + if ((i->src(0).mod ^ i->src(1).mod) || + (i->src(0).mod | i->src(1).mod).abs()) + return 8; + if (!i->defExists(0) || + i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id) + return 8; + } + + return info.minEncSize; +} + +// Change the encoding size of an instruction after BBs have been scheduled. +static void +makeInstructionLong(Instruction *insn) +{ + if (insn->encSize == 8) + return; + Function *fn = insn->bb->getFunction(); + int n = 0; + int adj = 4; + + for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next); + + if (n & 1) { + adj = 8; + insn->next->encSize = 8; + } else + if (insn->prev && insn->prev->encSize == 4) { + adj = 8; + insn->prev->encSize = 8; + } + insn->encSize = 8; + + for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) { + fn->bbArray[i]->binPos += 4; + } + fn->binSize += adj; + insn->bb->binSize += adj; +} + +static bool +trySetExitModifier(Instruction *insn) +{ + if (insn->op == OP_DISCARD || + insn->op == OP_QUADON || + insn->op == OP_QUADPOP) + return false; + for (int s = 0; insn->srcExists(s); ++s) + if (insn->src(s).getFile() == FILE_IMMEDIATE) + return false; + if (insn->asFlow()) { + if (insn->op == OP_CALL) // side effects ! + return false; + if (insn->getPredicate()) // cannot do conditional exit (or can we ?) 
+ return false; + insn->op = OP_EXIT; + } + insn->exit = 1; + makeInstructionLong(insn); + return true; +} + +static void +replaceExitWithModifier(Function *func) +{ + BasicBlock *epilogue = BasicBlock::get(func->cfgExit); + + if (!epilogue->getExit() || + epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT + return; + + if (epilogue->getEntry()->op != OP_EXIT) { + Instruction *insn = epilogue->getExit()->prev; + if (!insn || !trySetExitModifier(insn)) + return; + insn->exit = 1; + } else { + for (Graph::EdgeIterator ei = func->cfgExit->incident(); + !ei.end(); ei.next()) { + BasicBlock *bb = BasicBlock::get(ei.getNode()); + Instruction *i = bb->getExit(); + + if (!i || !trySetExitModifier(i)) + return; + } + } + epilogue->binSize -= 8; + func->binSize -= 8; + delete_Instruction(func->getProgram(), epilogue->getExit()); +} + +void +CodeEmitterNV50::prepareEmission(Function *func) +{ + CodeEmitter::prepareEmission(func); + + replaceExitWithModifier(func); +} + +CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) : + CodeEmitter(target), targNV50(target) +{ + targ = target; // specialized + code = NULL; + codeSize = codeSizeLimit = 0; + relocInfo = NULL; +} + +CodeEmitter * +TargetNV50::getCodeEmitter(Program::Type type) +{ + CodeEmitterNV50 *emit = new CodeEmitterNV50(this); + emit->setProgramType(type); + return emit; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp new file mode 100644 index 00000000000..90c409d35e6 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -0,0 +1,2988 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, 
publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "codegen/nv50_ir_target_nvc0.h" + +namespace nv50_ir { + +// Argh, all these assertions ... + +class CodeEmitterNVC0 : public CodeEmitter +{ +public: + CodeEmitterNVC0(const TargetNVC0 *); + + virtual bool emitInstruction(Instruction *); + virtual uint32_t getMinEncodingSize(const Instruction *) const; + virtual void prepareEmission(Function *); + + inline void setProgramType(Program::Type pType) { progType = pType; } + +private: + const TargetNVC0 *targNVC0; + + Program::Type progType; + + const bool writeIssueDelays; + +private: + void emitForm_A(const Instruction *, uint64_t); + void emitForm_B(const Instruction *, uint64_t); + void emitForm_S(const Instruction *, uint32_t, bool pred); + + void emitPredicate(const Instruction *); + + void setAddress16(const ValueRef&); + void setAddress24(const ValueRef&); + void setAddressByFile(const ValueRef&); + void setImmediate(const Instruction *, const int s); // needs op already set + void setImmediateS8(const ValueRef&); + void setSUConst16(const Instruction *, const int s); + void setSUPred(const Instruction *, const int s); + + void emitCondCode(CondCode cc, int pos); + void emitInterpMode(const 
Instruction *); + void emitLoadStoreType(DataType ty); + void emitSUGType(DataType); + void emitCachingMode(CacheMode c); + + void emitShortSrc2(const ValueRef&); + + inline uint8_t getSRegEncoding(const ValueRef&); + + void roundMode_A(const Instruction *); + void roundMode_C(const Instruction *); + void roundMode_CS(const Instruction *); + + void emitNegAbs12(const Instruction *); + + void emitNOP(const Instruction *); + + void emitLOAD(const Instruction *); + void emitSTORE(const Instruction *); + void emitMOV(const Instruction *); + void emitATOM(const Instruction *); + void emitMEMBAR(const Instruction *); + void emitCCTL(const Instruction *); + + void emitINTERP(const Instruction *); + void emitPFETCH(const Instruction *); + void emitVFETCH(const Instruction *); + void emitEXPORT(const Instruction *); + void emitOUT(const Instruction *); + + void emitUADD(const Instruction *); + void emitFADD(const Instruction *); + void emitUMUL(const Instruction *); + void emitFMUL(const Instruction *); + void emitIMAD(const Instruction *); + void emitISAD(const Instruction *); + void emitFMAD(const Instruction *); + void emitMADSP(const Instruction *); + + void emitNOT(Instruction *); + void emitLogicOp(const Instruction *, uint8_t subOp); + void emitPOPC(const Instruction *); + void emitINSBF(const Instruction *); + void emitEXTBF(const Instruction *); + void emitPERMT(const Instruction *); + void emitShift(const Instruction *); + + void emitSFnOp(const Instruction *, uint8_t subOp); + + void emitCVT(Instruction *); + void emitMINMAX(const Instruction *); + void emitPreOp(const Instruction *); + + void emitSET(const CmpInstruction *); + void emitSLCT(const CmpInstruction *); + void emitSELP(const Instruction *); + + void emitTEXBAR(const Instruction *); + void emitTEX(const TexInstruction *); + void emitTEXCSAA(const TexInstruction *); + void emitTXQ(const TexInstruction *); + + void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask); + + void emitFlow(const 
Instruction *); + void emitBAR(const Instruction *); + + void emitSUCLAMPMode(uint16_t); + void emitSUCalc(Instruction *); + void emitSULDGB(const TexInstruction *); + void emitSUSTGx(const TexInstruction *); + + void emitVSHL(const Instruction *); + void emitVectorSubOp(const Instruction *); + + inline void defId(const ValueDef&, const int pos); + inline void defId(const Instruction *, int d, const int pos); + inline void srcId(const ValueRef&, const int pos); + inline void srcId(const ValueRef *, const int pos); + inline void srcId(const Instruction *, int s, const int pos); + inline void srcAddr32(const ValueRef&, int pos, int shr); + + inline bool isLIMM(const ValueRef&, DataType ty); +}; + +// for better visibility +#define HEX64(h, l) 0x##h##l##ULL + +#define SDATA(a) ((a).rep()->reg.data) +#define DDATA(a) ((a).rep()->reg.data) + +void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos) +{ + code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32); +} + +void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos) +{ + code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32); +} + +void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos) +{ + int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63; + code[pos / 32] |= r << (pos % 32); +} + +void +CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr) +{ + const uint32_t offset = SDATA(src).offset >> shr; + + code[pos / 32] |= offset << (pos % 32); + if (pos && (pos < 32)) + code[1] |= offset >> (32 - pos); +} + +void CodeEmitterNVC0::defId(const ValueDef& def, const int pos) +{ + code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32); +} + +void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos) +{ + int r = insn->defExists(d) ? 
DDATA(insn->def(d)).id : 63; + code[pos / 32] |= r << (pos % 32); +} + +bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty) +{ + const ImmediateValue *imm = ref.get()->asImm(); + + return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000)); +} + +void +CodeEmitterNVC0::roundMode_A(const Instruction *insn) +{ + switch (insn->rnd) { + case ROUND_M: code[1] |= 1 << 23; break; + case ROUND_P: code[1] |= 2 << 23; break; + case ROUND_Z: code[1] |= 3 << 23; break; + default: + assert(insn->rnd == ROUND_N); + break; + } +} + +void +CodeEmitterNVC0::emitNegAbs12(const Instruction *i) +{ + if (i->src(1).mod.abs()) code[0] |= 1 << 6; + if (i->src(0).mod.abs()) code[0] |= 1 << 7; + if (i->src(1).mod.neg()) code[0] |= 1 << 8; + if (i->src(0).mod.neg()) code[0] |= 1 << 9; +} + +void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos) +{ + uint8_t val; + + switch (cc) { + case CC_LT: val = 0x1; break; + case CC_LTU: val = 0x9; break; + case CC_EQ: val = 0x2; break; + case CC_EQU: val = 0xa; break; + case CC_LE: val = 0x3; break; + case CC_LEU: val = 0xb; break; + case CC_GT: val = 0x4; break; + case CC_GTU: val = 0xc; break; + case CC_NE: val = 0x5; break; + case CC_NEU: val = 0xd; break; + case CC_GE: val = 0x6; break; + case CC_GEU: val = 0xe; break; + case CC_TR: val = 0xf; break; + case CC_FL: val = 0x0; break; + + case CC_A: val = 0x14; break; + case CC_NA: val = 0x13; break; + case CC_S: val = 0x15; break; + case CC_NS: val = 0x12; break; + case CC_C: val = 0x16; break; + case CC_NC: val = 0x11; break; + case CC_O: val = 0x17; break; + case CC_NO: val = 0x10; break; + + default: + val = 0; + assert(!"invalid condition code"); + break; + } + code[pos / 32] |= val << (pos % 32); +} + +void +CodeEmitterNVC0::emitPredicate(const Instruction *i) +{ + if (i->predSrc >= 0) { + assert(i->getPredicate()->reg.file == FILE_PREDICATE); + srcId(i->src(i->predSrc), 10); + if (i->cc == CC_NOT_P) + code[0] |= 0x2000; // negate + } else { + code[0] |= 0x1c00; + } 
+} + +void +CodeEmitterNVC0::setAddressByFile(const ValueRef& src) +{ + switch (src.getFile()) { + case FILE_MEMORY_GLOBAL: + srcAddr32(src, 26, 0); + break; + case FILE_MEMORY_LOCAL: + case FILE_MEMORY_SHARED: + setAddress24(src); + break; + default: + assert(src.getFile() == FILE_MEMORY_CONST); + setAddress16(src); + break; + } +} + +void +CodeEmitterNVC0::setAddress16(const ValueRef& src) +{ + Symbol *sym = src.get()->asSym(); + + assert(sym); + + code[0] |= (sym->reg.data.offset & 0x003f) << 26; + code[1] |= (sym->reg.data.offset & 0xffc0) >> 6; +} + +void +CodeEmitterNVC0::setAddress24(const ValueRef& src) +{ + Symbol *sym = src.get()->asSym(); + + assert(sym); + + code[0] |= (sym->reg.data.offset & 0x00003f) << 26; + code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6; +} + +void +CodeEmitterNVC0::setImmediate(const Instruction *i, const int s) +{ + const ImmediateValue *imm = i->src(s).get()->asImm(); + uint32_t u32; + + assert(imm); + u32 = imm->reg.data.u32; + + if ((code[0] & 0xf) == 0x2) { + // LIMM + code[0] |= (u32 & 0x3f) << 26; + code[1] |= u32 >> 6; + } else + if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) { + // integer immediate + assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000); + assert(!(code[1] & 0xc000)); + u32 &= 0xfffff; + code[0] |= (u32 & 0x3f) << 26; + code[1] |= 0xc000 | (u32 >> 6); + } else { + // float immediate + assert(!(u32 & 0x00000fff)); + assert(!(code[1] & 0xc000)); + code[0] |= ((u32 >> 12) & 0x3f) << 26; + code[1] |= 0xc000 | (u32 >> 18); + } +} + +void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref) +{ + const ImmediateValue *imm = ref.get()->asImm(); + + int8_t s8 = static_cast<int8_t>(imm->reg.data.s32); + + assert(s8 == imm->reg.data.s32); + + code[0] |= (s8 & 0x3f) << 26; + code[0] |= (s8 >> 6) << 8; +} + +void +CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc) +{ + code[0] = opc; + code[1] = opc >> 32; + + emitPredicate(i); + + defId(i->def(0), 14); + + int s1 = 26; + if 
(i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST) + s1 = 49; + + for (int s = 0; s < 3 && i->srcExists(s); ++s) { + switch (i->getSrc(s)->reg.file) { + case FILE_MEMORY_CONST: + assert(!(code[1] & 0xc000)); + code[1] |= (s == 2) ? 0x8000 : 0x4000; + code[1] |= i->getSrc(s)->reg.fileIndex << 10; + setAddress16(i->src(s)); + break; + case FILE_IMMEDIATE: + assert(s == 1 || + i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2); + assert(!(code[1] & 0xc000)); + setImmediate(i, s); + break; + case FILE_GPR: + if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst + break; + srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20); + break; + default: + // ignore here, can be predicate or flags, but must not be address + break; + } + } +} + +void +CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc) +{ + code[0] = opc; + code[1] = opc >> 32; + + emitPredicate(i); + + defId(i->def(0), 14); + + switch (i->src(0).getFile()) { + case FILE_MEMORY_CONST: + assert(!(code[1] & 0xc000)); + code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10); + setAddress16(i->src(0)); + break; + case FILE_IMMEDIATE: + assert(!(code[1] & 0xc000)); + setImmediate(i, 0); + break; + case FILE_GPR: + srcId(i->src(0), 26); + break; + default: + // ignore here, can be predicate or flags, but must not be address + break; + } +} + +void +CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred) +{ + code[0] = opc; + + int ss2a = 0; + if (opc == 0x0d || opc == 0x0e) + ss2a = 2; + + defId(i->def(0), 14); + srcId(i->src(0), 20); + + assert(pred || (i->predSrc < 0)); + if (pred) + emitPredicate(i); + + for (int s = 1; s < 3 && i->srcExists(s); ++s) { + if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) { + assert(!(code[0] & (0x300 >> ss2a))); + switch (i->src(s).get()->reg.fileIndex) { + case 0: code[0] |= 0x100 >> ss2a; break; + case 1: code[0] |= 0x200 >> ss2a; break; + case 16: code[0] |= 0x300 >> ss2a; break; + default: + ERROR("invalid c[] 
space for short form\n"); + break; + } + if (s == 1) + code[0] |= i->getSrc(s)->reg.data.offset << 24; + else + code[0] |= i->getSrc(s)->reg.data.offset << 6; + } else + if (i->src(s).getFile() == FILE_IMMEDIATE) { + assert(s == 1); + setImmediateS8(i->src(s)); + } else + if (i->src(s).getFile() == FILE_GPR) { + srcId(i->src(s), (s == 1) ? 26 : 8); + } + } +} + +void +CodeEmitterNVC0::emitShortSrc2(const ValueRef &src) +{ + if (src.getFile() == FILE_MEMORY_CONST) { + switch (src.get()->reg.fileIndex) { + case 0: code[0] |= 0x100; break; + case 1: code[0] |= 0x200; break; + case 16: code[0] |= 0x300; break; + default: + assert(!"unsupported file index for short op"); + break; + } + srcAddr32(src, 20, 2); + } else { + srcId(src, 20); + assert(src.getFile() == FILE_GPR); + } +} + +void +CodeEmitterNVC0::emitNOP(const Instruction *i) +{ + code[0] = 0x000001e4; + code[1] = 0x40000000; + emitPredicate(i); +} + +void +CodeEmitterNVC0::emitFMAD(const Instruction *i) +{ + bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); + + if (i->encSize == 8) { + if (isLIMM(i->src(1), TYPE_F32)) { + emitForm_A(i, HEX64(20000000, 00000002)); + } else { + emitForm_A(i, HEX64(30000000, 00000000)); + + if (i->src(2).mod.neg()) + code[0] |= 1 << 8; + } + roundMode_A(i); + + if (neg1) + code[0] |= 1 << 9; + + if (i->saturate) + code[0] |= 1 << 5; + if (i->ftz) + code[0] |= 1 << 6; + } else { + assert(!i->saturate && !i->src(2).mod.neg()); + emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 
0x2e : 0x0e, + false); + if (neg1) + code[0] |= 1 << 4; + } +} + +void +CodeEmitterNVC0::emitFMUL(const Instruction *i) +{ + bool neg = (i->src(0).mod ^ i->src(1).mod).neg(); + + assert(i->postFactor >= -3 && i->postFactor <= 3); + + if (i->encSize == 8) { + if (isLIMM(i->src(1), TYPE_F32)) { + assert(i->postFactor == 0); // constant folded, hopefully + emitForm_A(i, HEX64(30000000, 00000002)); + } else { + emitForm_A(i, HEX64(58000000, 00000000)); + roundMode_A(i); + code[1] |= ((i->postFactor > 0) ? + (7 - i->postFactor) : (0 - i->postFactor)) << 17; + } + if (neg) + code[1] ^= 1 << 25; // aliases with LIMM sign bit + + if (i->saturate) + code[0] |= 1 << 5; + + if (i->dnz) + code[0] |= 1 << 7; + else + if (i->ftz) + code[0] |= 1 << 6; + } else { + assert(!neg && !i->saturate && !i->ftz && !i->postFactor); + emitForm_S(i, 0xa8, true); + } +} + +void +CodeEmitterNVC0::emitUMUL(const Instruction *i) +{ + if (i->encSize == 8) { + if (i->src(1).getFile() == FILE_IMMEDIATE) { + emitForm_A(i, HEX64(10000000, 00000002)); + } else { + emitForm_A(i, HEX64(50000000, 00000003)); + } + if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) + code[0] |= 1 << 6; + if (i->sType == TYPE_S32) + code[0] |= 1 << 5; + if (i->dType == TYPE_S32) + code[0] |= 1 << 7; + } else { + emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 
0xaa : 0x2a, true); + + if (i->sType == TYPE_S32) + code[0] |= 1 << 6; + } +} + +void +CodeEmitterNVC0::emitFADD(const Instruction *i) +{ + if (i->encSize == 8) { + if (isLIMM(i->src(1), TYPE_F32)) { + assert(!i->saturate); + emitForm_A(i, HEX64(28000000, 00000002)); + + code[0] |= i->src(0).mod.abs() << 7; + code[0] |= i->src(0).mod.neg() << 9; + + if (i->src(1).mod.abs()) + code[1] &= 0xfdffffff; + if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg())) + code[1] ^= 0x02000000; + } else { + emitForm_A(i, HEX64(50000000, 00000000)); + + roundMode_A(i); + if (i->saturate) + code[1] |= 1 << 17; + + emitNegAbs12(i); + if (i->op == OP_SUB) code[0] ^= 1 << 8; + } + if (i->ftz) + code[0] |= 1 << 5; + } else { + assert(!i->saturate && i->op != OP_SUB && + !i->src(0).mod.abs() && + !i->src(1).mod.neg() && !i->src(1).mod.abs()); + + emitForm_S(i, 0x49, true); + + if (i->src(0).mod.neg()) + code[0] |= 1 << 7; + } +} + +void +CodeEmitterNVC0::emitUADD(const Instruction *i) +{ + uint32_t addOp = 0; + + assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); + assert(!i->src(0).mod.neg() || !i->src(1).mod.neg()); + + if (i->src(0).mod.neg()) + addOp |= 0x200; + if (i->src(1).mod.neg()) + addOp |= 0x100; + if (i->op == OP_SUB) { + addOp ^= 0x100; + assert(addOp != 0x300); // would be add-plus-one + } + + if (i->encSize == 8) { + if (isLIMM(i->src(1), TYPE_U32)) { + emitForm_A(i, HEX64(08000000, 00000002)); + if (i->defExists(1)) + code[1] |= 1 << 26; // write carry + } else { + emitForm_A(i, HEX64(48000000, 00000003)); + if (i->defExists(1)) + code[1] |= 1 << 16; // write carry + } + code[0] |= addOp; + + if (i->saturate) + code[0] |= 1 << 5; + if (i->flagsSrc >= 0) // add carry + code[0] |= 1 << 6; + } else { + assert(!(addOp & 0x100)); + emitForm_S(i, (addOp >> 3) | + ((i->src(1).getFile() == FILE_IMMEDIATE) ? 
0xac : 0x2c), true); + } +} + +// TODO: shl-add +void +CodeEmitterNVC0::emitIMAD(const Instruction *i) +{ + assert(i->encSize == 8); + emitForm_A(i, HEX64(20000000, 00000003)); + + if (isSignedType(i->dType)) + code[0] |= 1 << 7; + if (isSignedType(i->sType)) + code[0] |= 1 << 5; + + code[1] |= i->saturate << 24; + + if (i->flagsDef >= 0) code[1] |= 1 << 16; + if (i->flagsSrc >= 0) code[1] |= 1 << 23; + + if (i->src(2).mod.neg()) code[0] |= 0x10; + if (i->src(1).mod.neg() ^ + i->src(0).mod.neg()) code[0] |= 0x20; + + if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) + code[0] |= 1 << 6; +} + +void +CodeEmitterNVC0::emitMADSP(const Instruction *i) +{ + assert(targ->getChipset() >= NVISA_GK104_CHIPSET); + + emitForm_A(i, HEX64(00000000, 00000003)); + + if (i->subOp == NV50_IR_SUBOP_MADSP_SD) { + code[1] |= 0x01800000; + } else { + code[0] |= (i->subOp & 0x00f) << 7; + code[0] |= (i->subOp & 0x0f0) << 1; + code[0] |= (i->subOp & 0x100) >> 3; + code[0] |= (i->subOp & 0x200) >> 2; + code[1] |= (i->subOp & 0xc00) << 13; + } + + if (i->flagsDef >= 0) + code[1] |= 1 << 16; +} + +void +CodeEmitterNVC0::emitISAD(const Instruction *i) +{ + assert(i->dType == TYPE_S32 || i->dType == TYPE_U32); + assert(i->encSize == 8); + + emitForm_A(i, HEX64(38000000, 00000003)); + + if (i->dType == TYPE_S32) + code[0] |= 1 << 5; +} + +void +CodeEmitterNVC0::emitNOT(Instruction *i) +{ + assert(i->encSize == 8); + i->setSrc(1, i->src(0)); + emitForm_A(i, HEX64(68000000, 000001c3)); +} + +void +CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp) +{ + if (i->def(0).getFile() == FILE_PREDICATE) { + code[0] = 0x00000004 | (subOp << 30); + code[1] = 0x0c000000; + + emitPredicate(i); + + defId(i->def(0), 17); + srcId(i->src(0), 20); + if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23; + srcId(i->src(1), 26); + if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29; + + if (i->defExists(1)) { + defId(i->def(1), 14); + } else { + code[0] |= 7 << 14; + } + // (a OP b) 
OP c + if (i->predSrc != 2 && i->srcExists(2)) { + code[1] |= subOp << 21; + srcId(i->src(2), 17); + if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20; + } else { + code[1] |= 0x000e0000; + } + } else + if (i->encSize == 8) { + if (isLIMM(i->src(1), TYPE_U32)) { + emitForm_A(i, HEX64(38000000, 00000002)); + + if (i->flagsDef >= 0) + code[1] |= 1 << 26; + } else { + emitForm_A(i, HEX64(68000000, 00000003)); + + if (i->flagsDef >= 0) + code[1] |= 1 << 16; + } + code[0] |= subOp << 6; + + if (i->flagsSrc >= 0) // carry + code[0] |= 1 << 5; + + if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9; + if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8; + } else { + emitForm_S(i, (subOp << 5) | + ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true); + } +} + +void +CodeEmitterNVC0::emitPOPC(const Instruction *i) +{ + emitForm_A(i, HEX64(54000000, 00000004)); + + if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9; + if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8; +} + +void +CodeEmitterNVC0::emitINSBF(const Instruction *i) +{ + emitForm_A(i, HEX64(28000000, 00000003)); +} + +void +CodeEmitterNVC0::emitEXTBF(const Instruction *i) +{ + emitForm_A(i, HEX64(70000000, 00000003)); + + if (i->dType == TYPE_S32) + code[0] |= 1 << 5; + if (i->subOp == NV50_IR_SUBOP_EXTBF_REV) + code[0] |= 1 << 8; +} + +void +CodeEmitterNVC0::emitPERMT(const Instruction *i) +{ + emitForm_A(i, HEX64(24000000, 00000004)); + + code[0] |= i->subOp << 5; +} + +void +CodeEmitterNVC0::emitShift(const Instruction *i) +{ + if (i->op == OP_SHR) { + emitForm_A(i, HEX64(58000000, 00000003) + | (isSignedType(i->dType) ? 
0x20 : 0x00)); + } else { + emitForm_A(i, HEX64(60000000, 00000003)); + } + + if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP) + code[0] |= 1 << 9; +} + +void +CodeEmitterNVC0::emitPreOp(const Instruction *i) +{ + if (i->encSize == 8) { + emitForm_B(i, HEX64(60000000, 00000000)); + + if (i->op == OP_PREEX2) + code[0] |= 0x20; + + if (i->src(0).mod.abs()) code[0] |= 1 << 6; + if (i->src(0).mod.neg()) code[0] |= 1 << 8; + } else { + emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true); + } +} + +void +CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp) +{ + if (i->encSize == 8) { + code[0] = 0x00000000 | (subOp << 26); + code[1] = 0xc8000000; + + emitPredicate(i); + + defId(i->def(0), 14); + srcId(i->src(0), 20); + + assert(i->src(0).getFile() == FILE_GPR); + + if (i->saturate) code[0] |= 1 << 5; + + if (i->src(0).mod.abs()) code[0] |= 1 << 7; + if (i->src(0).mod.neg()) code[0] |= 1 << 9; + } else { + emitForm_S(i, 0x80000008 | (subOp << 26), true); + + assert(!i->src(0).mod.neg()); + if (i->src(0).mod.abs()) code[0] |= 1 << 30; + } +} + +void +CodeEmitterNVC0::emitMINMAX(const Instruction *i) +{ + uint64_t op; + + assert(i->encSize == 8); + + op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL; + + if (i->ftz) + op |= 1 << 5; + else + if (!isFloatType(i->dType)) + op |= isSignedType(i->dType) ? 
0x23 : 0x03; + + emitForm_A(i, op); + emitNegAbs12(i); +} + +void +CodeEmitterNVC0::roundMode_C(const Instruction *i) +{ + switch (i->rnd) { + case ROUND_M: code[1] |= 1 << 17; break; + case ROUND_P: code[1] |= 2 << 17; break; + case ROUND_Z: code[1] |= 3 << 17; break; + case ROUND_NI: code[0] |= 1 << 7; break; + case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break; + case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break; + case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break; + case ROUND_N: break; + default: + assert(!"invalid round mode"); + break; + } +} + +void +CodeEmitterNVC0::roundMode_CS(const Instruction *i) +{ + switch (i->rnd) { + case ROUND_M: + case ROUND_MI: code[0] |= 1 << 16; break; + case ROUND_P: + case ROUND_PI: code[0] |= 2 << 16; break; + case ROUND_Z: + case ROUND_ZI: code[0] |= 3 << 16; break; + default: + break; + } +} + +void +CodeEmitterNVC0::emitCVT(Instruction *i) +{ + const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); + + switch (i->op) { + case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break; + case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break; + case OP_TRUNC: i->rnd = f2f ? 
ROUND_ZI : ROUND_Z; break; + default: + break; + } + + const bool sat = (i->op == OP_SAT) || i->saturate; + const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs(); + const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg(); + + if (i->encSize == 8) { + emitForm_B(i, HEX64(10000000, 00000004)); + + roundMode_C(i); + + // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size() + code[0] |= util_logbase2(typeSizeof(i->dType)) << 20; + code[0] |= util_logbase2(typeSizeof(i->sType)) << 23; + + if (sat) + code[0] |= 0x20; + if (abs) + code[0] |= 1 << 6; + if (neg && i->op != OP_ABS) + code[0] |= 1 << 8; + + if (i->ftz) + code[1] |= 1 << 23; + + if (isSignedIntType(i->dType)) + code[0] |= 0x080; + if (isSignedIntType(i->sType)) + code[0] |= 0x200; + + if (isFloatType(i->dType)) { + if (!isFloatType(i->sType)) + code[1] |= 0x08000000; + } else { + if (isFloatType(i->sType)) + code[1] |= 0x04000000; + else + code[1] |= 0x0c000000; + } + } else { + if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) { + code[0] = 0x298; + } else + if (isFloatType(i->dType)) { + if (isFloatType(i->sType)) + code[0] = 0x098; + else + code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0); + } else { + assert(isFloatType(i->sType)); + + code[0] = 0x288 | (isSignedType(i->sType) ? 
(1 << 8) : 0); + } + + if (neg) code[0] |= 1 << 16; + if (sat) code[0] |= 1 << 18; + if (abs) code[0] |= 1 << 19; + + roundMode_CS(i); + } +} + +void +CodeEmitterNVC0::emitSET(const CmpInstruction *i) +{ + uint32_t hi; + uint32_t lo = 0; + + if (i->sType == TYPE_F64) + lo = 0x1; + else + if (!isFloatType(i->sType)) + lo = 0x3; + + if (isFloatType(i->dType) || isSignedIntType(i->sType)) + lo |= 0x20; + + switch (i->op) { + case OP_SET_AND: hi = 0x10000000; break; + case OP_SET_OR: hi = 0x10200000; break; + case OP_SET_XOR: hi = 0x10400000; break; + default: + hi = 0x100e0000; + break; + } + emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo); + + if (i->op != OP_SET) + srcId(i->src(2), 32 + 17); + + if (i->def(0).getFile() == FILE_PREDICATE) { + if (i->sType == TYPE_F32) + code[1] += 0x10000000; + else + code[1] += 0x08000000; + + code[0] &= ~0xfc000; + defId(i->def(0), 17); + if (i->defExists(1)) + defId(i->def(1), 14); + else + code[0] |= 0x1c000; + } + + if (i->ftz) + code[1] |= 1 << 27; + + emitCondCode(i->setCond, 32 + 23); + emitNegAbs12(i); +} + +void +CodeEmitterNVC0::emitSLCT(const CmpInstruction *i) +{ + uint64_t op; + + switch (i->dType) { + case TYPE_S32: + op = HEX64(30000000, 00000023); + break; + case TYPE_U32: + op = HEX64(30000000, 00000003); + break; + case TYPE_F32: + op = HEX64(38000000, 00000000); + break; + default: + assert(!"invalid type for SLCT"); + op = 0; + break; + } + emitForm_A(i, op); + + CondCode cc = i->setCond; + + if (i->src(2).mod.neg()) + cc = reverseCondCode(cc); + + emitCondCode(cc, 32 + 23); + + if (i->ftz) + code[0] |= 1 << 5; +} + +void CodeEmitterNVC0::emitSELP(const Instruction *i) +{ + emitForm_A(i, HEX64(20000000, 00000004)); + + if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT)) + code[1] |= 1 << 20; +} + +void CodeEmitterNVC0::emitTEXBAR(const Instruction *i) +{ + code[0] = 0x00000006 | (i->subOp << 26); + code[1] = 0xf0000000; + emitPredicate(i); + emitCondCode(i->flagsSrc >= 0 ? 
i->cc : CC_ALWAYS, 5); +} + +void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i) +{ + code[0] = 0x00000086; + code[1] = 0xd0000000; + + code[1] |= i->tex.r; + code[1] |= i->tex.s << 8; + + if (i->tex.liveOnly) + code[0] |= 1 << 9; + + defId(i->def(0), 14); + srcId(i->src(0), 20); +} + +static inline bool +isNextIndependentTex(const TexInstruction *i) +{ + if (!i->next || !isTextureOp(i->next->op)) + return false; + if (i->getDef(0)->interfers(i->next->getSrc(0))) + return false; + return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1)); +} + +void +CodeEmitterNVC0::emitTEX(const TexInstruction *i) +{ + code[0] = 0x00000006; + + if (isNextIndependentTex(i)) + code[0] |= 0x080; // t mode + else + code[0] |= 0x100; // p mode + + if (i->tex.liveOnly) + code[0] |= 1 << 9; + + switch (i->op) { + case OP_TEX: code[1] = 0x80000000; break; + case OP_TXB: code[1] = 0x84000000; break; + case OP_TXL: code[1] = 0x86000000; break; + case OP_TXF: code[1] = 0x90000000; break; + case OP_TXG: code[1] = 0xa0000000; break; + case OP_TXD: code[1] = 0xe0000000; break; + default: + assert(!"invalid texture op"); + break; + } + if (i->op == OP_TXF) { + if (!i->tex.levelZero) + code[1] |= 0x02000000; + } else + if (i->tex.levelZero) { + code[1] |= 0x02000000; + } + + if (i->op != OP_TXD && i->tex.derivAll) + code[1] |= 1 << 13; + + defId(i->def(0), 14); + srcId(i->src(0), 20); + + emitPredicate(i); + + if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5; + + code[1] |= i->tex.mask << 14; + + code[1] |= i->tex.r; + code[1] |= i->tex.s << 8; + if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) + code[1] |= 1 << 18; // in 1st source (with array index) + + // texture target: + code[1] |= (i->tex.target.getDim() - 1) << 20; + if (i->tex.target.isCube()) + code[1] += 2 << 20; + if (i->tex.target.isArray()) + code[1] |= 1 << 19; + if (i->tex.target.isShadow()) + code[1] |= 1 << 24; + + const int src1 = (i->predSrc == 1) ? 
2 : 1; // if predSrc == 1, !srcExists(2) + + if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) { + // lzero + if (i->op == OP_TXL) + code[1] &= ~(1 << 26); + else + if (i->op == OP_TXF) + code[1] &= ~(1 << 25); + } + if (i->tex.target == TEX_TARGET_2D_MS || + i->tex.target == TEX_TARGET_2D_MS_ARRAY) + code[1] |= 1 << 23; + + if (i->tex.useOffsets) // in vecSrc0.w + code[1] |= 1 << 22; + + srcId(i, src1, 26); +} + +void +CodeEmitterNVC0::emitTXQ(const TexInstruction *i) +{ + code[0] = 0x00000086; + code[1] = 0xc0000000; + + switch (i->tex.query) { + case TXQ_DIMS: code[1] |= 0 << 22; break; + case TXQ_TYPE: code[1] |= 1 << 22; break; + case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break; + case TXQ_FILTER: code[1] |= 3 << 22; break; + case TXQ_LOD: code[1] |= 4 << 22; break; + case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break; + default: + assert(!"invalid texture query"); + break; + } + + code[1] |= i->tex.mask << 14; + + code[1] |= i->tex.r; + code[1] |= i->tex.s << 8; + if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0) + code[1] |= 1 << 18; + + const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) + + defId(i->def(0), 14); + srcId(i->src(0), 20); + srcId(i, src1, 26); + + emitPredicate(i); +} + +void +CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) +{ + code[0] = 0x00000000 | (laneMask << 6); + code[1] = 0x48000000 | qOp; + + defId(i->def(0), 14); + srcId(i->src(0), 20); + srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26); + + if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT) + code[0] |= 1 << 9; // dall + + emitPredicate(i); +} + +void +CodeEmitterNVC0::emitFlow(const Instruction *i) +{ + const FlowInstruction *f = i->asFlow(); + + unsigned mask; // bit 0: predicate, bit 1: target + + code[0] = 0x00000007; + + switch (i->op) { + case OP_BRA: + code[1] = f->absolute ? 
0x00000000 : 0x40000000; + if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) + code[0] |= 0x4000; + mask = 3; + break; + case OP_CALL: + code[1] = f->absolute ? 0x10000000 : 0x50000000; + if (f->indirect) + code[0] |= 0x4000; // indirect calls always use c[] source + mask = 2; + break; + + case OP_EXIT: code[1] = 0x80000000; mask = 1; break; + case OP_RET: code[1] = 0x90000000; mask = 1; break; + case OP_DISCARD: code[1] = 0x98000000; mask = 1; break; + case OP_BREAK: code[1] = 0xa8000000; mask = 1; break; + case OP_CONT: code[1] = 0xb0000000; mask = 1; break; + + case OP_JOINAT: code[1] = 0x60000000; mask = 2; break; + case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break; + case OP_PRECONT: code[1] = 0x70000000; mask = 2; break; + case OP_PRERET: code[1] = 0x78000000; mask = 2; break; + + case OP_QUADON: code[1] = 0xc0000000; mask = 0; break; + case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break; + case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break; + default: + assert(!"invalid flow operation"); + return; + } + + if (mask & 1) { + emitPredicate(i); + if (i->flagsSrc < 0) + code[0] |= 0x1e0; + } + + if (!f) + return; + + if (f->allWarp) + code[0] |= 1 << 15; + if (f->limit) + code[0] |= 1 << 16; + + if (f->indirect) { + if (code[0] & 0x4000) { + assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST); + setAddress16(i->src(0)); + code[1] |= i->getSrc(0)->reg.fileIndex << 10; + if (f->op == OP_BRA) + srcId(f->src(0).getIndirect(0), 20); + } else { + srcId(f, 0, 20); + } + } + + if (f->op == OP_CALL) { + if (f->indirect) { + // nothing + } else + if (f->builtin) { + assert(f->absolute); + uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin); + addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26); + addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6); + } else { + assert(!f->absolute); + int32_t pcRel = f->target.fn->binPos - (codeSize + 8); + code[0] |= (pcRel & 0x3f) << 26; + code[1] |= (pcRel >> 6) & 
0x3ffff; + } + } else + if (mask & 2) { + int32_t pcRel = f->target.bb->binPos - (codeSize + 8); + // currently we don't want absolute branches + assert(!f->absolute); + code[0] |= (pcRel & 0x3f) << 26; + code[1] |= (pcRel >> 6) & 0x3ffff; + } +} + +void +CodeEmitterNVC0::emitBAR(const Instruction *i) +{ + Value *rDef = NULL, *pDef = NULL; + + switch (i->subOp) { + case NV50_IR_SUBOP_BAR_ARRIVE: code[0] = 0x84; break; + case NV50_IR_SUBOP_BAR_RED_AND: code[0] = 0x24; break; + case NV50_IR_SUBOP_BAR_RED_OR: code[0] = 0x44; break; + case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break; + default: + code[0] = 0x04; + assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC); + break; + } + code[1] = 0x50000000; + + code[0] |= 63 << 14; + code[1] |= 7 << 21; + + emitPredicate(i); + + // barrier id + if (i->src(0).getFile() == FILE_GPR) { + srcId(i->src(0), 20); + } else { + ImmediateValue *imm = i->getSrc(0)->asImm(); + assert(imm); + code[0] |= imm->reg.data.u32 << 20; + } + + // thread count + if (i->src(1).getFile() == FILE_GPR) { + srcId(i->src(1), 26); + } else { + ImmediateValue *imm = i->getSrc(1)->asImm(); + assert(imm); + code[0] |= imm->reg.data.u32 << 26; + code[1] |= imm->reg.data.u32 >> 6; + } + + if (i->srcExists(2) && (i->predSrc != 2)) { + srcId(i->src(2), 32 + 17); + if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) + code[1] |= 1 << 20; + } else { + code[1] |= 7 << 17; + } + + if (i->defExists(0)) { + if (i->def(0).getFile() == FILE_GPR) + rDef = i->getDef(0); + else + pDef = i->getDef(0); + + if (i->defExists(1)) { + if (i->def(1).getFile() == FILE_GPR) + rDef = i->getDef(1); + else + pDef = i->getDef(1); + } + } + if (rDef) { + code[0] &= ~(63 << 14); + defId(rDef, 14); + } + if (pDef) { + code[1] &= ~(7 << 21); + defId(pDef, 32 + 21); + } +} + +void +CodeEmitterNVC0::emitPFETCH(const Instruction *i) +{ + uint32_t prim = i->src(0).get()->reg.data.u32; + + code[0] = 0x00000006 | ((prim & 0x3f) << 26); + code[1] = 0x00000000 | (prim >> 6); + + emitPredicate(i); + + 
defId(i->def(0), 14); + srcId(i->src(1), 20); +} + +void +CodeEmitterNVC0::emitVFETCH(const Instruction *i) +{ + code[0] = 0x00000006; + code[1] = 0x06000000 | i->src(0).get()->reg.data.offset; + + if (i->perPatch) + code[0] |= 0x100; + if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT) + code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads + + emitPredicate(i); + + code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5; + + defId(i->def(0), 14); + srcId(i->src(0).getIndirect(0), 20); + srcId(i->src(0).getIndirect(1), 26); // vertex address +} + +void +CodeEmitterNVC0::emitEXPORT(const Instruction *i) +{ + unsigned int size = typeSizeof(i->dType); + + code[0] = 0x00000006 | ((size / 4 - 1) << 5); + code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset; + + assert(!(code[1] & ((size == 12) ? 15 : (size - 1)))); + + if (i->perPatch) + code[0] |= 0x100; + + emitPredicate(i); + + assert(i->src(1).getFile() == FILE_GPR); + + srcId(i->src(0).getIndirect(0), 20); + srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address + srcId(i->src(1), 26); +} + +void +CodeEmitterNVC0::emitOUT(const Instruction *i) +{ + code[0] = 0x00000006; + code[1] = 0x1c000000; + + emitPredicate(i); + + defId(i->def(0), 14); // new secret address + srcId(i->src(0), 20); // old secret address, should be 0 initially + + assert(i->src(0).getFile() == FILE_GPR); + + if (i->op == OP_EMIT) + code[0] |= 1 << 5; + if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART) + code[0] |= 1 << 6; + + // vertex stream + if (i->src(1).getFile() == FILE_IMMEDIATE) { + code[1] |= 0xc000; + code[0] |= SDATA(i->src(1)).u32 << 26; + } else { + srcId(i->src(1), 26); + } +} + +void +CodeEmitterNVC0::emitInterpMode(const Instruction *i) +{ + if (i->encSize == 8) { + code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID + } else { + if (i->getInterpMode() == NV50_IR_INTERP_SC) + code[0] |= 0x80; + assert(i->op == OP_PINTERP && i->getSampleMode() == 0); + } +} + +void 
+CodeEmitterNVC0::emitINTERP(const Instruction *i) +{ + const uint32_t base = i->getSrc(0)->reg.data.offset; + + if (i->encSize == 8) { + code[0] = 0x00000000; + code[1] = 0xc0000000 | (base & 0xffff); + + if (i->saturate) + code[0] |= 1 << 5; + + if (i->op == OP_PINTERP) + srcId(i->src(1), 26); + else + code[0] |= 0x3f << 26; + + srcId(i->src(0).getIndirect(0), 20); + } else { + assert(i->op == OP_PINTERP); + code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26); + srcId(i->src(1), 20); + } + emitInterpMode(i); + + emitPredicate(i); + defId(i->def(0), 14); + + if (i->getSampleMode() == NV50_IR_INTERP_OFFSET) + srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 17); + else + code[1] |= 0x3f << 17; +} + +void +CodeEmitterNVC0::emitLoadStoreType(DataType ty) +{ + uint8_t val; + + switch (ty) { + case TYPE_U8: + val = 0x00; + break; + case TYPE_S8: + val = 0x20; + break; + case TYPE_F16: + case TYPE_U16: + val = 0x40; + break; + case TYPE_S16: + val = 0x60; + break; + case TYPE_F32: + case TYPE_U32: + case TYPE_S32: + val = 0x80; + break; + case TYPE_F64: + case TYPE_U64: + case TYPE_S64: + val = 0xa0; + break; + case TYPE_B128: + val = 0xc0; + break; + default: + val = 0x80; + assert(!"invalid type"); + break; + } + code[0] |= val; +} + +void +CodeEmitterNVC0::emitCachingMode(CacheMode c) +{ + uint32_t val; + + switch (c) { + case CACHE_CA: +// case CACHE_WB: + val = 0x000; + break; + case CACHE_CG: + val = 0x100; + break; + case CACHE_CS: + val = 0x200; + break; + case CACHE_CV: +// case CACHE_WT: + val = 0x300; + break; + default: + val = 0; + assert(!"invalid caching mode"); + break; + } + code[0] |= val; +} + +static inline bool +uses64bitAddress(const Instruction *ldst) +{ + return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL && + ldst->src(0).isIndirect(0) && + ldst->getIndirect(0, 0)->reg.size == 8; +} + +void +CodeEmitterNVC0::emitSTORE(const Instruction *i) +{ + uint32_t opc; + + switch (i->src(0).getFile()) { + case FILE_MEMORY_GLOBAL: opc = 0x90000000; 
break; + case FILE_MEMORY_LOCAL: opc = 0xc8000000; break; + case FILE_MEMORY_SHARED: opc = 0xc9000000; break; + default: + assert(!"invalid memory file"); + opc = 0; + break; + } + code[0] = 0x00000005; + code[1] = opc; + + setAddressByFile(i->src(0)); + srcId(i->src(1), 14); + srcId(i->src(0).getIndirect(0), 20); + if (uses64bitAddress(i)) + code[1] |= 1 << 26; + + emitPredicate(i); + + emitLoadStoreType(i->dType); + emitCachingMode(i->cache); +} + +void +CodeEmitterNVC0::emitLOAD(const Instruction *i) +{ + uint32_t opc; + + code[0] = 0x00000005; + + switch (i->src(0).getFile()) { + case FILE_MEMORY_GLOBAL: opc = 0x80000000; break; + case FILE_MEMORY_LOCAL: opc = 0xc0000000; break; + case FILE_MEMORY_SHARED: opc = 0xc1000000; break; + case FILE_MEMORY_CONST: + if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) { + emitMOV(i); // not sure if this is any better + return; + } + opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10); + code[0] = 0x00000006 | (i->subOp << 8); + break; + default: + assert(!"invalid memory file"); + opc = 0; + break; + } + code[1] = opc; + + defId(i->def(0), 14); + + setAddressByFile(i->src(0)); + srcId(i->src(0).getIndirect(0), 20); + if (uses64bitAddress(i)) + code[1] |= 1 << 26; + + emitPredicate(i); + + emitLoadStoreType(i->dType); + emitCachingMode(i->cache); +} + +uint8_t +CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref) +{ + switch (SDATA(ref).sv.sv) { + case SV_LANEID: return 0x00; + case SV_PHYSID: return 0x03; + case SV_VERTEX_COUNT: return 0x10; + case SV_INVOCATION_ID: return 0x11; + case SV_YDIR: return 0x12; + case SV_TID: return 0x21 + SDATA(ref).sv.index; + case SV_CTAID: return 0x25 + SDATA(ref).sv.index; + case SV_NTID: return 0x29 + SDATA(ref).sv.index; + case SV_GRIDID: return 0x2c; + case SV_NCTAID: return 0x2d + SDATA(ref).sv.index; + case SV_LBASE: return 0x34; + case SV_SBASE: return 0x30; + case SV_CLOCK: return 0x50 + SDATA(ref).sv.index; + default: + assert(!"no sreg for system value"); + return 
0; + } +} + +void +CodeEmitterNVC0::emitMOV(const Instruction *i) +{ + if (i->def(0).getFile() == FILE_PREDICATE) { + if (i->src(0).getFile() == FILE_GPR) { + code[0] = 0xfc01c003; + code[1] = 0x1a8e0000; + srcId(i->src(0), 20); + } else { + code[0] = 0x0001c004; + code[1] = 0x0c0e0000; + if (i->src(0).getFile() == FILE_IMMEDIATE) { + code[0] |= 7 << 20; + if (!i->getSrc(0)->reg.data.u32) + code[0] |= 1 << 23; + } else { + srcId(i->src(0), 20); + } + } + defId(i->def(0), 17); + emitPredicate(i); + } else + if (i->src(0).getFile() == FILE_SYSTEM_VALUE) { + uint8_t sr = getSRegEncoding(i->src(0)); + + if (i->encSize == 8) { + code[0] = 0x00000004 | (sr << 26); + code[1] = 0x2c000000; + } else { + code[0] = 0x40000008 | (sr << 20); + } + defId(i->def(0), 14); + + emitPredicate(i); + } else + if (i->encSize == 8) { + uint64_t opc; + + if (i->src(0).getFile() == FILE_IMMEDIATE) + opc = HEX64(18000000, 000001e2); + else + if (i->src(0).getFile() == FILE_PREDICATE) + opc = HEX64(080e0000, 1c000004); + else + opc = HEX64(28000000, 00000004); + + opc |= i->lanes << 5; + + emitForm_B(i, opc); + } else { + uint32_t imm; + + if (i->src(0).getFile() == FILE_IMMEDIATE) { + imm = SDATA(i->src(0)).u32; + if (imm & 0xfff00000) { + assert(!(imm & 0x000fffff)); + code[0] = 0x00000318 | imm; + } else { + assert(imm < 0x800 || ((int32_t)imm >= -0x800)); + code[0] = 0x00000118 | (imm << 20); + } + } else { + code[0] = 0x0028; + emitShortSrc2(i->src(0)); + } + defId(i->def(0), 14); + + emitPredicate(i); + } +} + +void +CodeEmitterNVC0::emitATOM(const Instruction *i) +{ + const bool hasDst = i->defExists(0); + const bool casOrExch = + i->subOp == NV50_IR_SUBOP_ATOM_EXCH || + i->subOp == NV50_IR_SUBOP_ATOM_CAS; + + if (i->dType == TYPE_U64) { + switch (i->subOp) { + case NV50_IR_SUBOP_ATOM_ADD: + code[0] = 0x205; + if (hasDst) + code[1] = 0x507e0000; + else + code[1] = 0x10000000; + break; + case NV50_IR_SUBOP_ATOM_EXCH: + code[0] = 0x305; + code[1] = 0x507e0000; + break; + case 
// --- tail of emitATOM (function begins in the previous chunk) ---
NV50_IR_SUBOP_ATOM_CAS:
         code[0] = 0x325;
         code[1] = 0x50000000;
         break;
      default:
         assert(!"invalid u64 red op");
         break;
      }
   } else
   if (i->dType == TYPE_U32) {
      switch (i->subOp) {
      case NV50_IR_SUBOP_ATOM_EXCH:
         code[0] = 0x105;
         code[1] = 0x507e0000;
         break;
      case NV50_IR_SUBOP_ATOM_CAS:
         code[0] = 0x125;
         code[1] = 0x50000000;
         break;
      default:
         // remaining u32 sub-ops encode directly into bits 5+
         code[0] = 0x5 | (i->subOp << 5);
         if (hasDst)
            code[1] = 0x507e0000;
         else
            code[1] = 0x10000000;
         break;
      }
   } else
   if (i->dType == TYPE_S32) {
      assert(i->subOp <= 2); // only add/min/max make sense signed
      code[0] = 0x205 | (i->subOp << 5);
      if (hasDst)
         code[1] = 0x587e0000;
      else
         code[1] = 0x18000000;
   } else
   if (i->dType == TYPE_F32) {
      assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
      code[0] = 0x205;
      if (hasDst)
         code[1] = 0x687e0000;
      else
         code[1] = 0x28000000;
   }

   emitPredicate(i);

   srcId(i->src(1), 14); // operand value

   if (hasDst)
      defId(i->def(0), 32 + 11);
   else
   if (casOrExch)
      code[1] |= 63 << 11; // no destination -> $r63 (bit bucket)

   if (hasDst || casOrExch) {
      // 20-bit signed offset is split across both code words
      const int32_t offset = SDATA(i->src(0)).offset;
      assert(offset < 0x80000 && offset >= -0x80000);
      code[0] |= offset << 26;
      code[1] |= (offset & 0x1ffc0) >> 6;
      code[1] |= (offset & 0xe0000) << 6;
   } else {
      srcAddr32(i->src(0), 26, 0);
   }
   if (i->getIndirect(0, 0)) {
      srcId(i->getIndirect(0, 0), 20);
      if (i->getIndirect(0, 0)->reg.size == 8)
         code[1] |= 1 << 26; // 64-bit address register
   } else {
      code[0] |= 63 << 20;
   }

   if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
      srcId(i->src(2), 32 + 17); // compare value

}

// Emit MEMBAR with CTA / GL / SYS scope taken from the sub-op.
void
CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
{
   switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
   case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
   case NV50_IR_SUBOP_MEMBAR_GL: code[0] = 0x25; break;
   default:
      code[0] = 0x45;
      assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
      break;
   }
   code[1] = 0xe0000000;

   emitPredicate(i);
}

// Emit CCTL (cache control). (Function continues in the next chunk.)
void
CodeEmitterNVC0::emitCCTL(const Instruction *i)
{
   code[0] = 0x00000005 | (i->subOp << 5);

   if
// --- tail of emitCCTL (function begins in the previous chunk) ---
(i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
      code[1] = 0x98000000;
      srcAddr32(i->src(0), 28, 2);
   } else {
      code[1] = 0xd0000000;
      setAddress24(i->src(0));
   }
   if (uses64bitAddress(i))
      code[1] |= 1 << 26;
   srcId(i->src(0).getIndirect(0), 20);

   emitPredicate(i);

   defId(i, 0, 14);
}

// Encode the SUCLAMP clamping mode (SD/PL/BL variant + factor) from the
// sub-op into code[0]; sets the 2D bit in code[1] when requested.
// Unknown modes are left unencoded (early return).
void
CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
{
   uint8_t m;
   switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
   case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
   case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
   case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
   case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
   case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
   case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
   case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
   case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
   case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
   case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
   case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
   case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
   case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
   case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
   case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
   default:
      return;
   }
   code[0] |= m << 5;
   if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
      code[1] |= 1 << 16;
}

// Emit the surface address calculation ops SUCLAMP / SUBFM / SUEAU.
// (Function continues in the next chunk.)
void
CodeEmitterNVC0::emitSUCalc(Instruction *i)
{
   ImmediateValue *imm = NULL;
   uint64_t opc;

   if (i->srcExists(2)) {
      imm = i->getSrc(2)->asImm();
      if (imm)
         i->setSrc(2, NULL); // special case, make emitForm_A not assert
   }

   switch (i->op) {
   case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
   case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
   case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
   default:
      assert(0);
      return;
   }
   emitForm_A(i, opc);

   if (i->op == OP_SUCLAMP) {
      if (i->dType == TYPE_S32)
         code[0] |= 1 << 9;
      emitSUCLAMPMode(i->subOp);
   }

   if (i->op == OP_SUBFM && i->subOp ==
// --- tail of emitSUCalc (function begins in the previous chunk) ---
NV50_IR_SUBOP_SUBFM_3D)
      code[1] |= 1 << 16;

   if (i->op != OP_SUEAU) {
      // encode the (register, predicate) destination pair
      if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
         code[0] |= 63 << 14;
         code[1] |= i->getDef(0)->reg.data.id << 23;
      } else
      if (i->defExists(1)) { // r, p
         assert(i->def(1).getFile() == FILE_PREDICATE);
         code[1] |= i->getDef(1)->reg.data.id << 23;
      } else { // r, #
         code[1] |= 7 << 23;
      }
   }
   if (imm) {
      assert(i->op == OP_SUCLAMP);
      i->setSrc(2, imm); // restore the source detached above
      code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
   }
}

// Encode the surface data type (u32/s32/u8/s8) into code[1].
void
CodeEmitterNVC0::emitSUGType(DataType ty)
{
   switch (ty) {
   case TYPE_S32: code[1] |= 1 << 13; break;
   case TYPE_U8: code[1] |= 2 << 13; break;
   case TYPE_S8: code[1] |= 3 << 13; break;
   default:
      assert(ty == TYPE_U32);
      break;
   }
}

// Encode a c[] reference (surface format descriptor) for source s.
void
CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
{
   const uint32_t offset = i->getSrc(s)->reg.data.offset;

   assert(i->src(s).getFile() == FILE_MEMORY_CONST);
   assert(offset == (offset & 0xfffc)); // 16-bit, 4-byte aligned

   code[1] |= 1 << 21;
   code[0] |= offset << 24;
   code[1] |= offset >> 8;
   code[1] |= i->getSrc(s)->reg.fileIndex << 8;
}

// Encode the optional predicate source s (with NOT modifier);
// absent or equal-to-exec predicates encode as 0x7 (always true).
void
CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
{
   if (!i->srcExists(s) || (i->predSrc == s)) {
      code[1] |= 0x7 << 17;
   } else {
      if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
         code[1] |= 1 << 20;
      srcId(i->src(s), 32 + 17);
   }
}

// Emit SULDGB (typed surface load, GK104+).
void
CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
{
   code[0] = 0x5;
   code[1] = 0xd4000000 | (i->subOp << 15);

   emitLoadStoreType(i->dType);
   emitSUGType(i->sType);
   emitCachingMode(i->cache);

   emitPredicate(i);
   defId(i->def(0), 14); // destination
   srcId(i->src(0), 20); // address
   // format
   if (i->src(1).getFile() == FILE_GPR)
      srcId(i->src(1), 26);
   else
      setSUConst16(i, 1);
   setSUPred(i, 2);
}

// Emit SUSTB/SUSTP (surface store). (Function continues in the next chunk.)
void
CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
{
   code[0] = 0x5;
   code[1] = 0xdc000000 | (i->subOp << 15);

   if (i->op == OP_SUSTP)
      code[1] |= i->tex.mask <<
// --- tail of emitSUSTGx (function begins in the previous chunk) ---
22;
   else
      emitLoadStoreType(i->dType);
   emitSUGType(i->sType);
   emitCachingMode(i->cache);

   emitPredicate(i);
   srcId(i->src(0), 20); // address
   // format
   if (i->src(1).getFile() == FILE_GPR)
      srcId(i->src(1), 26);
   else
      setSUConst16(i, 1);
   srcId(i->src(3), 14); // values
   setSUPred(i, 2);
}

// Encode the packed video/vector sub-op fields (sources, destination
// selection and write mask) for Vn-style SIMD ops.
void
CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
{
   switch (NV50_IR_SUBOP_Vn(i->subOp)) {
   case 0:
      code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
      code[1] |= (i->subOp & 0x00e0) >> 5;  // vsrc2
      code[1] |= (i->subOp & 0x0100) << 7;  // vsrc2
      code[1] |= (i->subOp & 0x3c00) << 13; // vdst
      break;
   case 1:
      code[1] |= (i->subOp & 0x000f) << 8;  // v2src1
      code[1] |= (i->subOp & 0x0010) << 11; // v2src1
      code[1] |= (i->subOp & 0x01e0) >> 1;  // v2src2
      code[1] |= (i->subOp & 0x0200) << 6;  // v2src2
      code[1] |= (i->subOp & 0x3c00) << 2;  // v4dst
      code[1] |= (i->mask & 0x3) << 2;
      break;
   case 2:
      code[1] |= (i->subOp & 0x000f) << 8;  // v4src1
      code[1] |= (i->subOp & 0x01e0) >> 1;  // v4src2
      code[1] |= (i->subOp & 0x3c00) << 2;  // v4dst
      code[1] |= (i->mask & 0x3) << 2;
      code[1] |= (i->mask & 0xc) << 21;
      break;
   default:
      assert(0);
      break;
   }
}

// Emit the vector shift-left op; sign bits differ between V2 and V1/V4 forms.
void
CodeEmitterNVC0::emitVSHL(const Instruction *i)
{
   uint64_t opc = 0x4;

   switch (NV50_IR_SUBOP_Vn(i->subOp)) {
   case 0: opc |= 0xe8ULL << 56; break;
   case 1: opc |= 0xb4ULL << 56; break;
   case 2: opc |= 0x94ULL << 56; break;
   default:
      assert(0);
      break;
   }
   if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
      if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
      if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
   } else {
      if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
      if (isSignedType(i->sType)) opc |= 1 << 6;
   }
   emitForm_A(i, opc);
   emitVectorSubOp(i);

   if (i->saturate)
      code[0] |= 1 << 9;
   if (i->flagsDef >= 0)
      code[1] |= 1 << 16;
}

// Top-level dispatch: encode one instruction into the output buffer.
// (Function continues in the next chunk.)
bool
CodeEmitterNVC0::emitInstruction(Instruction *insn)
{
   unsigned int size = insn->encSize;

   if
(writeIssueDelays && !(codeSize & 0x3f)) + size += 8; + + if (!insn->encSize) { + ERROR("skipping unencodable instruction: "); insn->print(); + return false; + } else + if (codeSize + size > codeSizeLimit) { + ERROR("code emitter output buffer too small\n"); + return false; + } + + if (writeIssueDelays) { + if (!(codeSize & 0x3f)) { + code[0] = 0x00000007; // cf issue delay "instruction" + code[1] = 0x20000000; + code += 2; + codeSize += 8; + } + const unsigned int id = (codeSize & 0x3f) / 8 - 1; + uint32_t *data = code - (id * 2 + 2); + if (id <= 2) { + data[0] |= insn->sched << (id * 8 + 4); + } else + if (id == 3) { + data[0] |= insn->sched << 28; + data[1] |= insn->sched >> 4; + } else { + data[1] |= insn->sched << ((id - 4) * 8 + 4); + } + } + + // assert that instructions with multiple defs don't corrupt registers + for (int d = 0; insn->defExists(d); ++d) + assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0); + + switch (insn->op) { + case OP_MOV: + case OP_RDSV: + emitMOV(insn); + break; + case OP_NOP: + break; + case OP_LOAD: + emitLOAD(insn); + break; + case OP_STORE: + emitSTORE(insn); + break; + case OP_LINTERP: + case OP_PINTERP: + emitINTERP(insn); + break; + case OP_VFETCH: + emitVFETCH(insn); + break; + case OP_EXPORT: + emitEXPORT(insn); + break; + case OP_PFETCH: + emitPFETCH(insn); + break; + case OP_EMIT: + case OP_RESTART: + emitOUT(insn); + break; + case OP_ADD: + case OP_SUB: + if (isFloatType(insn->dType)) + emitFADD(insn); + else + emitUADD(insn); + break; + case OP_MUL: + if (isFloatType(insn->dType)) + emitFMUL(insn); + else + emitUMUL(insn); + break; + case OP_MAD: + case OP_FMA: + if (isFloatType(insn->dType)) + emitFMAD(insn); + else + emitIMAD(insn); + break; + case OP_SAD: + emitISAD(insn); + break; + case OP_NOT: + emitNOT(insn); + break; + case OP_AND: + emitLogicOp(insn, 0); + break; + case OP_OR: + emitLogicOp(insn, 1); + break; + case OP_XOR: + emitLogicOp(insn, 2); + break; + case OP_SHL: + case OP_SHR: + 
emitShift(insn); + break; + case OP_SET: + case OP_SET_AND: + case OP_SET_OR: + case OP_SET_XOR: + emitSET(insn->asCmp()); + break; + case OP_SELP: + emitSELP(insn); + break; + case OP_SLCT: + emitSLCT(insn->asCmp()); + break; + case OP_MIN: + case OP_MAX: + emitMINMAX(insn); + break; + case OP_ABS: + case OP_NEG: + case OP_CEIL: + case OP_FLOOR: + case OP_TRUNC: + case OP_CVT: + case OP_SAT: + emitCVT(insn); + break; + case OP_RSQ: + emitSFnOp(insn, 5); + break; + case OP_RCP: + emitSFnOp(insn, 4); + break; + case OP_LG2: + emitSFnOp(insn, 3); + break; + case OP_EX2: + emitSFnOp(insn, 2); + break; + case OP_SIN: + emitSFnOp(insn, 1); + break; + case OP_COS: + emitSFnOp(insn, 0); + break; + case OP_PRESIN: + case OP_PREEX2: + emitPreOp(insn); + break; + case OP_TEX: + case OP_TXB: + case OP_TXL: + case OP_TXD: + case OP_TXF: + emitTEX(insn->asTex()); + break; + case OP_TXQ: + emitTXQ(insn->asTex()); + break; + case OP_TEXBAR: + emitTEXBAR(insn); + break; + case OP_SUBFM: + case OP_SUCLAMP: + case OP_SUEAU: + emitSUCalc(insn); + break; + case OP_MADSP: + emitMADSP(insn); + break; + case OP_SULDB: + if (targ->getChipset() >= NVISA_GK104_CHIPSET) + emitSULDGB(insn->asTex()); + else + ERROR("SULDB not yet supported on < nve4\n"); + break; + case OP_SUSTB: + case OP_SUSTP: + if (targ->getChipset() >= NVISA_GK104_CHIPSET) + emitSUSTGx(insn->asTex()); + else + ERROR("SUSTx not yet supported on < nve4\n"); + break; + case OP_ATOM: + emitATOM(insn); + break; + case OP_BRA: + case OP_CALL: + case OP_PRERET: + case OP_RET: + case OP_DISCARD: + case OP_EXIT: + case OP_PRECONT: + case OP_CONT: + case OP_PREBREAK: + case OP_BREAK: + case OP_JOINAT: + case OP_BRKPT: + case OP_QUADON: + case OP_QUADPOP: + emitFlow(insn); + break; + case OP_QUADOP: + emitQUADOP(insn, insn->subOp, insn->lanes); + break; + case OP_DFDX: + emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4); + break; + case OP_DFDY: + emitQUADOP(insn, insn->src(0).mod.neg() ? 
0x5a : 0xa5, 0x5); + break; + case OP_POPCNT: + emitPOPC(insn); + break; + case OP_INSBF: + emitINSBF(insn); + break; + case OP_EXTBF: + emitEXTBF(insn); + break; + case OP_PERMT: + emitPERMT(insn); + break; + case OP_JOIN: + emitNOP(insn); + insn->join = 1; + break; + case OP_BAR: + emitBAR(insn); + break; + case OP_MEMBAR: + emitMEMBAR(insn); + break; + case OP_CCTL: + emitCCTL(insn); + break; + case OP_VSHL: + emitVSHL(insn); + break; + case OP_PHI: + case OP_UNION: + case OP_CONSTRAINT: + ERROR("operation should have been eliminated"); + return false; + case OP_EXP: + case OP_LOG: + case OP_SQRT: + case OP_POW: + ERROR("operation should have been lowered\n"); + return false; + default: + ERROR("unknow op\n"); + return false; + } + + if (insn->join) { + code[0] |= 0x10; + assert(insn->encSize == 8); + } + + code += insn->encSize / 4; + codeSize += insn->encSize; + return true; +} + +uint32_t +CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const +{ + const Target::OpInfo &info = targ->getOpInfo(i); + + if (writeIssueDelays || info.minEncSize == 8 || 1) + return 8; + + if (i->ftz || i->saturate || i->join) + return 8; + if (i->rnd != ROUND_N) + return 8; + if (i->predSrc >= 0 && i->op == OP_MAD) + return 8; + + if (i->op == OP_PINTERP) { + if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work + return 8; + } else + if (i->op == OP_MOV && i->lanes != 0xf) { + return 8; + } + + for (int s = 0; i->srcExists(s); ++s) { + if (i->src(s).isIndirect(0)) + return 8; + + if (i->src(s).getFile() == FILE_MEMORY_CONST) { + if (SDATA(i->src(s)).offset >= 0x100) + return 8; + if (i->getSrc(s)->reg.fileIndex > 1 && + i->getSrc(s)->reg.fileIndex != 16) + return 8; + } else + if (i->src(s).getFile() == FILE_IMMEDIATE) { + if (i->dType == TYPE_F32) { + if (SDATA(i->src(s)).u32 >= 0x100) + return 8; + } else { + if (SDATA(i->src(s)).u32 > 0xff) + return 8; + } + } + + if (i->op == OP_CVT) + continue; + if (i->src(s).mod != Modifier(0)) { + if (i->src(s).mod == 
// --- tail of getMinEncodingSize (function begins in the previous chunk) ---
Modifier(NV50_IR_MOD_ABS))
            if (i->op != OP_RSQ)
               return 8;
         if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
            if (i->op != OP_ADD || s != 0)
               return 8;
      }
   }

   return 4;
}

// Simplified, erring on safe side.
// Computes per-instruction issue-delay ("sched") bytes for GK104+ by
// scoreboarding register/predicate/flag availability per basic block.
class SchedDataCalculator : public Pass
{
public:
   SchedDataCalculator(const Target *targ) : targ(targ) { }

private:
   struct RegScores
   {
      struct Resource {
         int st[DATA_FILE_COUNT]; // LD to LD delay 3
         int ld[DATA_FILE_COUNT]; // ST to ST delay 3
         // NOTE(review): the two comments above look swapped relative to the
         // fields (commitInsn sets st[] from stores, ld[] from loads) — confirm
         int tex; // TEX to non-TEX delay 17 (0x11)
         int sfu; // SFU to SFU delay 3 (except PRE-ops)
         int imul; // integer MUL to MUL delay 3
      } res;
      struct ScoreData {
         int r[64];  // per-GPR ready cycle
         int p[8];   // per-predicate ready cycle
         int c;      // condition flags ready cycle
      } rd, wr;
      int base;

      // Shift all scores so that they are relative to the new base cycle.
      void rebase(const int base)
      {
         const int delta = this->base - base;
         if (!delta)
            return;
         this->base = 0;

         for (int i = 0; i < 64; ++i) {
            rd.r[i] += delta;
            wr.r[i] += delta;
         }
         for (int i = 0; i < 8; ++i) {
            rd.p[i] += delta;
            wr.p[i] += delta;
         }
         rd.c += delta;
         wr.c += delta;

         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
            res.ld[f] += delta;
            res.st[f] += delta;
         }
         res.sfu += delta;
         res.imul += delta;
         res.tex += delta;
      }
      // Reset all scores to zero.
      void wipe()
      {
         memset(&rd, 0, sizeof(rd));
         memset(&wr, 0, sizeof(wr));
         memset(&res, 0, sizeof(res));
      }
      int getLatest(const ScoreData& d) const
      {
         int max = 0;
         for (int i = 0; i < 64; ++i)
            if (d.r[i] > max)
               max = d.r[i];
         for (int i = 0; i < 8; ++i)
            if (d.p[i] > max)
               max = d.p[i];
         if (d.c > max)
            max = d.c;
         return max;
      }
      inline int getLatestRd() const
      {
         return getLatest(rd);
      }
      inline int getLatestWr() const
      {
         return getLatest(wr);
      }
      // Latest cycle at which anything (reg, predicate, flag or unit) settles.
      inline int getLatest() const
      {
         const int a = getLatestRd();
         const int b = getLatestWr();

         int max = MAX2(a, b);
         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
            max = MAX2(res.ld[f], max);
            max = MAX2(res.st[f], max);
         }
         max = MAX2(res.sfu, max);
         max = MAX2(res.imul, max);
         max = MAX2(res.tex, max);
         return max;
      }
      // Merge another block's scores (element-wise maximum).
      void setMax(const RegScores *that)
      {
         for (int i = 0; i < 64; ++i) {
            rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
            wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
         }
         for (int i = 0; i < 8; ++i) {
            rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
            wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
         }
         rd.c = MAX2(rd.c, that->rd.c);
         wr.c = MAX2(wr.c, that->wr.c);

         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
            res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
            res.st[f] = MAX2(res.st[f], that->res.st[f]);
         }
         res.sfu = MAX2(res.sfu, that->res.sfu);
         res.imul = MAX2(res.imul, that->res.imul);
         res.tex = MAX2(res.tex, that->res.tex);
      }
      // Debug dump of all scores still pending after the given cycle.
      void print(int cycle)
      {
         for (int i = 0; i < 64; ++i) {
            if (rd.r[i] > cycle)
               INFO("rd $r%i @ %i\n", i, rd.r[i]);
            if (wr.r[i] > cycle)
               INFO("wr $r%i @ %i\n", i, wr.r[i]);
         }
         for (int i = 0; i < 8; ++i) {
            if (rd.p[i] > cycle)
               INFO("rd $p%i @ %i\n", i, rd.p[i]);
            if (wr.p[i] > cycle)
               INFO("wr $p%i @ %i\n", i, wr.p[i]);
         }
         if (rd.c > cycle)
            INFO("rd $c @ %i\n", rd.c);
         if (wr.c > cycle)
            INFO("wr $c @ %i\n", wr.c);
         if (res.sfu > cycle)
            INFO("sfu @ %i\n", res.sfu);
         if (res.imul > cycle)
            INFO("imul @ %i\n", res.imul);
         if (res.tex > cycle)
            INFO("tex @ %i\n", res.tex);
      }
   };

   RegScores *score; // for current BB
   std::vector<RegScores> scoreBoards;
   int cycle;
   int prevData; // sched byte of the previous instruction
   operation prevOp;

   const Target *targ;

   bool visit(Function *);
   bool visit(BasicBlock *);

   void commitInsn(const Instruction *, int cycle);
   int calcDelay(const Instruction *, int cycle) const;
   void setDelay(Instruction *, int delay, Instruction *next);

   void recordRd(const Value *, const int ready);
   void recordWr(const Value *, const int ready);
   void checkRd(const Value *, int cycle, int& delay) const;
   void checkWr(const Value *, int cycle, int& delay) const;

   int getCycles(const Instruction *, int origDelay) const;
};

// (Body continues in the next chunk.)
void
SchedDataCalculator::setDelay(Instruction *insn, int
// --- body of setDelay (signature in the previous chunk) ---
// Chooses the sched byte for insn: TEXBAR gets a special code, joins get 0,
// dual-issue is attempted when the delay allows, otherwise the delay is
// stored with an EXPORT/normal marker bit.
delay, Instruction *next)
{
   if (insn->op == OP_EXIT || insn->op == OP_RET)
      delay = MAX2(delay, 14);

   if (insn->op == OP_TEXBAR) {
      // TODO: except if results not used before EXIT
      insn->sched = 0xc2;
   } else
   if (insn->op == OP_JOIN || insn->join) {
      insn->sched = 0x00;
   } else
   if (delay >= 0 || prevData == 0x04 ||
       !next || !targ->canDualIssue(insn, next)) {
      insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
      if (prevOp == OP_EXPORT)
         insn->sched |= 0x40;
      else
         insn->sched |= 0x20;
   } else {
      insn->sched = 0x04; // dual-issue
   }

   if (prevData != 0x04 || prevOp != OP_EXPORT)
      if (insn->sched != 0x04 || insn->op == OP_EXPORT)
         prevOp = insn->op;

   prevData = insn->sched;
}

// Convert a sched byte back into the number of cycles it consumes.
// (Function continues in the next chunk.)
int
SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
{
   if (insn->sched & 0x80) {
      int c = (insn->sched & 0x0f) * 2 + 1;
      if (insn->op == OP_TEXBAR && origDelay > 0)
         c += origDelay;
      return c;
   }
   if (insn->sched & 0x60)
      return (insn->sched & 0x1f) + 1;
   return (insn->sched == 0x04) ?
// --- tail of getCycles (function begins in the previous chunk) ---
0 : 32;
}

// Allocate and clear one scoreboard per basic block.
bool
SchedDataCalculator::visit(Function *func)
{
   scoreBoards.resize(func->cfg.getSize());
   for (size_t i = 0; i < scoreBoards.size(); ++i)
      scoreBoards[i].wipe();
   return true;
}

// Walk one basic block, committing each instruction to the scoreboard
// and assigning its sched byte from the next instruction's dependencies.
// (Function continues in the next chunk.)
bool
SchedDataCalculator::visit(BasicBlock *bb)
{
   Instruction *insn;
   Instruction *next = NULL;

   // NOTE(review): this local shadows the member variable of the same name
   int cycle = 0;

   prevData = 0x00;
   prevOp = OP_NOP;
   score = &scoreBoards.at(bb->getId());

   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      // back branches will wait until all target dependencies are satisfied
      if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
         continue;
      BasicBlock *in = BasicBlock::get(ei.getNode());
      if (in->getExit()) {
         if (prevData != 0x04)
            prevData = in->getExit()->sched;
         prevOp = in->getExit()->op;
      }
      score->setMax(&scoreBoards.at(in->getId()));
   }
   if (bb->cfg.incidentCount() > 1)
      prevOp = OP_NOP;

#ifdef NVC0_DEBUG_SCHED_DATA
   INFO("=== BB:%i initial scores\n", bb->getId());
   score->print(cycle);
#endif

   for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
      next = insn->next;

      commitInsn(insn, cycle);
      int delay = calcDelay(next, cycle);
      setDelay(insn, delay, next);
      cycle += getCycles(insn, delay);

#ifdef NVC0_DEBUG_SCHED_DATA
      INFO("cycle %i, sched %02x\n", cycle, insn->sched);
      insn->print();
      next->print();
#endif
   }
   if (!insn)
      return true;
   commitInsn(insn, cycle);

   int bbDelay = -1;

   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      BasicBlock *out = BasicBlock::get(ei.getNode());

      if (ei.getType() != Graph::Edge::BACK) {
         // only test the first instruction of the outgoing block
         next = out->getEntry();
         if (next)
            bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
      } else {
         // wait until all dependencies are satisfied
         const int regsFree = score->getLatest();
         next = out->getFirst();
         for (int c = cycle; next && c < regsFree; next = next->next) {
            bbDelay =
// --- tail of SchedDataCalculator::visit(BasicBlock*) (begins in the previous chunk) ---
MAX2(bbDelay, calcDelay(next, c));
            c += getCycles(next, bbDelay);
         }
         next = NULL;
      }
   }
   if (bb->cfg.outgoingCount() != 1)
      next = NULL;
   setDelay(insn, bbDelay, next);
   cycle += getCycles(insn, bbDelay);

   score->rebase(cycle); // common base for initializing out blocks' scores
   return true;
}

#define NVE4_MAX_ISSUE_DELAY 0x1f
// Compute how many cycles insn must wait at the given cycle, from its
// sources' ready times and the state of the functional units.
int
SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
{
   int delay = 0, ready = cycle;

   for (int s = 0; insn->srcExists(s); ++s)
      checkRd(insn->getSrc(s), cycle, delay);
   // WAR & WAW don't seem to matter
   // for (int s = 0; insn->srcExists(s); ++s)
   //    recordRd(insn->getSrc(s), cycle);

   switch (Target::getOpClass(insn->op)) {
   case OPCLASS_SFU:
      ready = score->res.sfu;
      break;
   case OPCLASS_ARITH:
      if (insn->op == OP_MUL && !isFloatType(insn->dType))
         ready = score->res.imul;
      break;
   case OPCLASS_TEXTURE:
      ready = score->res.tex;
      break;
   case OPCLASS_LOAD:
      ready = score->res.ld[insn->src(0).getFile()];
      break;
   case OPCLASS_STORE:
      ready = score->res.st[insn->src(0).getFile()];
      break;
   default:
      break;
   }
   if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
      ready = MAX2(ready, score->res.tex);

   delay = MAX2(delay, ready - cycle);

   // if can issue next cycle, delay is 0, not 1
   return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
}

// Record the effects of issuing insn at the given cycle: destination
// availability and functional-unit busy times.
// (Function continues in the next chunk.)
void
SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
{
   const int ready = cycle + targ->getLatency(insn);

   for (int d = 0; insn->defExists(d); ++d)
      recordWr(insn->getDef(d), ready);
   // WAR & WAW don't seem to matter
   // for (int s = 0; insn->srcExists(s); ++s)
   //    recordRd(insn->getSrc(s), cycle);

   switch (Target::getOpClass(insn->op)) {
   case OPCLASS_SFU:
      score->res.sfu = cycle + 4;
      break;
   case OPCLASS_ARITH:
      if (insn->op == OP_MUL && !isFloatType(insn->dType))
         score->res.imul = cycle + 4;
      break;
   case OPCLASS_TEXTURE:
      score->res.tex = cycle + 18;
      break;
   case OPCLASS_LOAD:
// --- tail of commitInsn (function begins in the previous chunk) ---
if (insn->src(0).getFile() == FILE_MEMORY_CONST)
         break;
      // a load blocks later loads for 4 cycles and stores until it completes
      score->res.ld[insn->src(0).getFile()] = cycle + 4;
      score->res.st[insn->src(0).getFile()] = ready;
      break;
   case OPCLASS_STORE:
      score->res.st[insn->src(0).getFile()] = cycle + 4;
      score->res.ld[insn->src(0).getFile()] = ready;
      break;
   case OPCLASS_OTHER:
      if (insn->op == OP_TEXBAR)
         score->res.tex = cycle;
      break;
   default:
      break;
   }

#ifdef NVC0_DEBUG_SCHED_DATA
   score->print(cycle);
#endif
}

// If value v is not yet readable at the given cycle, raise delay accordingly.
// (Function continues in the next chunk.)
void
SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
{
   int ready = cycle;
   int a, b;

   switch (v->reg.file) {
   case FILE_GPR:
      // wide values span reg.size/4 consecutive GPRs
      a = v->reg.data.id;
      b = a + v->reg.size / 4;
      for (int r = a; r < b; ++r)
         ready = MAX2(ready, score->rd.r[r]);
      break;
   case FILE_PREDICATE:
      ready = MAX2(ready, score->rd.p[v->reg.data.id]);
      break;
   case FILE_FLAGS:
      ready = MAX2(ready, score->rd.c);
      break;
   case FILE_SHADER_INPUT:
   case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
   case FILE_MEMORY_LOCAL:
   case FILE_MEMORY_CONST:
   case FILE_MEMORY_SHARED:
   case FILE_MEMORY_GLOBAL:
   case FILE_SYSTEM_VALUE:
      // TODO: any restrictions here ?
// --- tail of checkRd (function begins in the previous chunk) ---
      break;
   case FILE_IMMEDIATE:
      break;
   default:
      assert(0);
      break;
   }
   if (cycle < ready)
      delay = MAX2(delay, ready - cycle);
}

// If value v is not yet writable at the given cycle, raise delay accordingly.
void
SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
{
   int ready = cycle;
   int a, b;

   switch (v->reg.file) {
   case FILE_GPR:
      a = v->reg.data.id;
      b = a + v->reg.size / 4;
      for (int r = a; r < b; ++r)
         ready = MAX2(ready, score->wr.r[r]);
      break;
   case FILE_PREDICATE:
      ready = MAX2(ready, score->wr.p[v->reg.data.id]);
      break;
   default:
      assert(v->reg.file == FILE_FLAGS);
      ready = MAX2(ready, score->wr.c);
      break;
   }
   if (cycle < ready)
      delay = MAX2(delay, ready - cycle);
}

// A write to v completes at 'ready': record when subsequent READS of v
// may proceed (hence it updates the rd scores).
void
SchedDataCalculator::recordWr(const Value *v, const int ready)
{
   int a = v->reg.data.id;

   if (v->reg.file == FILE_GPR) {
      int b = a + v->reg.size / 4;
      for (int r = a; r < b; ++r)
         score->rd.r[r] = ready;
   } else
   // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
   if (v->reg.file == FILE_PREDICATE) {
      score->rd.p[a] = ready + 4;
   } else {
      assert(v->reg.file == FILE_FLAGS);
      score->rd.c = ready + 4;
   }
}

// A read of v completes at 'ready': record when subsequent WRITES of v
// may proceed (hence it updates the wr scores).
void
SchedDataCalculator::recordRd(const Value *v, const int ready)
{
   int a = v->reg.data.id;

   if (v->reg.file == FILE_GPR) {
      int b = a + v->reg.size / 4;
      for (int r = a; r < b; ++r)
         score->wr.r[r] = ready;
   } else
   if (v->reg.file == FILE_PREDICATE) {
      score->wr.p[a] = ready;
   } else
   if (v->reg.file == FILE_FLAGS) {
      score->wr.c = ready;
   }
}

// Entry point: run the scheduling-data pass over one function.
bool
calculateSchedDataNVC0(const Target *targ, Function *func)
{
   SchedDataCalculator sched(targ);
   return sched.run(func, true, true);
}

// Hook sched-data calculation into emission preparation (GK104+ only).
void
CodeEmitterNVC0::prepareEmission(Function *func)
{
   CodeEmitter::prepareEmission(func);

   if (targ->hasSWSched)
      calculateSchedDataNVC0(targ, func);
}

// (Constructor continues in the next chunk.)
CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
   : CodeEmitter(target),
     targNVC0(target),
     writeIssueDelays(target->hasSWSched)
{
   code = NULL;
   codeSize =
// --- tail of the CodeEmitterNVC0 constructor (begins in the previous chunk) ---
codeSizeLimit = 0;
   relocInfo = NULL;
}

// Factory for the NVC0-family code emitter.
CodeEmitter *
TargetNVC0::createCodeEmitterNVC0(Program::Type type)
{
   CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
   emit->setProgramType(type);
   return emit;
}

// Pick the emitter implementation by chipset (GK110+ uses its own).
CodeEmitter *
TargetNVC0::getCodeEmitter(Program::Type type)
{
   if (chipset >= NVISA_GK110_CHIPSET)
      return createCodeEmitterGK110(type);
   return createCodeEmitterNVC0(type);
}

} // namespace nv50_ir
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
new file mode 100644
index 00000000000..3193ea668a3
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -0,0 +1,2852 @@
/*
 * Copyright 2011 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

extern "C" {
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_util.h"
}

#include <set>

#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_util.h"
#include "codegen/nv50_ir_build_util.h"

namespace tgsi {

class Source;

static nv50_ir::operation translateOpcode(uint opcode);
static nv50_ir::DataFile translateFile(uint file);
static nv50_ir::TexTarget translateTexture(uint texTarg);
static nv50_ir::SVSemantic translateSysVal(uint sysval);

// Thin wrapper around a TGSI instruction providing typed accessors for
// its source/destination registers. (Class continues in the next chunk.)
class Instruction
{
public:
   Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }

   // Wrapper over a TGSI source register; fsr is NULL when constructed
   // from a bare/indirect/texture-offset register (no full-register info).
   class SrcRegister
   {
   public:
      SrcRegister(const struct tgsi_full_src_register *src)
         : reg(src->Register),
           fsr(src)
      { }

      SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }

      SrcRegister(const struct tgsi_ind_register& ind)
         : reg(tgsi_util_get_src_from_ind(&ind)),
           fsr(NULL)
      { }

      // Convert a texture offset into an equivalent src register.
      struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
      {
         struct tgsi_src_register reg;
         memset(&reg, 0, sizeof(reg));
         reg.Index = off.Index;
         reg.File = off.File;
         reg.SwizzleX = off.SwizzleX;
         reg.SwizzleY = off.SwizzleY;
         reg.SwizzleZ = off.SwizzleZ;
         return reg;
      }

      SrcRegister(const struct tgsi_texture_offset& off) :
         reg(offsetToSrc(off)),
         fsr(NULL)
      { }

      uint getFile() const { return reg.File; }

      bool is2D() const { return reg.Dimension; }

      // dim != 0 selects the dimension (2D) index rather than the register.
      bool isIndirect(int dim) const
      {
         return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
      }

      int getIndex(int dim) const
      {
         return (dim && fsr) ?
fsr->Dimension.Index : reg.Index; + } + + int getSwizzle(int chan) const + { + return tgsi_util_get_src_register_swizzle(®, chan); + } + + nv50_ir::Modifier getMod(int chan) const; + + SrcRegister getIndirect(int dim) const + { + assert(fsr && isIndirect(dim)); + if (dim) + return SrcRegister(fsr->DimIndirect); + return SrcRegister(fsr->Indirect); + } + + uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const + { + assert(reg.File == TGSI_FILE_IMMEDIATE); + assert(!reg.Absolute); + assert(!reg.Negate); + return info->immd.data[reg.Index * 4 + getSwizzle(c)]; + } + + private: + const struct tgsi_src_register reg; + const struct tgsi_full_src_register *fsr; + }; + + class DstRegister + { + public: + DstRegister(const struct tgsi_full_dst_register *dst) + : reg(dst->Register), + fdr(dst) + { } + + DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { } + + uint getFile() const { return reg.File; } + + bool is2D() const { return reg.Dimension; } + + bool isIndirect(int dim) const + { + return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect; + } + + int getIndex(int dim) const + { + return (dim && fdr) ? 
fdr->Dimension.Dimension : reg.Index; + } + + unsigned int getMask() const { return reg.WriteMask; } + + bool isMasked(int chan) const { return !(getMask() & (1 << chan)); } + + SrcRegister getIndirect(int dim) const + { + assert(fdr && isIndirect(dim)); + if (dim) + return SrcRegister(fdr->DimIndirect); + return SrcRegister(fdr->Indirect); + } + + private: + const struct tgsi_dst_register reg; + const struct tgsi_full_dst_register *fdr; + }; + + inline uint getOpcode() const { return insn->Instruction.Opcode; } + + unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; } + unsigned int dstCount() const { return insn->Instruction.NumDstRegs; } + + // mask of used components of source s + unsigned int srcMask(unsigned int s) const; + + SrcRegister getSrc(unsigned int s) const + { + assert(s < srcCount()); + return SrcRegister(&insn->Src[s]); + } + + DstRegister getDst(unsigned int d) const + { + assert(d < dstCount()); + return DstRegister(&insn->Dst[d]); + } + + SrcRegister getTexOffset(unsigned int i) const + { + assert(i < TGSI_FULL_MAX_TEX_OFFSETS); + return SrcRegister(insn->TexOffsets[i]); + } + + unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; } + + bool checkDstSrcAliasing() const; + + inline nv50_ir::operation getOP() const { + return translateOpcode(getOpcode()); } + + nv50_ir::DataType inferSrcType() const; + nv50_ir::DataType inferDstType() const; + + nv50_ir::CondCode getSetCond() const; + + nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const; + + inline uint getLabel() { return insn->Label.Label; } + + unsigned getSaturate() const { return insn->Instruction.Saturate; } + + void print() const + { + tgsi_dump_instruction(insn, 1); + } + +private: + const struct tgsi_full_instruction *insn; +}; + +unsigned int Instruction::srcMask(unsigned int s) const +{ + unsigned int mask = insn->Dst[0].Register.WriteMask; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + 
return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); + case TGSI_OPCODE_DP2: + return 0x3; + case TGSI_OPCODE_DP3: + return 0x7; + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */ + return 0xf; + case TGSI_OPCODE_DST: + return mask & (s ? 0xa : 0x6); + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_LOG: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SCS: + return 0x1; + case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: + return 0x1; + case TGSI_OPCODE_LIT: + return 0xb; + case TGSI_OPCODE_TEX2: + case TGSI_OPCODE_TXB2: + case TGSI_OPCODE_TXL2: + return (s == 0) ? 0xf : 0x3; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: + { + const struct tgsi_instruction_texture *tex = &insn->Texture; + + assert(insn->Instruction.Texture); + + mask = 0x7; + if (insn->Instruction.Opcode != TGSI_OPCODE_TEX && + insn->Instruction.Opcode != TGSI_OPCODE_TXD) + mask |= 0x8; /* bias, lod or proj */ + + switch (tex->Texture) { + case TGSI_TEXTURE_1D: + mask &= 0x9; + break; + case TGSI_TEXTURE_SHADOW1D: + mask &= 0xd; + break; + case TGSI_TEXTURE_1D_ARRAY: + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + mask &= 0xb; + break; + case TGSI_TEXTURE_CUBE_ARRAY: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + case TGSI_TEXTURE_SHADOWCUBE: + case TGSI_TEXTURE_SHADOWCUBE_ARRAY: + mask |= 0x8; + break; + default: + break; + } + } + return mask; + case TGSI_OPCODE_XPD: + { + unsigned int x = 0; + if (mask & 1) x |= 0x6; + if (mask & 2) x |= 0x5; + if (mask & 4) x |= 0x3; + return x; + } + default: + break; + } + + return mask; +} + +nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const +{ + nv50_ir::Modifier m(0); + + if (reg.Absolute) + m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS); + if (reg.Negate) + m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG); + return m; +} + +static nv50_ir::DataFile 
translateFile(uint file) +{ + switch (file) { + case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST; + case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT; + case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT; + case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR; + case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS; + case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; + case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; + case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; + case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL; + case TGSI_FILE_SAMPLER: + case TGSI_FILE_NULL: + default: + return nv50_ir::FILE_NULL; + } +} + +static nv50_ir::SVSemantic translateSysVal(uint sysval) +{ + switch (sysval) { + case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE; + case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE; + case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID; + case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID; + case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID; + case TGSI_SEMANTIC_GRID_SIZE: return nv50_ir::SV_NCTAID; + case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID; + case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID; + case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID; + default: + assert(0); + return nv50_ir::SV_CLOCK; + } +} + +#define NV50_IR_TEX_TARG_CASE(a, b) \ + case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b; + +static nv50_ir::TexTarget translateTexture(uint tex) +{ + switch (tex) { + NV50_IR_TEX_TARG_CASE(1D, 1D); + NV50_IR_TEX_TARG_CASE(2D, 2D); + NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS); + NV50_IR_TEX_TARG_CASE(3D, 3D); + NV50_IR_TEX_TARG_CASE(CUBE, CUBE); + NV50_IR_TEX_TARG_CASE(RECT, RECT); + NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY); + NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY); + NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY); + NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY); + NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW); + 
NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW); + NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW); + NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW); + NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW); + NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW); + NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW); + NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER); + + case TGSI_TEXTURE_UNKNOWN: + default: + assert(!"invalid texture target"); + return nv50_ir::TEX_TARGET_2D; + } +} + +nv50_ir::DataType Instruction::inferSrcType() const +{ + switch (getOpcode()) { + case TGSI_OPCODE_UIF: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_XOR: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_U2F: + case TGSI_OPCODE_UADD: + case TGSI_OPCODE_UDIV: + case TGSI_OPCODE_UMOD: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + case TGSI_OPCODE_USHR: + case TGSI_OPCODE_UCMP: + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMUMAX: + return nv50_ir::TYPE_U32; + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_IDIV: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_IABS: + case TGSI_OPCODE_INEG: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_ISSG: + case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version + case TGSI_OPCODE_MOD: + case TGSI_OPCODE_UARL: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMIMAX: + return nv50_ir::TYPE_S32; + default: + return nv50_ir::TYPE_F32; + } +} + +nv50_ir::DataType Instruction::inferDstType() const +{ + switch (getOpcode()) { + case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32; + case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32; + case 
TGSI_OPCODE_FSEQ: + case TGSI_OPCODE_FSGE: + case TGSI_OPCODE_FSLT: + case TGSI_OPCODE_FSNE: + return nv50_ir::TYPE_U32; + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_U2F: + return nv50_ir::TYPE_F32; + default: + return inferSrcType(); + } +} + +nv50_ir::CondCode Instruction::getSetCond() const +{ + using namespace nv50_ir; + + switch (getOpcode()) { + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_FSLT: + return CC_LT; + case TGSI_OPCODE_SLE: + return CC_LE; + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_FSGE: + return CC_GE; + case TGSI_OPCODE_SGT: + return CC_GT; + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_FSEQ: + return CC_EQ; + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_FSNE: + return CC_NEU; + case TGSI_OPCODE_USNE: + return CC_NE; + case TGSI_OPCODE_SFL: + return CC_NEVER; + case TGSI_OPCODE_STR: + default: + return CC_ALWAYS; + } +} + +#define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b + +static nv50_ir::operation translateOpcode(uint opcode) +{ + switch (opcode) { + NV50_IR_OPCODE_CASE(ARL, SHL); + NV50_IR_OPCODE_CASE(MOV, MOV); + + NV50_IR_OPCODE_CASE(RCP, RCP); + NV50_IR_OPCODE_CASE(RSQ, RSQ); + + NV50_IR_OPCODE_CASE(MUL, MUL); + NV50_IR_OPCODE_CASE(ADD, ADD); + + NV50_IR_OPCODE_CASE(MIN, MIN); + NV50_IR_OPCODE_CASE(MAX, MAX); + NV50_IR_OPCODE_CASE(SLT, SET); + NV50_IR_OPCODE_CASE(SGE, SET); + NV50_IR_OPCODE_CASE(MAD, MAD); + NV50_IR_OPCODE_CASE(SUB, SUB); + + NV50_IR_OPCODE_CASE(FLR, FLOOR); + NV50_IR_OPCODE_CASE(ROUND, CVT); + NV50_IR_OPCODE_CASE(EX2, EX2); + NV50_IR_OPCODE_CASE(LG2, LG2); + NV50_IR_OPCODE_CASE(POW, POW); + + NV50_IR_OPCODE_CASE(ABS, ABS); + + NV50_IR_OPCODE_CASE(COS, COS); + NV50_IR_OPCODE_CASE(DDX, DFDX); + NV50_IR_OPCODE_CASE(DDY, DFDY); + NV50_IR_OPCODE_CASE(KILL, DISCARD); + + NV50_IR_OPCODE_CASE(SEQ, SET); + NV50_IR_OPCODE_CASE(SFL, SET); + NV50_IR_OPCODE_CASE(SGT, SET); + 
NV50_IR_OPCODE_CASE(SIN, SIN); + NV50_IR_OPCODE_CASE(SLE, SET); + NV50_IR_OPCODE_CASE(SNE, SET); + NV50_IR_OPCODE_CASE(STR, SET); + NV50_IR_OPCODE_CASE(TEX, TEX); + NV50_IR_OPCODE_CASE(TXD, TXD); + NV50_IR_OPCODE_CASE(TXP, TEX); + + NV50_IR_OPCODE_CASE(BRA, BRA); + NV50_IR_OPCODE_CASE(CAL, CALL); + NV50_IR_OPCODE_CASE(RET, RET); + NV50_IR_OPCODE_CASE(CMP, SLCT); + + NV50_IR_OPCODE_CASE(TXB, TXB); + + NV50_IR_OPCODE_CASE(DIV, DIV); + + NV50_IR_OPCODE_CASE(TXL, TXL); + + NV50_IR_OPCODE_CASE(CEIL, CEIL); + NV50_IR_OPCODE_CASE(I2F, CVT); + NV50_IR_OPCODE_CASE(NOT, NOT); + NV50_IR_OPCODE_CASE(TRUNC, TRUNC); + NV50_IR_OPCODE_CASE(SHL, SHL); + + NV50_IR_OPCODE_CASE(AND, AND); + NV50_IR_OPCODE_CASE(OR, OR); + NV50_IR_OPCODE_CASE(MOD, MOD); + NV50_IR_OPCODE_CASE(XOR, XOR); + NV50_IR_OPCODE_CASE(SAD, SAD); + NV50_IR_OPCODE_CASE(TXF, TXF); + NV50_IR_OPCODE_CASE(TXQ, TXQ); + + NV50_IR_OPCODE_CASE(EMIT, EMIT); + NV50_IR_OPCODE_CASE(ENDPRIM, RESTART); + + NV50_IR_OPCODE_CASE(KILL_IF, DISCARD); + + NV50_IR_OPCODE_CASE(F2I, CVT); + NV50_IR_OPCODE_CASE(FSEQ, SET); + NV50_IR_OPCODE_CASE(FSGE, SET); + NV50_IR_OPCODE_CASE(FSLT, SET); + NV50_IR_OPCODE_CASE(FSNE, SET); + NV50_IR_OPCODE_CASE(IDIV, DIV); + NV50_IR_OPCODE_CASE(IMAX, MAX); + NV50_IR_OPCODE_CASE(IMIN, MIN); + NV50_IR_OPCODE_CASE(IABS, ABS); + NV50_IR_OPCODE_CASE(INEG, NEG); + NV50_IR_OPCODE_CASE(ISGE, SET); + NV50_IR_OPCODE_CASE(ISHR, SHR); + NV50_IR_OPCODE_CASE(ISLT, SET); + NV50_IR_OPCODE_CASE(F2U, CVT); + NV50_IR_OPCODE_CASE(U2F, CVT); + NV50_IR_OPCODE_CASE(UADD, ADD); + NV50_IR_OPCODE_CASE(UDIV, DIV); + NV50_IR_OPCODE_CASE(UMAD, MAD); + NV50_IR_OPCODE_CASE(UMAX, MAX); + NV50_IR_OPCODE_CASE(UMIN, MIN); + NV50_IR_OPCODE_CASE(UMOD, MOD); + NV50_IR_OPCODE_CASE(UMUL, MUL); + NV50_IR_OPCODE_CASE(USEQ, SET); + NV50_IR_OPCODE_CASE(USGE, SET); + NV50_IR_OPCODE_CASE(USHR, SHR); + NV50_IR_OPCODE_CASE(USLT, SET); + NV50_IR_OPCODE_CASE(USNE, SET); + + NV50_IR_OPCODE_CASE(SAMPLE, TEX); + NV50_IR_OPCODE_CASE(SAMPLE_B, TXB); + 
NV50_IR_OPCODE_CASE(SAMPLE_C, TEX); + NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX); + NV50_IR_OPCODE_CASE(SAMPLE_D, TXD); + NV50_IR_OPCODE_CASE(SAMPLE_L, TXL); + NV50_IR_OPCODE_CASE(SAMPLE_I, TXF); + NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF); + NV50_IR_OPCODE_CASE(GATHER4, TXG); + NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ); + + NV50_IR_OPCODE_CASE(ATOMUADD, ATOM); + NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM); + NV50_IR_OPCODE_CASE(ATOMCAS, ATOM); + NV50_IR_OPCODE_CASE(ATOMAND, ATOM); + NV50_IR_OPCODE_CASE(ATOMOR, ATOM); + NV50_IR_OPCODE_CASE(ATOMXOR, ATOM); + NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM); + NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM); + NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM); + NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM); + + NV50_IR_OPCODE_CASE(TEX2, TEX); + NV50_IR_OPCODE_CASE(TXB2, TXB); + NV50_IR_OPCODE_CASE(TXL2, TXL); + + NV50_IR_OPCODE_CASE(END, EXIT); + + default: + return nv50_ir::OP_NOP; + } +} + +static uint16_t opcodeToSubOp(uint opcode) +{ + switch (opcode) { + case TGSI_OPCODE_LFENCE: return NV50_IR_SUBOP_MEMBAR(L, GL); + case TGSI_OPCODE_SFENCE: return NV50_IR_SUBOP_MEMBAR(S, GL); + case TGSI_OPCODE_MFENCE: return NV50_IR_SUBOP_MEMBAR(M, GL); + case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD; + case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH; + case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS; + case TGSI_OPCODE_ATOMAND: return NV50_IR_SUBOP_ATOM_AND; + case TGSI_OPCODE_ATOMOR: return NV50_IR_SUBOP_ATOM_OR; + case TGSI_OPCODE_ATOMXOR: return NV50_IR_SUBOP_ATOM_XOR; + case TGSI_OPCODE_ATOMUMIN: return NV50_IR_SUBOP_ATOM_MIN; + case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN; + case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX; + case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX; + default: + return 0; + } +} + +bool Instruction::checkDstSrcAliasing() const +{ + if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory + return false; + + for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) { + if 
(insn->Src[s].Register.File == TGSI_FILE_NULL) + break; + if (insn->Src[s].Register.File == insn->Dst[0].Register.File && + insn->Src[s].Register.Index == insn->Dst[0].Register.Index) + return true; + } + return false; +} + +class Source +{ +public: + Source(struct nv50_ir_prog_info *); + ~Source(); + +public: + bool scanSource(); + unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; } + +public: + struct tgsi_shader_info scan; + struct tgsi_full_instruction *insns; + const struct tgsi_token *tokens; + struct nv50_ir_prog_info *info; + + nv50_ir::DynArray tempArrays; + nv50_ir::DynArray immdArrays; + + typedef nv50_ir::BuildUtil::Location Location; + // these registers are per-subroutine, cannot be used for parameter passing + std::set<Location> locals; + + bool mainTempsInLMem; + + int clipVertexOutput; + + struct TextureView { + uint8_t target; // TGSI_TEXTURE_* + }; + std::vector<TextureView> textureViews; + + struct Resource { + uint8_t target; // TGSI_TEXTURE_* + bool raw; + uint8_t slot; // $surface index + }; + std::vector<Resource> resources; + +private: + int inferSysValDirection(unsigned sn) const; + bool scanDeclaration(const struct tgsi_full_declaration *); + bool scanInstruction(const struct tgsi_full_instruction *); + void scanProperty(const struct tgsi_full_property *); + void scanImmediate(const struct tgsi_full_immediate *); + + inline bool isEdgeFlagPassthrough(const Instruction&) const; +}; + +Source::Source(struct nv50_ir_prog_info *prog) : info(prog) +{ + tokens = (const struct tgsi_token *)info->bin.source; + + if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) + tgsi_dump(tokens, 0); + + mainTempsInLMem = FALSE; +} + +Source::~Source() +{ + if (insns) + FREE(insns); + + if (info->immd.data) + FREE(info->immd.data); + if (info->immd.type) + FREE(info->immd.type); +} + +bool Source::scanSource() +{ + unsigned insnCount = 0; + struct tgsi_parse_context parse; + + tgsi_scan_shader(tokens, &scan); + + insns = (struct 
tgsi_full_instruction *)MALLOC(scan.num_instructions * + sizeof(insns[0])); + if (!insns) + return false; + + clipVertexOutput = -1; + + textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1); + resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1); + + info->immd.bufSize = 0; + + info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1; + info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; + info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; + + if (info->type == PIPE_SHADER_FRAGMENT) { + info->prop.fp.writesDepth = scan.writes_z; + info->prop.fp.usesDiscard = scan.uses_kill; + } else + if (info->type == PIPE_SHADER_GEOMETRY) { + info->prop.gp.instanceCount = 1; // default value + } + + info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16); + info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte)); + + tgsi_parse_init(&parse, tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + scanImmediate(&parse.FullToken.FullImmediate); + break; + case TGSI_TOKEN_TYPE_DECLARATION: + scanDeclaration(&parse.FullToken.FullDeclaration); + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + insns[insnCount++] = parse.FullToken.FullInstruction; + scanInstruction(&parse.FullToken.FullInstruction); + break; + case TGSI_TOKEN_TYPE_PROPERTY: + scanProperty(&parse.FullToken.FullProperty); + break; + default: + INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type); + break; + } + } + tgsi_parse_free(&parse); + + if (mainTempsInLMem) + info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16; + + if (info->io.genUserClip > 0) { + info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1; + + const unsigned int nOut = (info->io.genUserClip + 3) / 4; + + for (unsigned int n = 0; n < nOut; ++n) { + unsigned int i = info->numOutputs++; + info->out[i].id = i; + info->out[i].sn = TGSI_SEMANTIC_CLIPDIST; + info->out[i].si = n; 
+ info->out[i].mask = info->io.clipDistanceMask >> (n * 4); + } + } + + return info->assignSlots(info) == 0; +} + +void Source::scanProperty(const struct tgsi_full_property *prop) +{ + switch (prop->Property.PropertyName) { + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + info->prop.gp.outputPrim = prop->u[0].Data; + break; + case TGSI_PROPERTY_GS_INPUT_PRIM: + info->prop.gp.inputPrim = prop->u[0].Data; + break; + case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: + info->prop.gp.maxVertices = prop->u[0].Data; + break; +#if 0 + case TGSI_PROPERTY_GS_INSTANCE_COUNT: + info->prop.gp.instanceCount = prop->u[0].Data; + break; +#endif + case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: + info->prop.fp.separateFragData = TRUE; + break; + case TGSI_PROPERTY_FS_COORD_ORIGIN: + case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: + // we don't care + break; + case TGSI_PROPERTY_VS_PROHIBIT_UCPS: + info->io.genUserClip = -1; + break; + default: + INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); + break; + } +} + +void Source::scanImmediate(const struct tgsi_full_immediate *imm) +{ + const unsigned n = info->immd.count++; + + assert(n < scan.immediate_count); + + for (int c = 0; c < 4; ++c) + info->immd.data[n * 4 + c] = imm->u[c].Uint; + + info->immd.type[n] = imm->Immediate.DataType; +} + +int Source::inferSysValDirection(unsigned sn) const +{ + switch (sn) { + case TGSI_SEMANTIC_INSTANCEID: + case TGSI_SEMANTIC_VERTEXID: + return 1; +#if 0 + case TGSI_SEMANTIC_LAYER: + case TGSI_SEMANTIC_VIEWPORTINDEX: + return 0; +#endif + case TGSI_SEMANTIC_PRIMID: + return (info->type == PIPE_SHADER_FRAGMENT) ? 
1 : 0; + default: + return 0; + } +} + +bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) +{ + unsigned i, c; + unsigned sn = TGSI_SEMANTIC_GENERIC; + unsigned si = 0; + const unsigned first = decl->Range.First, last = decl->Range.Last; + + if (decl->Declaration.Semantic) { + sn = decl->Semantic.Name; + si = decl->Semantic.Index; + } + + if (decl->Declaration.Local) { + for (i = first; i <= last; ++i) { + for (c = 0; c < 4; ++c) { + locals.insert( + Location(decl->Declaration.File, decl->Dim.Index2D, i, c)); + } + } + } + + switch (decl->Declaration.File) { + case TGSI_FILE_INPUT: + if (info->type == PIPE_SHADER_VERTEX) { + // all vertex attributes are equal + for (i = first; i <= last; ++i) { + info->in[i].sn = TGSI_SEMANTIC_GENERIC; + info->in[i].si = i; + } + } else { + for (i = first; i <= last; ++i, ++si) { + info->in[i].id = i; + info->in[i].sn = sn; + info->in[i].si = si; + if (info->type == PIPE_SHADER_FRAGMENT) { + // translate interpolation mode + switch (decl->Interp.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + info->in[i].flat = 1; + break; + case TGSI_INTERPOLATE_COLOR: + info->in[i].sc = 1; + break; + case TGSI_INTERPOLATE_LINEAR: + info->in[i].linear = 1; + break; + default: + break; + } + if (decl->Interp.Centroid) + info->in[i].centroid = 1; + } + } + } + break; + case TGSI_FILE_OUTPUT: + for (i = first; i <= last; ++i, ++si) { + switch (sn) { + case TGSI_SEMANTIC_POSITION: + if (info->type == PIPE_SHADER_FRAGMENT) + info->io.fragDepth = i; + else + if (clipVertexOutput < 0) + clipVertexOutput = i; + break; + case TGSI_SEMANTIC_COLOR: + if (info->type == PIPE_SHADER_FRAGMENT) + info->prop.fp.numColourResults++; + break; + case TGSI_SEMANTIC_EDGEFLAG: + info->io.edgeFlagOut = i; + break; + case TGSI_SEMANTIC_CLIPVERTEX: + clipVertexOutput = i; + break; + case TGSI_SEMANTIC_CLIPDIST: + info->io.clipDistanceMask |= + decl->Declaration.UsageMask << (si * 4); + info->io.genUserClip = -1; + break; + default: + break; + } + 
info->out[i].id = i; + info->out[i].sn = sn; + info->out[i].si = si; + } + break; + case TGSI_FILE_SYSTEM_VALUE: + switch (sn) { + case TGSI_SEMANTIC_INSTANCEID: + info->io.instanceId = first; + break; + case TGSI_SEMANTIC_VERTEXID: + info->io.vertexId = first; + break; + default: + break; + } + for (i = first; i <= last; ++i, ++si) { + info->sv[i].sn = sn; + info->sv[i].si = si; + info->sv[i].input = inferSysValDirection(sn); + } + break; + case TGSI_FILE_RESOURCE: + for (i = first; i <= last; ++i) { + resources[i].target = decl->Resource.Resource; + resources[i].raw = decl->Resource.Raw; + resources[i].slot = i; + } + break; + case TGSI_FILE_SAMPLER_VIEW: + for (i = first; i <= last; ++i) + textureViews[i].target = decl->SamplerView.Resource; + break; + case TGSI_FILE_NULL: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_ADDRESS: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: + case TGSI_FILE_PREDICATE: + case TGSI_FILE_SAMPLER: + break; + default: + ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File); + return false; + } + return true; +} + +inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const +{ + return insn.getOpcode() == TGSI_OPCODE_MOV && + insn.getDst(0).getIndex(0) == info->io.edgeFlagOut && + insn.getSrc(0).getFile() == TGSI_FILE_INPUT; +} + +bool Source::scanInstruction(const struct tgsi_full_instruction *inst) +{ + Instruction insn(inst); + + if (insn.getOpcode() == TGSI_OPCODE_BARRIER) + info->numBarriers = 1; + + if (insn.dstCount()) { + if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) { + Instruction::DstRegister dst = insn.getDst(0); + + if (dst.isIndirect(0)) + for (unsigned i = 0; i < info->numOutputs; ++i) + info->out[i].mask = 0xf; + else + info->out[dst.getIndex(0)].mask |= dst.getMask(); + + if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE || + info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID || + info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG) + info->out[dst.getIndex(0)].mask &= 1; + + if 
(isEdgeFlagPassthrough(insn)) + info->io.edgeFlagIn = insn.getSrc(0).getIndex(0); + } else + if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) { + if (insn.getDst(0).isIndirect(0)) + mainTempsInLMem = TRUE; + } + } + + for (unsigned s = 0; s < insn.srcCount(); ++s) { + Instruction::SrcRegister src = insn.getSrc(s); + if (src.getFile() == TGSI_FILE_TEMPORARY) { + if (src.isIndirect(0)) + mainTempsInLMem = TRUE; + } else + if (src.getFile() == TGSI_FILE_RESOURCE) { + if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL) + info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? + 0x1 : 0x2; + } + if (src.getFile() != TGSI_FILE_INPUT) + continue; + unsigned mask = insn.srcMask(s); + + if (src.isIndirect(0)) { + for (unsigned i = 0; i < info->numInputs; ++i) + info->in[i].mask = 0xf; + } else { + const int i = src.getIndex(0); + for (unsigned c = 0; c < 4; ++c) { + if (!(mask & (1 << c))) + continue; + int k = src.getSwizzle(c); + if (k <= TGSI_SWIZZLE_W) + info->in[i].mask |= 1 << k; + } + switch (info->in[i].sn) { + case TGSI_SEMANTIC_PSIZE: + case TGSI_SEMANTIC_PRIMID: + case TGSI_SEMANTIC_FOG: + info->in[i].mask &= 0x1; + break; + case TGSI_SEMANTIC_PCOORD: + info->in[i].mask &= 0x3; + break; + default: + break; + } + } + } + return true; +} + +nv50_ir::TexInstruction::Target +Instruction::getTexture(const tgsi::Source *code, int s) const +{ + // XXX: indirect access + unsigned int r; + + switch (getSrc(s).getFile()) { + case TGSI_FILE_RESOURCE: + r = getSrc(s).getIndex(0); + return translateTexture(code->resources.at(r).target); + case TGSI_FILE_SAMPLER_VIEW: + r = getSrc(s).getIndex(0); + return translateTexture(code->textureViews.at(r).target); + default: + return translateTexture(insn->Texture.Texture); + } +} + +} // namespace tgsi + +namespace { + +using namespace nv50_ir; + +class Converter : public BuildUtil +{ +public: + Converter(Program *, const tgsi::Source *); + ~Converter(); + + bool run(); + +private: + struct Subroutine + { + Subroutine(Function 
*f) : f(f) { } + Function *f; + ValueMap values; + }; + + Value *getVertexBase(int s); + DataArray *getArrayForFile(unsigned file, int idx); + Value *fetchSrc(int s, int c); + Value *acquireDst(int d, int c); + void storeDst(int d, int c, Value *); + + Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr); + void storeDst(const tgsi::Instruction::DstRegister dst, int c, + Value *val, Value *ptr); + + Value *applySrcMod(Value *, int s, int c); + + Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr); + Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c); + Symbol *dstToSym(tgsi::Instruction::DstRegister, int c); + + bool handleInstruction(const struct tgsi_full_instruction *); + void exportOutputs(); + inline Subroutine *getSubroutine(unsigned ip); + inline Subroutine *getSubroutine(Function *); + inline bool isEndOfSubroutine(uint ip); + + void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask); + + // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto) + void setTexRS(TexInstruction *, unsigned int& s, int R, int S); + void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy); + void handleTXF(Value *dst0[4], int R, int L_M); + void handleTXQ(Value *dst0[4], enum TexQuery); + void handleLIT(Value *dst0[4]); + void handleUserClipPlanes(); + + Symbol *getResourceBase(int r); + void getResourceCoords(std::vector<Value *>&, int r, int s); + + void handleLOAD(Value *dst0[4]); + void handleSTORE(); + void handleATOM(Value *dst0[4], DataType, uint16_t subOp); + + Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); + + void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); + + Value *buildDot(int dim); + + class BindArgumentsPass : public Pass { + public: + BindArgumentsPass(Converter &conv) : conv(conv) { } + + private: + Converter &conv; + Subroutine *sub; + + inline const Location *getValueLocation(Subroutine *, Value *); + + template<typename 
T> inline void + updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *), + T (Function::*proto)); + + template<typename T> inline void + updatePrototype(BitSet *set, void (Function::*updateSet)(), + T (Function::*proto)); + + protected: + bool visit(Function *); + bool visit(BasicBlock *bb) { return false; } + }; + +private: + const struct tgsi::Source *code; + const struct nv50_ir_prog_info *info; + + struct { + std::map<unsigned, Subroutine> map; + Subroutine *cur; + } sub; + + uint ip; // instruction pointer + + tgsi::Instruction tgsi; + + DataType dstTy; + DataType srcTy; + + DataArray tData; // TGSI_FILE_TEMPORARY + DataArray aData; // TGSI_FILE_ADDRESS + DataArray pData; // TGSI_FILE_PREDICATE + DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers) + + Value *zero; + Value *fragCoord[4]; + Value *clipVtx[4]; + + Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP) + uint8_t vtxBaseValid; + + Stack condBBs; // fork BB, then else clause BB + Stack joinBBs; // fork BB, for inserting join ops on ENDIF + Stack loopBBs; // loop headers + Stack breakBBs; // end of / after loop +}; + +Symbol * +Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c) +{ + const int swz = src.getSwizzle(c); + + return makeSym(src.getFile(), + src.is2D() ? src.getIndex(1) : 0, + src.isIndirect(0) ? -1 : src.getIndex(0), swz, + src.getIndex(0) * 16 + swz * 4); +} + +Symbol * +Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c) +{ + return makeSym(dst.getFile(), + dst.is2D() ? dst.getIndex(1) : 0, + dst.isIndirect(0) ? 
-1 : dst.getIndex(0), c, + dst.getIndex(0) * 16 + c * 4); +} + +Symbol * +Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) +{ + Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile)); + + sym->reg.fileIndex = fileIdx; + + if (idx >= 0) { + if (sym->reg.file == FILE_SHADER_INPUT) + sym->setOffset(info->in[idx].slot[c] * 4); + else + if (sym->reg.file == FILE_SHADER_OUTPUT) + sym->setOffset(info->out[idx].slot[c] * 4); + else + if (sym->reg.file == FILE_SYSTEM_VALUE) + sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c); + else + sym->setOffset(address); + } else { + sym->setOffset(address); + } + return sym; +} + +static inline uint8_t +translateInterpMode(const struct nv50_ir_varying *var, operation& op) +{ + uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; + + if (var->flat) + mode = NV50_IR_INTERP_FLAT; + else + if (var->linear) + mode = NV50_IR_INTERP_LINEAR; + else + if (var->sc) + mode = NV50_IR_INTERP_SC; + + op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) + ? OP_PINTERP : OP_LINTERP; + + if (var->centroid) + mode |= NV50_IR_INTERP_CENTROID; + + return mode; +} + +Value * +Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr) +{ + operation op; + + // XXX: no way to know interpolation mode if we don't know what's accessed + const uint8_t mode = translateInterpMode(&info->in[ptr ? 
0 : + src.getIndex(0)], op); + + Instruction *insn = new_Instruction(func, op, TYPE_F32); + + insn->setDef(0, getScratch()); + insn->setSrc(0, srcToSym(src, c)); + if (op == OP_PINTERP) + insn->setSrc(1, fragCoord[3]); + if (ptr) + insn->setIndirect(0, 0, ptr); + + insn->setInterpolate(mode); + + bb->insertTail(insn); + return insn->getDef(0); +} + +Value * +Converter::applySrcMod(Value *val, int s, int c) +{ + Modifier m = tgsi.getSrc(s).getMod(c); + DataType ty = tgsi.inferSrcType(); + + if (m & Modifier(NV50_IR_MOD_ABS)) + val = mkOp1v(OP_ABS, ty, getScratch(), val); + + if (m & Modifier(NV50_IR_MOD_NEG)) + val = mkOp1v(OP_NEG, ty, getScratch(), val); + + return val; +} + +Value * +Converter::getVertexBase(int s) +{ + assert(s < 5); + if (!(vtxBaseValid & (1 << s))) { + const int index = tgsi.getSrc(s).getIndex(1); + Value *rel = NULL; + if (tgsi.getSrc(s).isIndirect(1)) + rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL); + vtxBaseValid |= 1 << s; + vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(index), rel); + } + return vtxBase[s]; +} + +Value * +Converter::fetchSrc(int s, int c) +{ + Value *res; + Value *ptr = NULL, *dimRel = NULL; + + tgsi::Instruction::SrcRegister src = tgsi.getSrc(s); + + if (src.isIndirect(0)) + ptr = fetchSrc(src.getIndirect(0), 0, NULL); + + if (src.is2D()) { + switch (src.getFile()) { + case TGSI_FILE_INPUT: + dimRel = getVertexBase(s); + break; + case TGSI_FILE_CONSTANT: + // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k] + if (src.isIndirect(1)) + dimRel = fetchSrc(src.getIndirect(1), 0, 0); + break; + default: + break; + } + } + + res = fetchSrc(src, c, ptr); + + if (dimRel) + res->getInsn()->setIndirect(0, 1, dimRel); + + return applySrcMod(res, s, c); +} + +Converter::DataArray * +Converter::getArrayForFile(unsigned file, int idx) +{ + switch (file) { + case TGSI_FILE_TEMPORARY: + return &tData; + case TGSI_FILE_PREDICATE: + return &pData; + case TGSI_FILE_ADDRESS: + return &aData; + case 
TGSI_FILE_OUTPUT: + assert(prog->getType() == Program::TYPE_FRAGMENT); + return &oData; + default: + assert(!"invalid/unhandled TGSI source file"); + return NULL; + } +} + +Value * +Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) +{ + const int idx2d = src.is2D() ? src.getIndex(1) : 0; + const int idx = src.getIndex(0); + const int swz = src.getSwizzle(c); + + switch (src.getFile()) { + case TGSI_FILE_IMMEDIATE: + assert(!ptr); + return loadImm(NULL, info->immd.data[idx * 4 + swz]); + case TGSI_FILE_CONSTANT: + return mkLoadv(TYPE_U32, srcToSym(src, c), ptr); + case TGSI_FILE_INPUT: + if (prog->getType() == Program::TYPE_FRAGMENT) { + // don't load masked inputs, won't be assigned a slot + if (!ptr && !(info->in[idx].mask & (1 << swz))) + return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f); + if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE) + return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0)); + return interpolate(src, c, ptr); + } + return mkLoadv(TYPE_U32, srcToSym(src, c), ptr); + case TGSI_FILE_OUTPUT: + assert(!"load from output file"); + return NULL; + case TGSI_FILE_SYSTEM_VALUE: + assert(!ptr); + return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c)); + default: + return getArrayForFile(src.getFile(), idx2d)->load( + sub.cur->values, idx, swz, ptr); + } +} + +Value * +Converter::acquireDst(int d, int c) +{ + const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); + const unsigned f = dst.getFile(); + const int idx = dst.getIndex(0); + const int idx2d = dst.is2D() ? 
dst.getIndex(1) : 0; + + if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE) + return NULL; + + if (dst.isIndirect(0) || + f == TGSI_FILE_SYSTEM_VALUE || + (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT)) + return getScratch(); + + return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c); +} + +void +Converter::storeDst(int d, int c, Value *val) +{ + const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); + + switch (tgsi.getSaturate()) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + mkOp1(OP_SAT, dstTy, val, val); + break; + case TGSI_SAT_MINUS_PLUS_ONE: + mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f)); + mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f)); + break; + default: + assert(!"invalid saturation mode"); + break; + } + + Value *ptr = dst.isIndirect(0) ? + fetchSrc(dst.getIndirect(0), 0, NULL) : NULL; + + if (info->io.genUserClip > 0 && + dst.getFile() == TGSI_FILE_OUTPUT && + !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) { + mkMov(clipVtx[c], val); + val = clipVtx[c]; + } + + storeDst(dst, c, val, ptr); +} + +void +Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, + Value *val, Value *ptr) +{ + const unsigned f = dst.getFile(); + const int idx = dst.getIndex(0); + const int idx2d = dst.is2D() ? 
dst.getIndex(1) : 0; + + if (f == TGSI_FILE_SYSTEM_VALUE) { + assert(!ptr); + mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val); + } else + if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) { + if (ptr || (info->out[idx].mask & (1 << c))) + mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); + } else + if (f == TGSI_FILE_TEMPORARY || + f == TGSI_FILE_PREDICATE || + f == TGSI_FILE_ADDRESS || + f == TGSI_FILE_OUTPUT) { + getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val); + } else { + assert(!"invalid dst file"); + } +} + +#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst) \ + for (chan = 0; chan < 4; ++chan) \ + if (!inst.getDst(d).isMasked(chan)) + +Value * +Converter::buildDot(int dim) +{ + assert(dim > 0); + + Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0); + Value *dotp = getScratch(); + + mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1); + + for (int c = 1; c < dim; ++c) { + src0 = fetchSrc(0, c); + src1 = fetchSrc(1, c); + mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp); + } + return dotp; +} + +void +Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork) +{ + FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL); + join->fixed = 1; + conv->insertHead(join); + + fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv); + fork->insertBefore(fork->getExit(), fork->joinAt); +} + +void +Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S) +{ + unsigned rIdx = 0, sIdx = 0; + + if (R >= 0) + rIdx = tgsi.getSrc(R).getIndex(0); + if (S >= 0) + sIdx = tgsi.getSrc(S).getIndex(0); + + tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx); + + if (tgsi.getSrc(R).isIndirect(0)) { + tex->tex.rIndirectSrc = s; + tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL)); + } + if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) { + tex->tex.sIndirectSrc = s; + tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL)); + } +} + +void +Converter::handleTXQ(Value *dst0[4], enum 
TexQuery query) +{ + TexInstruction *tex = new_TexInstruction(func, OP_TXQ); + tex->tex.query = query; + unsigned int c, d; + + for (d = 0, c = 0; c < 4; ++c) { + if (!dst0[c]) + continue; + tex->tex.mask |= 1 << c; + tex->setDef(d++, dst0[c]); + } + tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level + + setTexRS(tex, c, 1, -1); + + bb->insertTail(tex); +} + +void +Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask) +{ + Value *proj = fetchSrc(0, 3); + Instruction *insn = proj->getUniqueInsn(); + int c; + + if (insn->op == OP_PINTERP) { + bb->insertTail(insn = cloneForward(func, insn)); + insn->op = OP_LINTERP; + insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode()); + insn->setSrc(1, NULL); + proj = insn->getDef(0); + } + proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj); + + for (c = 0; c < 4; ++c) { + if (!(mask & (1 << c))) + continue; + if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP) + continue; + mask &= ~(1 << c); + + bb->insertTail(insn = cloneForward(func, insn)); + insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode()); + insn->setSrc(1, proj); + dst[c] = insn->getDef(0); + } + if (!mask) + return; + + proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3)); + + for (c = 0; c < 4; ++c) + if (mask & (1 << c)) + dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj); +} + +// order of nv50 ir sources: x y z layer lod/bias shadow +// order of TGSI TEX sources: x y z layer shadow lod/bias +// lowering will finally set the hw specific order (like array first on nvc0) +void +Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) +{ + Value *val; + Value *arg[4], *src[8]; + Value *lod = NULL, *shd = NULL; + unsigned int s, c, d; + TexInstruction *texi = new_TexInstruction(func, tgsi.getOP()); + + TexInstruction::Target tgt = tgsi.getTexture(code, R); + + for (s = 0; s < tgt.getArgCount(); ++s) + arg[s] = src[s] = fetchSrc(0, s); + + if (texi->op == OP_TXL || texi->op == 
OP_TXB) + lod = fetchSrc(L >> 4, L & 3); + + if (C == 0x0f) + C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src + + if (tgt.isShadow()) + shd = fetchSrc(C >> 4, C & 3); + + if (texi->op == OP_TXD) { + for (c = 0; c < tgt.getDim(); ++c) { + texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c)); + texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c)); + } + } + + // cube textures don't care about projection value, it's divided out + if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) { + unsigned int n = tgt.getDim(); + if (shd) { + arg[n] = shd; + ++n; + assert(tgt.getDim() == tgt.getArgCount()); + } + loadProjTexCoords(src, arg, (1 << n) - 1); + if (shd) + shd = src[n - 1]; + } + + if (tgt.isCube()) { + for (c = 0; c < 3; ++c) + src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]); + val = getScratch(); + mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); + mkOp2(OP_MAX, TYPE_F32, val, src[2], val); + mkOp1(OP_RCP, TYPE_F32, val, val); + for (c = 0; c < 3; ++c) + src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val); + } + + for (c = 0, d = 0; c < 4; ++c) { + if (dst[c]) { + texi->setDef(d++, dst[c]); + texi->tex.mask |= 1 << c; + } else { + // NOTE: maybe hook up def too, for CSE + } + } + for (s = 0; s < tgt.getArgCount(); ++s) + texi->setSrc(s, src[s]); + if (lod) + texi->setSrc(s++, lod); + if (shd) + texi->setSrc(s++, shd); + + setTexRS(texi, s, R, S); + + if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ) + texi->tex.levelZero = true; + + bb->insertTail(texi); +} + +// 1st source: xyz = coordinates, w = lod/sample +// 2nd source: offset +void +Converter::handleTXF(Value *dst[4], int R, int L_M) +{ + TexInstruction *texi = new_TexInstruction(func, tgsi.getOP()); + int ms; + unsigned int c, d, s; + + texi->tex.target = tgsi.getTexture(code, R); + + ms = texi->tex.target.isMS() ? 
1 : 0; + texi->tex.levelZero = ms; /* MS textures don't have mip-maps */ + + for (c = 0, d = 0; c < 4; ++c) { + if (dst[c]) { + texi->setDef(d++, dst[c]); + texi->tex.mask |= 1 << c; + } + } + for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c) + texi->setSrc(c, fetchSrc(0, c)); + texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms + + setTexRS(texi, c, R, -1); + + for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { + for (c = 0; c < 3; ++c) { + texi->tex.offset[s][c] = tgsi.getTexOffset(s).getValueU32(c, info); + if (texi->tex.offset[s][c]) + texi->tex.useOffsets = s + 1; + } + } + + bb->insertTail(texi); +} + +void +Converter::handleLIT(Value *dst0[4]) +{ + Value *val0 = NULL; + unsigned int mask = tgsi.getDst(0).getMask(); + + if (mask & (1 << 0)) + loadImm(dst0[0], 1.0f); + + if (mask & (1 << 3)) + loadImm(dst0[3], 1.0f); + + if (mask & (3 << 1)) { + val0 = getScratch(); + mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero); + if (mask & (1 << 1)) + mkMov(dst0[1], val0); + } + + if (mask & (1 << 2)) { + Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3); + Value *val1 = getScratch(), *val3 = getScratch(); + + Value *pos128 = loadImm(NULL, +127.999999f); + Value *neg128 = loadImm(NULL, -127.999999f); + + mkOp2(OP_MAX, TYPE_F32, val1, src1, zero); + mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128); + mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128); + mkOp2(OP_POW, TYPE_F32, val3, val1, val3); + + mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], val3, zero, val0); + } +} + +static inline bool +isResourceSpecial(const int r) +{ + return (r == TGSI_RESOURCE_GLOBAL || + r == TGSI_RESOURCE_LOCAL || + r == TGSI_RESOURCE_PRIVATE || + r == TGSI_RESOURCE_INPUT); +} + +static inline bool +isResourceRaw(const struct tgsi::Source *code, const int r) +{ + return isResourceSpecial(r) || code->resources[r].raw; +} + +static inline nv50_ir::TexTarget +getResourceTarget(const struct tgsi::Source *code, int r) +{ + if (isResourceSpecial(r)) + return nv50_ir::TEX_TARGET_BUFFER; + 
return tgsi::translateTexture(code->resources.at(r).target); +} + +Symbol * +Converter::getResourceBase(const int r) +{ + Symbol *sym = NULL; + + switch (r) { + case TGSI_RESOURCE_GLOBAL: + sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15); + break; + case TGSI_RESOURCE_LOCAL: + assert(prog->getType() == Program::TYPE_COMPUTE); + sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32, + info->prop.cp.sharedOffset); + break; + case TGSI_RESOURCE_PRIVATE: + sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32, + info->bin.tlsSpace); + break; + case TGSI_RESOURCE_INPUT: + assert(prog->getType() == Program::TYPE_COMPUTE); + sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32, + info->prop.cp.inputOffset); + break; + default: + sym = new_Symbol(prog, + nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot); + break; + } + return sym; +} + +void +Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s) +{ + const int arg = + TexInstruction::Target(getResourceTarget(code, r)).getArgCount(); + + for (int c = 0; c < arg; ++c) + coords.push_back(fetchSrc(s, c)); + + // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk + if (r == TGSI_RESOURCE_LOCAL || + r == TGSI_RESOURCE_PRIVATE || + r == TGSI_RESOURCE_INPUT) + coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS), + coords[0]); +} + +static inline int +partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask) +{ + int n = 0; + + while (mask) { + if (mask & 1) { + size[n]++; + } else { + if (size[n]) + comp[n = 1] = size[0] + 1; + else + comp[n]++; + } + mask >>= 1; + } + if (size[0] == 3) { + n = 1; + size[0] = (comp[0] == 1) ? 1 : 2; + size[1] = 3 - size[0]; + comp[1] = comp[0] + size[0]; + } + return n + 1; +} + +// For raw loads, granularity is 4 byte. +// Usage of the texture read mask on OP_SULDP is not allowed. 
// Load up to four destination components from resource r (TGSI LOAD).
// Raw resources are accessed as untyped memory: the enabled components are
// partitioned into at most two naturally-sized loads (partitionLoadStore)
// and fetched via OP_SULDB or, for special/nv50-style buffer resources, a
// plain mkLoad from the resource base symbol.  Typed (formatted) resources
// go through a single OP_SULDP; results land in def[] by component and are
// copied to dst0[] according to the source swizzle afterwards.
void
Converter::handleLOAD(Value *dst0[4])
{
   const int r = tgsi.getSrc(0).getIndex(0);
   int c;
   std::vector<Value *> off, src, ldv, def;

   // off = address/coordinate operands for this resource target
   getResourceCoords(off, r, 1);

   if (isResourceRaw(code, r)) {
      uint8_t mask = 0;
      uint8_t comp[2] = { 0, 0 }; // first component of each partial load
      uint8_t size[2] = { 0, 0 }; // component count of each partial load

      Symbol *base = getResourceBase(r);

      // determine the base and size of the at most 2 load ops
      for (c = 0; c < 4; ++c)
         if (!tgsi.getDst(0).isMasked(c))
            mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);

      int n = partitionLoadStore(comp, size, mask);

      src = off;

      def.resize(4); // index by component, the ones we need will be non-NULL
      for (c = 0; c < 4; ++c) {
         // write straight to the destination only for identity swizzles;
         // otherwise load into a scratch value and swizzle in the final copy
         if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
            def[c] = dst0[c];
         else
         if (mask & (1 << c))
            def[c] = getScratch();
      }

      // special resources (GLOBAL/LOCAL/PRIVATE/INPUT) and nv50-style
      // buffers use a regular load instead of a surface op
      const bool useLd = isResourceSpecial(r) ||
         (info->io.nv50styleSurfaces &&
          code->resources[r].target == TGSI_TEXTURE_BUFFER);

      for (int i = 0; i < n; ++i) {
         ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);

         if (comp[i]) // adjust x component of source address if necessary
            src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
                            off[0], mkImm(comp[i] * 4)); // 4-byte granularity
         else
            src[0] = off[0];

         if (useLd) {
            Instruction *ld =
               mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
            for (size_t c = 1; c < ldv.size(); ++c)
               ld->setDef(c, ldv[c]);
         } else {
            mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
                  0, ldv, src)->dType = typeOfSize(size[i] * 4);
         }
      }
   } else {
      // formatted path: SULDP returns all components, so give every channel
      // a def slot; non-identity swizzles are resolved by the copies below
      def.resize(4);
      for (c = 0; c < 4; ++c) {
         if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
            def[c] = getScratch();
         else
            def[c] = dst0[c];
      }

      mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
            def, off);
   }
   // move swizzled results into the real destinations
   FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
      if (dst0[c] != def[c])
         mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
}

// For formatted stores, the write mask on OP_SUSTP can be used.
// Raw stores have to be split.
void
Converter::handleSTORE()
{
   const int r = tgsi.getDst(0).getIndex(0);
   int c;
   std::vector<Value *> off, src, dummy;

   // off = address/coordinate operands; src starts as a copy and later gets
   // the values to store appended (surface path)
   getResourceCoords(off, r, 0);
   src = off;
   const int s = src.size();

   if (isResourceRaw(code, r)) {
      uint8_t comp[2] = { 0, 0 }; // first component of each partial store
      uint8_t size[2] = { 0, 0 }; // component count of each partial store

      // split the write mask into at most 2 contiguous store ops
      int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());

      Symbol *base = getResourceBase(r);

      // same criterion as handleLOAD: plain store for special resources and
      // nv50-style buffers, surface op (SUSTB) otherwise
      const bool useSt = isResourceSpecial(r) ||
         (info->io.nv50styleSurfaces &&
          code->resources[r].target == TGSI_TEXTURE_BUFFER);

      for (int i = 0; i < n; ++i) {
         if (comp[i]) // adjust x component of source address if necessary
            src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
                            off[0], mkImm(comp[i] * 4)); // 4-byte granularity
         else
            src[0] = off[0];

         const DataType stTy = typeOfSize(size[i] * 4);

         if (useSt) {
            Instruction *st =
               mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
            for (c = 1; c < size[i]; ++c)
               st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
            st->setIndirect(0, 0, src[0]);
         } else {
            // attach values to be stored
            src.resize(s + size[i]);
            for (c = 0; c < size[i]; ++c)
               src[s + c] = fetchSrc(1, comp[i] + c);
            mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
                  0, dummy, src)->setType(stTy);
         }
      }
   } else {
      // formatted path: one SUSTP with the TGSI write mask applied directly
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
         src.push_back(fetchSrc(1, c));

      mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
            dummy, src)->tex.mask = tgsi.getDst(0).getMask();
   }
}

// XXX: These only work on resources with the single-component u32/s32 formats.
// Therefore the result is replicated. This might not be intended by TGSI, but
// operating on more than 1 component would produce undefined results because
// they do not exist.
+void +Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) +{ + const int r = tgsi.getSrc(0).getIndex(0); + std::vector<Value *> srcv; + std::vector<Value *> defv; + LValue *dst = getScratch(); + + getResourceCoords(srcv, r, 1); + + if (isResourceSpecial(r)) { + assert(r != TGSI_RESOURCE_INPUT); + Instruction *insn; + insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0)); + insn->subOp = subOp; + if (subOp == NV50_IR_SUBOP_ATOM_CAS) + insn->setSrc(2, fetchSrc(3, 0)); + insn->setIndirect(0, 0, srcv.at(0)); + } else { + operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP; + TexTarget targ = getResourceTarget(code, r); + int idx = code->resources[r].slot; + defv.push_back(dst); + srcv.push_back(fetchSrc(2, 0)); + if (subOp == NV50_IR_SUBOP_ATOM_CAS) + srcv.push_back(fetchSrc(3, 0)); + TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv); + tex->subOp = subOp; + tex->tex.mask = 1; + tex->setType(ty); + } + + for (int c = 0; c < 4; ++c) + if (dst0[c]) + dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov +} + +Converter::Subroutine * +Converter::getSubroutine(unsigned ip) +{ + std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip); + + if (it == sub.map.end()) + it = sub.map.insert(std::make_pair( + ip, Subroutine(new Function(prog, "SUB", ip)))).first; + + return &it->second; +} + +Converter::Subroutine * +Converter::getSubroutine(Function *f) +{ + unsigned ip = f->getLabel(); + std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip); + + if (it == sub.map.end()) + it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first; + + return &it->second; +} + +bool +Converter::isEndOfSubroutine(uint ip) +{ + assert(ip < code->scan.num_instructions); + tgsi::Instruction insn(&code->insns[ip]); + return (insn.getOpcode() == TGSI_OPCODE_END || + insn.getOpcode() == TGSI_OPCODE_ENDSUB || + // does END occur at end of main or the very end ? 
+ insn.getOpcode() == TGSI_OPCODE_BGNSUB); +} + +bool +Converter::handleInstruction(const struct tgsi_full_instruction *insn) +{ + Instruction *geni; + + Value *dst0[4], *rDst0[4]; + Value *src0, *src1, *src2; + Value *val0, *val1; + int c; + + tgsi = tgsi::Instruction(insn); + + bool useScratchDst = tgsi.checkDstSrcAliasing(); + + operation op = tgsi.getOP(); + dstTy = tgsi.inferDstType(); + srcTy = tgsi.inferSrcType(); + + unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0; + + if (tgsi.dstCount()) { + for (c = 0; c < 4; ++c) { + rDst0[c] = acquireDst(0, c); + dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c]; + } + } + + switch (tgsi.getOpcode()) { + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_UADD: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_DIV: + case TGSI_OPCODE_IDIV: + case TGSI_OPCODE_UDIV: + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_MOD: + case TGSI_OPCODE_UMOD: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_USHR: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_XOR: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + src1 = fetchSrc(1, c); + mkOp2(op, dstTy, dst0[c], src0, src1); + } + break; + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_SAD: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + src1 = fetchSrc(1, c); + src2 = fetchSrc(2, c); + mkOp3(op, dstTy, dst0[c], src0, src1, src2); + } + break; + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_CEIL: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_TRUNC: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_IABS: + case TGSI_OPCODE_INEG: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkOp1(op, dstTy, dst0[c], 
fetchSrc(0, c)); + break; + case TGSI_OPCODE_RSQ: + src0 = fetchSrc(0, 0); + val0 = getScratch(); + mkOp1(OP_ABS, TYPE_F32, val0, src0); + mkOp1(OP_RSQ, TYPE_F32, val0, val0); + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkMov(dst0[c], val0); + break; + case TGSI_OPCODE_ARL: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = ROUND_M; + mkOp2(OP_SHL, TYPE_U32, dst0[c], dst0[c], mkImm(4)); + } + break; + case TGSI_OPCODE_UARL: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkOp2(OP_SHL, TYPE_U32, dst0[c], fetchSrc(0, c), mkImm(4)); + break; + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0); + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); + break; + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + val0 = getScratch(); + if (mask & 7) { + mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0)); + mkOp1(op, TYPE_F32, val0, val0); + for (c = 0; c < 3; ++c) + if (dst0[c]) + mkMov(dst0[c], val0); + } + if (dst0[3]) { + mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3)); + mkOp1(op, TYPE_F32, dst0[3], val0); + } + break; + case TGSI_OPCODE_SCS: + if (mask & 3) { + val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0)); + if (dst0[0]) + mkOp1(OP_COS, TYPE_F32, dst0[0], val0); + if (dst0[1]) + mkOp1(OP_SIN, TYPE_F32, dst0[1], val0); + } + if (dst0[2]) + loadImm(dst0[2], 0.0f); + if (dst0[3]) + loadImm(dst0[3], 1.0f); + break; + case TGSI_OPCODE_EXP: + src0 = fetchSrc(0, 0); + val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0); + if (dst0[1]) + mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0); + if (dst0[0]) + mkOp1(OP_EX2, TYPE_F32, dst0[0], val0); + if (dst0[2]) + mkOp1(OP_EX2, TYPE_F32, dst0[2], src0); + if (dst0[3]) + loadImm(dst0[3], 1.0f); + break; + case TGSI_OPCODE_LOG: + src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0)); + val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? 
dst0[2] : getSSA(), src0); + if (dst0[0] || dst0[1]) + val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0); + if (dst0[1]) { + mkOp1(OP_EX2, TYPE_F32, dst0[1], val1); + mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]); + mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0); + } + if (dst0[3]) + loadImm(dst0[3], 1.0f); + break; + case TGSI_OPCODE_DP2: + val0 = buildDot(2); + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkMov(dst0[c], val0); + break; + case TGSI_OPCODE_DP3: + val0 = buildDot(3); + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkMov(dst0[c], val0); + break; + case TGSI_OPCODE_DP4: + val0 = buildDot(4); + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkMov(dst0[c], val0); + break; + case TGSI_OPCODE_DPH: + val0 = buildDot(3); + src1 = fetchSrc(1, 3); + mkOp2(OP_ADD, TYPE_F32, val0, val0, src1); + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkMov(dst0[c], val0); + break; + case TGSI_OPCODE_DST: + if (dst0[0]) + loadImm(dst0[0], 1.0f); + if (dst0[1]) { + src0 = fetchSrc(0, 1); + src1 = fetchSrc(1, 1); + mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1); + } + if (dst0[2]) + mkMov(dst0[2], fetchSrc(0, 2)); + if (dst0[3]) + mkMov(dst0[3], fetchSrc(1, 3)); + break; + case TGSI_OPCODE_LRP: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + src1 = fetchSrc(1, c); + src2 = fetchSrc(2, c); + mkOp3(OP_MAD, TYPE_F32, dst0[c], + mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2); + } + break; + case TGSI_OPCODE_LIT: + handleLIT(dst0); + break; + case TGSI_OPCODE_XPD: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + if (c < 3) { + val0 = getSSA(); + src0 = fetchSrc(1, (c + 1) % 3); + src1 = fetchSrc(0, (c + 2) % 3); + mkOp2(OP_MUL, TYPE_F32, val0, src0, src1); + mkOp1(OP_NEG, TYPE_F32, val0, val0); + + src0 = fetchSrc(0, (c + 1) % 3); + src1 = fetchSrc(1, (c + 2) % 3); + mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0); + } else { + loadImm(dst0[c], 1.0f); + } + } + break; + case TGSI_OPCODE_ISSG: + case TGSI_OPCODE_SSG: + 
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + val0 = getScratch(); + val1 = getScratch(); + mkCmp(OP_SET, CC_GT, srcTy, val0, src0, zero); + mkCmp(OP_SET, CC_LT, srcTy, val1, src0, zero); + if (srcTy == TYPE_F32) + mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1); + else + mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0); + } + break; + case TGSI_OPCODE_UCMP: + case TGSI_OPCODE_CMP: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + src1 = fetchSrc(1, c); + src2 = fetchSrc(2, c); + if (src1 == src2) + mkMov(dst0[c], src1); + else + mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE, + srcTy, dst0[c], src1, src2, src0); + } + break; + case TGSI_OPCODE_FRC: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + val0 = getScratch(); + mkOp1(OP_FLOOR, TYPE_F32, val0, src0); + mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); + } + break; + case TGSI_OPCODE_ROUND: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c)) + ->rnd = ROUND_NI; + break; + case TGSI_OPCODE_CLAMP: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + src1 = fetchSrc(1, c); + src2 = fetchSrc(2, c); + val0 = getScratch(); + mkOp2(OP_MIN, TYPE_F32, val0, src0, src1); + mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2); + } + break; + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SFL: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_STR: + case TGSI_OPCODE_FSEQ: + case TGSI_OPCODE_FSGE: + case TGSI_OPCODE_FSLT: + case TGSI_OPCODE_FSNE: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + src1 = fetchSrc(1, c); + mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], src0, src1); + } + break; + case TGSI_OPCODE_KILL_IF: + val0 = new_LValue(func, 
FILE_PREDICATE); + for (c = 0; c < 4; ++c) { + mkCmp(OP_SET, CC_LT, TYPE_F32, val0, fetchSrc(0, c), zero); + mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0); + } + break; + case TGSI_OPCODE_KILL: + mkOp(OP_DISCARD, TYPE_NONE, NULL); + break; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: + // R S L C Dx Dy + handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00); + break; + case TGSI_OPCODE_TXD: + handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20); + break; + case TGSI_OPCODE_TEX2: + handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00); + break; + case TGSI_OPCODE_TXB2: + case TGSI_OPCODE_TXL2: + handleTEX(dst0, 2, 2, 0x10, 0x11, 0x00, 0x00); + break; + case TGSI_OPCODE_SAMPLE: + case TGSI_OPCODE_SAMPLE_B: + case TGSI_OPCODE_SAMPLE_D: + case TGSI_OPCODE_SAMPLE_L: + case TGSI_OPCODE_SAMPLE_C: + case TGSI_OPCODE_SAMPLE_C_LZ: + handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40); + break; + case TGSI_OPCODE_TXF: + handleTXF(dst0, 1, 0x03); + break; + case TGSI_OPCODE_SAMPLE_I: + handleTXF(dst0, 1, 0x03); + break; + case TGSI_OPCODE_SAMPLE_I_MS: + handleTXF(dst0, 1, 0x20); + break; + case TGSI_OPCODE_TXQ: + case TGSI_OPCODE_SVIEWINFO: + handleTXQ(dst0, TXQ_DIMS); + break; + case TGSI_OPCODE_F2I: + case TGSI_OPCODE_F2U: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z; + break; + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_U2F: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c)); + break; + case TGSI_OPCODE_EMIT: + case TGSI_OPCODE_ENDPRIM: + // get vertex stream if specified (must be immediate) + src0 = tgsi.srcCount() ? 
+ mkImm(tgsi.getSrc(0).getValueU32(0, info)) : zero; + mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1; + break; + case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: + { + BasicBlock *ifBB = new BasicBlock(func); + + bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE); + condBBs.push(bb); + joinBBs.push(bb); + + mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy); + + setPosition(ifBB, true); + } + break; + case TGSI_OPCODE_ELSE: + { + BasicBlock *elseBB = new BasicBlock(func); + BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p); + + forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE); + condBBs.push(bb); + + forkBB->getExit()->asFlow()->target.bb = elseBB; + if (!bb->isTerminated()) + mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL); + + setPosition(elseBB, true); + } + break; + case TGSI_OPCODE_ENDIF: + { + BasicBlock *convBB = new BasicBlock(func); + BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p); + BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p); + + if (!bb->isTerminated()) { + // we only want join if none of the clauses ended with CONT/BREAK/RET + if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6) + insertConvergenceOps(convBB, forkBB); + mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL); + bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD); + } + + if (prevBB->getExit()->op == OP_BRA) { + prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD); + prevBB->getExit()->asFlow()->target.bb = convBB; + } + setPosition(convBB, true); + } + break; + case TGSI_OPCODE_BGNLOOP: + { + BasicBlock *lbgnBB = new BasicBlock(func); + BasicBlock *lbrkBB = new BasicBlock(func); + + loopBBs.push(lbgnBB); + breakBBs.push(lbrkBB); + if (loopBBs.getSize() > func->loopNestingBound) + func->loopNestingBound++; + + mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL); + + bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE); + setPosition(lbgnBB, true); + mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL); + } + break; + case TGSI_OPCODE_ENDLOOP: + { + 
BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p); + + if (!bb->isTerminated()) { + mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); + bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); + } + setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true); + } + break; + case TGSI_OPCODE_BRK: + { + if (bb->isTerminated()) + break; + BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p); + mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL); + bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS); + } + break; + case TGSI_OPCODE_CONT: + { + if (bb->isTerminated()) + break; + BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p); + mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL); + contBB->explicitCont = true; + bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK); + } + break; + case TGSI_OPCODE_BGNSUB: + { + Subroutine *s = getSubroutine(ip); + BasicBlock *entry = new BasicBlock(s->f); + BasicBlock *leave = new BasicBlock(s->f); + + // multiple entrypoints possible, keep the graph connected + if (prog->getType() == Program::TYPE_COMPUTE) + prog->main->call.attach(&s->f->call, Graph::Edge::TREE); + + sub.cur = s; + s->f->setEntry(entry); + s->f->setExit(leave); + setPosition(entry, true); + return true; + } + case TGSI_OPCODE_ENDSUB: + { + sub.cur = getSubroutine(prog->main); + setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true); + return true; + } + case TGSI_OPCODE_CAL: + { + Subroutine *s = getSubroutine(tgsi.getLabel()); + mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL); + func->call.attach(&s->f->call, Graph::Edge::TREE); + return true; + } + case TGSI_OPCODE_RET: + { + if (bb->isTerminated()) + return true; + BasicBlock *leave = BasicBlock::get(func->cfgExit); + + if (!isEndOfSubroutine(ip + 1)) { + // insert a PRERET at the entry if this is an early return + // (only needed for sharing code in the epilogue) + BasicBlock *pos = getBB(); + setPosition(BasicBlock::get(func->cfg.getRoot()), false); + mkFlow(OP_PRERET, leave, CC_ALWAYS, 
NULL)->fixed = 1; + setPosition(pos, true); + } + mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1; + bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS); + } + break; + case TGSI_OPCODE_END: + { + // attach and generate epilogue code + BasicBlock *epilogue = BasicBlock::get(func->cfgExit); + bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE); + setPosition(epilogue, true); + if (prog->getType() == Program::TYPE_FRAGMENT) + exportOutputs(); + if (info->io.genUserClip > 0) + handleUserClipPlanes(); + mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; + } + break; + case TGSI_OPCODE_SWITCH: + case TGSI_OPCODE_CASE: + ERROR("switch/case opcode encountered, should have been lowered\n"); + abort(); + break; + case TGSI_OPCODE_LOAD: + handleLOAD(dst0); + break; + case TGSI_OPCODE_STORE: + handleSTORE(); + break; + case TGSI_OPCODE_BARRIER: + geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0)); + geni->fixed = 1; + geni->subOp = NV50_IR_SUBOP_BAR_SYNC; + break; + case TGSI_OPCODE_MFENCE: + case TGSI_OPCODE_LFENCE: + case TGSI_OPCODE_SFENCE: + geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); + geni->fixed = 1; + geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); + break; + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMAX: + handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); + break; + default: + ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); + assert(0); + break; + } + + if (tgsi.dstCount()) { + for (c = 0; c < 4; ++c) { + if (!dst0[c]) + continue; + if (dst0[c] != rDst0[c]) + mkMov(rDst0[c], dst0[c]); + storeDst(0, c, rDst0[c]); + } + } + vtxBaseValid = 0; + + return true; +} + +void +Converter::handleUserClipPlanes() +{ + Value *res[8]; + int n, i, c; + + for (c = 0; c < 4; ++c) { + for (i = 0; i < info->io.genUserClip; ++i) { + 
Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot, + TYPE_F32, info->io.ucpBase + i * 16 + c * 4); + Value *ucp = mkLoadv(TYPE_F32, sym, NULL); + if (c == 0) + res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); + else + mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]); + } + } + + const int first = info->numOutputs - (info->io.genUserClip + 3) / 4; + + for (i = 0; i < info->io.genUserClip; ++i) { + n = i / 4 + first; + c = i % 4; + Symbol *sym = + mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4); + mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]); + } +} + +void +Converter::exportOutputs() +{ + for (unsigned int i = 0; i < info->numOutputs; ++i) { + for (unsigned int c = 0; c < 4; ++c) { + if (!oData.exists(sub.cur->values, i, c)) + continue; + Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, + info->out[i].slot[c] * 4); + Value *val = oData.load(sub.cur->values, i, c, NULL); + if (val) + mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val); + } + } +} + +Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir), + code(code), + tgsi(NULL), + tData(this), aData(this), pData(this), oData(this) +{ + info = code->info; + + const DataFile tFile = code->mainTempsInLMem ? 
FILE_MEMORY_LOCAL : FILE_GPR; + + const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY); + const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE); + const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS); + const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT); + + tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0); + pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0); + aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_ADDRESS, 0); + oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0); + + zero = mkImm((uint32_t)0); + + vtxBaseValid = 0; +} + +Converter::~Converter() +{ +} + +inline const Converter::Location * +Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v) +{ + ValueMap::l_iterator it = s->values.l.find(v); + return it == s->values.l.end() ? NULL : &it->second; +} + +template<typename T> inline void +Converter::BindArgumentsPass::updateCallArgs( + Instruction *i, void (Instruction::*setArg)(int, Value *), + T (Function::*proto)) +{ + Function *g = i->asFlow()->target.fn; + Subroutine *subg = conv.getSubroutine(g); + + for (unsigned a = 0; a < (g->*proto).size(); ++a) { + Value *v = (g->*proto)[a].get(); + const Converter::Location &l = *getValueLocation(subg, v); + Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx); + + (i->*setArg)(a, array->acquire(sub->values, l.i, l.c)); + } +} + +template<typename T> inline void +Converter::BindArgumentsPass::updatePrototype( + BitSet *set, void (Function::*updateSet)(), T (Function::*proto)) +{ + (func->*updateSet)(); + + for (unsigned i = 0; i < set->getSize(); ++i) { + Value *v = func->getLValue(i); + const Converter::Location *l = getValueLocation(sub, v); + + // only include values with a matching TGSI register + if (set->test(i) && l && !conv.code->locals.count(*l)) + (func->*proto).push_back(v); + } +} + +bool +Converter::BindArgumentsPass::visit(Function *f) +{ + sub = conv.getSubroutine(f); + + for 
(ArrayList::Iterator bi = f->allBBlocks.iterator(); + !bi.end(); bi.next()) { + for (Instruction *i = BasicBlock::get(bi)->getFirst(); + i; i = i->next) { + if (i->op == OP_CALL && !i->asFlow()->builtin) { + updateCallArgs(i, &Instruction::setSrc, &Function::ins); + updateCallArgs(i, &Instruction::setDef, &Function::outs); + } + } + } + + if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE) + return true; + updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet, + &Function::buildLiveSets, &Function::ins); + updatePrototype(&BasicBlock::get(f->cfgExit)->defSet, + &Function::buildDefSets, &Function::outs); + + return true; +} + +bool +Converter::run() +{ + BasicBlock *entry = new BasicBlock(prog->main); + BasicBlock *leave = new BasicBlock(prog->main); + + prog->main->setEntry(entry); + prog->main->setExit(leave); + + setPosition(entry, true); + sub.cur = getSubroutine(prog->main); + + if (info->io.genUserClip > 0) { + for (int c = 0; c < 4; ++c) + clipVtx[c] = getScratch(); + } + + if (prog->getType() == Program::TYPE_FRAGMENT) { + Symbol *sv = mkSysVal(SV_POSITION, 3); + fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv); + mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]); + } + + for (ip = 0; ip < code->scan.num_instructions; ++ip) { + if (!handleInstruction(&code->insns[ip])) + return false; + } + + if (!BindArgumentsPass(*this).run(prog)) + return false; + + return true; +} + +} // unnamed namespace + +namespace nv50_ir { + +bool +Program::makeFromTGSI(struct nv50_ir_prog_info *info) +{ + tgsi::Source src(info); + if (!src.scanSource()) + return false; + tlsSize = info->bin.tlsSpace; + + Converter builder(this, &src); + return builder.run(); +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp new file mode 100644 index 00000000000..3f8d00a1c99 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp @@ -0,0 +1,436 @@ 
+/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "codegen/nv50_ir_graph.h" +#include <limits> +#include <list> +#include <stack> +#include "codegen/nv50_ir.h" + +namespace nv50_ir { + +Graph::Graph() +{ + root = NULL; + size = 0; + sequence = 0; +} + +Graph::~Graph() +{ + for (IteratorRef it = safeIteratorDFS(); !it->end(); it->next()) + reinterpret_cast<Node *>(it->get())->cut(); +} + +void Graph::insert(Node *node) +{ + if (!root) + root = node; + + node->graph = this; + size++; +} + +void Graph::Edge::unlink() +{ + if (origin) { + prev[0]->next[0] = next[0]; + next[0]->prev[0] = prev[0]; + if (origin->out == this) + origin->out = (next[0] == this) ? NULL : next[0]; + + --origin->outCount; + } + if (target) { + prev[1]->next[1] = next[1]; + next[1]->prev[1] = prev[1]; + if (target->in == this) + target->in = (next[1] == this) ? 
NULL : next[1]; + + --target->inCount; + } +} + +const char *Graph::Edge::typeStr() const +{ + switch (type) { + case TREE: return "tree"; + case FORWARD: return "forward"; + case BACK: return "back"; + case CROSS: return "cross"; + case DUMMY: return "dummy"; + case UNKNOWN: + default: + return "unk"; + } +} + +Graph::Node::Node(void *priv) : data(priv), + in(0), out(0), graph(0), + visited(0), + inCount(0), outCount(0) +{ + // nothing to do +} + +void Graph::Node::attach(Node *node, Edge::Type kind) +{ + Edge *edge = new Edge(this, node, kind); + + // insert head + if (this->out) { + edge->next[0] = this->out; + edge->prev[0] = this->out->prev[0]; + edge->prev[0]->next[0] = edge; + this->out->prev[0] = edge; + } + this->out = edge; + + if (node->in) { + edge->next[1] = node->in; + edge->prev[1] = node->in->prev[1]; + edge->prev[1]->next[1] = edge; + node->in->prev[1] = edge; + } + node->in = edge; + + ++this->outCount; + ++node->inCount; + + assert(graph || node->graph); + if (!node->graph) + graph->insert(node); + if (!graph) + node->graph->insert(this); + + if (kind == Edge::UNKNOWN) + graph->classifyEdges(); +} + +bool Graph::Node::detach(Graph::Node *node) +{ + EdgeIterator ei = this->outgoing(); + for (; !ei.end(); ei.next()) + if (ei.getNode() == node) + break; + if (ei.end()) { + ERROR("no such node attached\n"); + return false; + } + delete ei.getEdge(); + return true; +} + +// Cut a node from the graph, deleting all attached edges. 
+void Graph::Node::cut() +{ + while (out) + delete out; + while (in) + delete in; + + if (graph) { + if (graph->root == this) + graph->root = NULL; + graph = NULL; + } +} + +Graph::Edge::Edge(Node *org, Node *tgt, Type kind) +{ + target = tgt; + origin = org; + type = kind; + + next[0] = next[1] = this; + prev[0] = prev[1] = this; +} + +bool +Graph::Node::reachableBy(const Node *node, const Node *term) const +{ + std::stack<const Node *> stack; + const Node *pos = NULL; + const int seq = graph->nextSequence(); + + stack.push(node); + + while (!stack.empty()) { + pos = stack.top(); + stack.pop(); + + if (pos == this) + return true; + if (pos == term) + continue; + + for (EdgeIterator ei = pos->outgoing(); !ei.end(); ei.next()) { + if (ei.getType() == Edge::BACK || ei.getType() == Edge::DUMMY) + continue; + if (ei.getNode()->visit(seq)) + stack.push(ei.getNode()); + } + } + return pos == this; +} + +class DFSIterator : public Iterator +{ +public: + DFSIterator(Graph *graph, const bool preorder) + { + unsigned int seq = graph->nextSequence(); + + nodes = new Graph::Node * [graph->getSize() + 1]; + count = 0; + pos = 0; + nodes[graph->getSize()] = 0; + + if (graph->getRoot()) { + graph->getRoot()->visit(seq); + search(graph->getRoot(), preorder, seq); + } + } + + ~DFSIterator() + { + if (nodes) + delete[] nodes; + } + + void search(Graph::Node *node, const bool preorder, const int sequence) + { + if (preorder) + nodes[count++] = node; + + for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) + if (ei.getNode()->visit(sequence)) + search(ei.getNode(), preorder, sequence); + + if (!preorder) + nodes[count++] = node; + } + + virtual bool end() const { return pos >= count; } + virtual void next() { if (pos < count) ++pos; } + virtual void *get() const { return nodes[pos]; } + virtual void reset() { pos = 0; } + +protected: + Graph::Node **nodes; + int count; + int pos; +}; + +IteratorRef Graph::iteratorDFS(bool preorder) +{ + return IteratorRef(new 
DFSIterator(this, preorder)); +} + +IteratorRef Graph::safeIteratorDFS(bool preorder) +{ + return this->iteratorDFS(preorder); +} + +class CFGIterator : public Iterator +{ +public: + CFGIterator(Graph *graph) + { + nodes = new Graph::Node * [graph->getSize() + 1]; + count = 0; + pos = 0; + nodes[graph->getSize()] = 0; + + // TODO: argh, use graph->sequence instead of tag and just raise it by > 1 + for (IteratorRef it = graph->iteratorDFS(); !it->end(); it->next()) + reinterpret_cast<Graph::Node *>(it->get())->tag = 0; + + if (graph->getRoot()) + search(graph->getRoot(), graph->nextSequence()); + } + + ~CFGIterator() + { + if (nodes) + delete[] nodes; + } + + virtual void *get() const { return nodes[pos]; } + virtual bool end() const { return pos >= count; } + virtual void next() { if (pos < count) ++pos; } + virtual void reset() { pos = 0; } + +private: + void search(Graph::Node *node, const int sequence) + { + Stack bb, cross; + + bb.push(node); + + while (bb.getSize()) { + node = reinterpret_cast<Graph::Node *>(bb.pop().u.p); + assert(node); + if (!node->visit(sequence)) + continue; + node->tag = 0; + + for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) { + switch (ei.getType()) { + case Graph::Edge::TREE: + case Graph::Edge::FORWARD: + case Graph::Edge::DUMMY: + if (++(ei.getNode()->tag) == ei.getNode()->incidentCountFwd()) + bb.push(ei.getNode()); + break; + case Graph::Edge::BACK: + continue; + case Graph::Edge::CROSS: + if (++(ei.getNode()->tag) == 1) + cross.push(ei.getNode()); + break; + default: + assert(!"unknown edge kind in CFG"); + break; + } + } + nodes[count++] = node; + + if (bb.getSize() == 0) + cross.moveTo(bb); + } + } + +private: + Graph::Node **nodes; + int count; + int pos; +}; + +IteratorRef Graph::iteratorCFG() +{ + return IteratorRef(new CFGIterator(this)); +} + +IteratorRef Graph::safeIteratorCFG() +{ + return this->iteratorCFG(); +} + +void Graph::classifyEdges() +{ + int seq; + + for (IteratorRef it = 
iteratorDFS(true); !it->end(); it->next()) { + Node *node = reinterpret_cast<Node *>(it->get()); + node->visit(0); + node->tag = 0; + } + + classifyDFS(root, (seq = 0)); + + sequence = seq; +} + +void Graph::classifyDFS(Node *curr, int& seq) +{ + Graph::Edge *edge; + Graph::Node *node; + + curr->visit(++seq); + curr->tag = 1; + + for (edge = curr->out; edge; edge = edge->next[0]) { + node = edge->target; + if (edge->type == Edge::DUMMY) + continue; + + if (node->getSequence() == 0) { + edge->type = Edge::TREE; + classifyDFS(node, seq); + } else + if (node->getSequence() > curr->getSequence()) { + edge->type = Edge::FORWARD; + } else { + edge->type = node->tag ? Edge::BACK : Edge::CROSS; + } + } + + for (edge = curr->in; edge; edge = edge->next[1]) { + node = edge->origin; + if (edge->type == Edge::DUMMY) + continue; + + if (node->getSequence() == 0) { + edge->type = Edge::TREE; + classifyDFS(node, seq); + } else + if (node->getSequence() > curr->getSequence()) { + edge->type = Edge::FORWARD; + } else { + edge->type = node->tag ? 
Edge::BACK : Edge::CROSS; + } + } + + curr->tag = 0; +} + +// @dist is indexed by Node::tag, returns -1 if no path found +int +Graph::findLightestPathWeight(Node *a, Node *b, const std::vector<int> &weight) +{ + std::vector<int> path(weight.size(), std::numeric_limits<int>::max()); + std::list<Node *> nodeList; + const int seq = nextSequence(); + + path[a->tag] = 0; + for (Node *c = a; c && c != b;) { + const int p = path[c->tag] + weight[c->tag]; + for (EdgeIterator ei = c->outgoing(); !ei.end(); ei.next()) { + Node *t = ei.getNode(); + if (t->getSequence() < seq) { + if (path[t->tag] == std::numeric_limits<int>::max()) + nodeList.push_front(t); + if (p < path[t->tag]) + path[t->tag] = p; + } + } + c->visit(seq); + Node *next = NULL; + for (std::list<Node *>::iterator n = nodeList.begin(); + n != nodeList.end(); ++n) { + if (!next || path[(*n)->tag] < path[next->tag]) + next = *n; + if ((*n) == c) { + // erase visited + n = nodeList.erase(n); + --n; + } + } + c = next; + } + if (path[b->tag] == std::numeric_limits<int>::max()) + return -1; + return path[b->tag]; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h new file mode 100644 index 00000000000..b0981ff6943 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h @@ -0,0 +1,228 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of 
the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NV50_IR_GRAPH_H__ +#define __NV50_IR_GRAPH_H__ + +#include "codegen/nv50_ir_util.h" +#include <vector> + +namespace nv50_ir { + +#define ITER_NODE(x) reinterpret_cast<Graph::Node *>((x).get()) +#define ITER_EDGE(x) reinterpret_cast<Graph::Edge *>((x).get()) + +// A connected graph. +class Graph +{ +public: + class Node; + + class Edge + { + public: + enum Type + { + UNKNOWN, + TREE, + FORWARD, + BACK, + CROSS, // e.g. loop break + DUMMY + }; + + Edge(Node *dst, Node *src, Type kind); + ~Edge() { unlink(); } + + inline Node *getOrigin() const { return origin; } + inline Node *getTarget() const { return target; } + + inline Type getType() const { return type; } + const char *typeStr() const; + + private: + Node *origin; + Node *target; + + Type type; + Edge *next[2]; // next edge outgoing/incident from/to origin/target + Edge *prev[2]; + + void unlink(); + + friend class Graph; + }; + + class EdgeIterator : public Iterator + { + public: + EdgeIterator() : e(0), t(0), d(0), rev(false) { } + EdgeIterator(Graph::Edge *first, int dir, bool reverse) + : d(dir), rev(reverse) + { + t = e = ((rev && first) ? first->prev[d] : first); + } + + virtual void next() + { + Graph::Edge *n = (rev ? e->prev[d] : e->next[d]); + e = (n == t ? NULL : n); + } + virtual bool end() const { return !e; } + virtual void *get() const { return e; } + + inline Node *getNode() const { assert(e); return d ? 
+ e->origin : e->target; } + inline Edge *getEdge() const { return e; } + inline Edge::Type getType() { return e ? e->getType() : Edge::UNKNOWN; } + + private: + Graph::Edge *e; + Graph::Edge *t; + int d; + bool rev; + }; + + class Node + { + public: + Node(void *); + ~Node() { cut(); } + + void attach(Node *, Edge::Type); + bool detach(Node *); + void cut(); + + inline EdgeIterator outgoing(bool reverse = false) const; + inline EdgeIterator incident(bool reverse = false) const; + + inline Node *parent() const; // returns NULL if count(incident edges) != 1 + + bool reachableBy(const Node *node, const Node *term) const; + + inline bool visit(int); + inline int getSequence() const; + + inline int incidentCountFwd() const; // count of incident non-back edges + inline int incidentCount() const { return inCount; } + inline int outgoingCount() const { return outCount; } + + Graph *getGraph() const { return graph; } + + void *data; + + private: + Edge *in; + Edge *out; + Graph *graph; + + int visited; + + int16_t inCount; + int16_t outCount; + public: + int tag; // for temporary use + + friend class Graph; + }; + +public: + Graph(); + ~Graph(); // does *not* free the nodes (make it an option ?) 
+ + inline Node *getRoot() const { return root; } + + inline unsigned int getSize() const { return size; } + + inline int nextSequence(); + + void insert(Node *node); // attach to or set as root + + IteratorRef iteratorDFS(bool preorder = true); + IteratorRef iteratorCFG(); + + // safe iterators are unaffected by changes to the *edges* of the graph + IteratorRef safeIteratorDFS(bool preorder = true); + IteratorRef safeIteratorCFG(); + + void classifyEdges(); + + // @weights: indexed by Node::tag + int findLightestPathWeight(Node *, Node *, const std::vector<int>& weights); + +private: + void classifyDFS(Node *, int&); + +private: + Node *root; + unsigned int size; + int sequence; +}; + +int Graph::nextSequence() +{ + return ++sequence; +} + +Graph::Node *Graph::Node::parent() const +{ + if (inCount != 1) + return NULL; + assert(in); + return in->origin; +} + +bool Graph::Node::visit(int v) +{ + if (visited == v) + return false; + visited = v; + return true; +} + +int Graph::Node::getSequence() const +{ + return visited; +} + +Graph::EdgeIterator Graph::Node::outgoing(bool reverse) const +{ + return EdgeIterator(out, 0, reverse); +} + +Graph::EdgeIterator Graph::Node::incident(bool reverse) const +{ + return EdgeIterator(in, 1, reverse); +} + +int Graph::Node::incidentCountFwd() const +{ + int n = 0; + for (EdgeIterator ei = incident(); !ei.end(); ei.next()) + if (ei.getType() != Edge::BACK) + ++n; + return n; +} + +} // namespace nv50_ir + +#endif // __NV50_IR_GRAPH_H__ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h new file mode 100644 index 00000000000..255324fec40 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h @@ -0,0 +1,420 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NV50_IR_INLINES_H__ +#define __NV50_IR_INLINES_H__ + +static inline CondCode reverseCondCode(CondCode cc) +{ + static const uint8_t ccRev[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; + + return static_cast<CondCode>(ccRev[cc & 7] | (cc & ~7)); +} + +static inline CondCode inverseCondCode(CondCode cc) +{ + return static_cast<CondCode>(cc ^ 7); +} + +static inline bool isMemoryFile(DataFile f) +{ + return (f >= FILE_MEMORY_CONST && f <= FILE_MEMORY_LOCAL); +} + +// contrary to asTex(), this will never include SULD/SUST +static inline bool isTextureOp(operation op) +{ + return (op >= OP_TEX && op <= OP_TEXPREP); +} + +static inline bool isSurfaceOp(operation op) +{ + return (op >= OP_SULDB && op <= OP_SULEA); +} + +static inline unsigned int typeSizeof(DataType ty) +{ + switch (ty) { + case TYPE_U8: + case TYPE_S8: + return 1; + case TYPE_F16: + case TYPE_U16: + case TYPE_S16: + return 2; + case TYPE_F32: + case TYPE_U32: + case TYPE_S32: + return 4; + case TYPE_F64: + case TYPE_U64: + case TYPE_S64: + return 8; + case TYPE_B96: + return 12; + case TYPE_B128: + return 16; + 
default: + return 0; + } +} + +static inline unsigned int typeSizeofLog2(DataType ty) +{ + switch (ty) { + case TYPE_F16: + case TYPE_U16: + case TYPE_S16: + return 1; + case TYPE_F32: + case TYPE_U32: + case TYPE_S32: + return 2; + case TYPE_F64: + case TYPE_U64: + case TYPE_S64: + return 3; + case TYPE_B96: + case TYPE_B128: + return 4; + case TYPE_U8: + case TYPE_S8: + default: + return 0; + } +} + +static inline DataType typeOfSize(unsigned int size, + bool flt = false, bool sgn = false) +{ + switch (size) { + case 1: return sgn ? TYPE_S8 : TYPE_U8; + case 2: return flt ? TYPE_F16 : (sgn ? TYPE_S16 : TYPE_U16); + case 8: return flt ? TYPE_F64 : (sgn ? TYPE_S64 : TYPE_U64); + case 12: return TYPE_B96; + case 16: return TYPE_B128; + case 4: + return flt ? TYPE_F32 : (sgn ? TYPE_S32 : TYPE_U32); + default: + return TYPE_NONE; + } +} + +static inline bool isFloatType(DataType ty) +{ + return (ty >= TYPE_F16 && ty <= TYPE_F64); +} + +static inline bool isSignedIntType(DataType ty) +{ + return (ty == TYPE_S8 || ty == TYPE_S16 || ty == TYPE_S32); +} + +static inline bool isSignedType(DataType ty) +{ + switch (ty) { + case TYPE_NONE: + case TYPE_U8: + case TYPE_U16: + case TYPE_U32: + case TYPE_B96: + case TYPE_B128: + return false; + default: + return true; + } +} + +static inline DataType intTypeToSigned(DataType ty) +{ + switch (ty) { + case TYPE_U32: return TYPE_S32; + case TYPE_U16: return TYPE_S16; + case TYPE_U8: return TYPE_S8; + default: + return ty; + } +} + +const ValueRef *ValueRef::getIndirect(int dim) const +{ + return isIndirect(dim) ? &insn->src(indirect[dim]) : NULL; +} + +DataFile ValueRef::getFile() const +{ + return value ? value->reg.file : FILE_NULL; +} + +unsigned int ValueRef::getSize() const +{ + return value ? value->reg.size : 0; +} + +Value *ValueRef::rep() const +{ + assert(value); + return value->join; +} + +Value *ValueDef::rep() const +{ + assert(value); + return value->join; +} + +DataFile ValueDef::getFile() const +{ + return value ? 
value->reg.file : FILE_NULL; +} + +unsigned int ValueDef::getSize() const +{ + return value ? value->reg.size : 0; +} + +void ValueDef::setSSA(LValue *lval) +{ + origin = value->asLValue(); + set(lval); +} + +const LValue *ValueDef::preSSA() const +{ + return origin; +} + +Instruction *Value::getInsn() const +{ + return defs.empty() ? NULL : defs.front()->getInsn(); +} + +Instruction *Value::getUniqueInsn() const +{ + if (defs.empty()) + return NULL; + + // after regalloc, the definitions of coalesced values are linked + if (join != this) { + for (DefCIterator it = defs.begin(); it != defs.end(); ++it) + if ((*it)->get() == this) + return (*it)->getInsn(); + // should be unreachable and trigger assertion at the end + } +#ifdef DEBUG + if (reg.data.id < 0) { + int n = 0; + for (DefCIterator it = defs.begin(); n < 2 && it != defs.end(); ++it) + if ((*it)->get() == this) // don't count joined values + ++n; + if (n > 1) + WARN("value %%%i not uniquely defined\n", id); // return NULL ? + } +#endif + assert(defs.front()->get() == this); + return defs.front()->getInsn(); +} + +inline bool Instruction::constrainedDefs() const +{ + return defExists(1) || op == OP_UNION; +} + +Value *Instruction::getIndirect(int s, int dim) const +{ + return srcs[s].isIndirect(dim) ? getSrc(srcs[s].indirect[dim]) : NULL; +} + +Value *Instruction::getPredicate() const +{ + return (predSrc >= 0) ? getSrc(predSrc) : NULL; +} + +void Instruction::setFlagsDef(int d, Value *val) +{ + if (val) { + if (flagsDef < 0) + flagsDef = d; + setDef(flagsDef, val); + } else { + if (flagsDef >= 0) { + setDef(flagsDef, NULL); + flagsDef = -1; + } + } +} + +void Instruction::setFlagsSrc(int s, Value *val) +{ + flagsSrc = s; + setSrc(flagsSrc, val); +} + +Value *TexInstruction::getIndirectR() const +{ + return tex.rIndirectSrc >= 0 ? getSrc(tex.rIndirectSrc) : NULL; +} + +Value *TexInstruction::getIndirectS() const +{ + return tex.rIndirectSrc >= 0 ? 
getSrc(tex.rIndirectSrc) : NULL; +} + +CmpInstruction *Instruction::asCmp() +{ + if (op >= OP_SET_AND && op <= OP_SLCT && op != OP_SELP) + return static_cast<CmpInstruction *>(this); + return NULL; +} + +const CmpInstruction *Instruction::asCmp() const +{ + if (op >= OP_SET_AND && op <= OP_SLCT && op != OP_SELP) + return static_cast<const CmpInstruction *>(this); + return NULL; +} + +FlowInstruction *Instruction::asFlow() +{ + if (op >= OP_BRA && op <= OP_JOIN) + return static_cast<FlowInstruction *>(this); + return NULL; +} + +const FlowInstruction *Instruction::asFlow() const +{ + if (op >= OP_BRA && op <= OP_JOINAT) + return static_cast<const FlowInstruction *>(this); + return NULL; +} + +TexInstruction *Instruction::asTex() +{ + if (op >= OP_TEX && op <= OP_SULEA) + return static_cast<TexInstruction *>(this); + return NULL; +} + +const TexInstruction *Instruction::asTex() const +{ + if (op >= OP_TEX && op <= OP_SULEA) + return static_cast<const TexInstruction *>(this); + return NULL; +} + +static inline Instruction *cloneForward(Function *ctx, Instruction *obj) +{ + DeepClonePolicy<Function> pol(ctx); + + for (int i = 0; obj->srcExists(i); ++i) + pol.set(obj->getSrc(i), obj->getSrc(i)); + + return obj->clone(pol); +} + +// XXX: use a virtual function so we're really really safe ? 
+LValue *Value::asLValue() +{ + if (reg.file >= FILE_GPR && reg.file <= FILE_ADDRESS) + return static_cast<LValue *>(this); + return NULL; +} + +Symbol *Value::asSym() +{ + if (reg.file >= FILE_MEMORY_CONST) + return static_cast<Symbol *>(this); + return NULL; +} + +const Symbol *Value::asSym() const +{ + if (reg.file >= FILE_MEMORY_CONST) + return static_cast<const Symbol *>(this); + return NULL; +} + +void Symbol::setOffset(int32_t offset) +{ + reg.data.offset = offset; +} + +void Symbol::setAddress(Symbol *base, int32_t offset) +{ + baseSym = base; + reg.data.offset = offset; +} + +void Symbol::setSV(SVSemantic sv, uint32_t index) +{ + reg.data.sv.sv = sv; + reg.data.sv.index = index; +} + +ImmediateValue *Value::asImm() +{ + if (reg.file == FILE_IMMEDIATE) + return static_cast<ImmediateValue *>(this); + return NULL; +} + +const ImmediateValue *Value::asImm() const +{ + if (reg.file == FILE_IMMEDIATE) + return static_cast<const ImmediateValue *>(this); + return NULL; +} + +Value *Value::get(Iterator &it) +{ + return reinterpret_cast<Value *>(it.get()); +} + +bool BasicBlock::reachableBy(const BasicBlock *by, const BasicBlock *term) +{ + return cfg.reachableBy(&by->cfg, &term->cfg); +} + +BasicBlock *BasicBlock::get(Iterator &iter) +{ + return reinterpret_cast<BasicBlock *>(iter.get()); +} + +BasicBlock *BasicBlock::get(Graph::Node *node) +{ + assert(node); + return reinterpret_cast<BasicBlock *>(node->data); +} + +Function *Function::get(Graph::Node *node) +{ + assert(node); + return reinterpret_cast<Function *>(node->data); +} + +LValue *Function::getLValue(int id) +{ + assert((unsigned int)id < (unsigned int)allLValues.getSize()); + return reinterpret_cast<LValue *>(allLValues.get(id)); +} + +#endif // __NV50_IR_INLINES_H__ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp new file mode 100644 index 00000000000..56eaad3bbf9 --- /dev/null +++ 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -0,0 +1,1101 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_build_util.h" + +#include "codegen/nv50_ir_target_nv50.h" + +namespace nv50_ir { + +// nv50 doesn't support 32 bit integer multiplication +// +// ah al * bh bl = LO32: (al * bh + ah * bl) << 16 + (al * bl) +// ------------------- +// al*bh 00 HI32: (al * bh + ah * bl) >> 16 + (ah * bh) + +// ah*bh 00 00 ( carry1) << 16 + ( carry2) +// al*bl +// ah*bl 00 +// +// fffe0001 + fffe0001 +static bool +expandIntegerMUL(BuildUtil *bld, Instruction *mul) +{ + const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH; + + DataType fTy = mul->sType; // full type + DataType hTy; + switch (fTy) { + case TYPE_S32: hTy = TYPE_S16; break; + case TYPE_U32: hTy = TYPE_U16; break; + case TYPE_U64: hTy = TYPE_U32; break; + case TYPE_S64: hTy = TYPE_S32; break; + default: + return false; + } + unsigned int fullSize = typeSizeof(fTy); + unsigned int halfSize = typeSizeof(hTy); + + Instruction *i[9]; + + bld->setPosition(mul, true); + + Value *a[2], *b[2]; + Value *c[2]; + Value *t[4]; + for (int j = 0; j < 4; ++j) + t[j] = bld->getSSA(fullSize); + + // split sources into halves + i[0] = bld->mkSplit(a, halfSize, mul->getSrc(0)); + i[1] = bld->mkSplit(b, halfSize, mul->getSrc(1)); + + i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]); + i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]); + i[7] = bld->mkOp2(OP_SHL, fTy, t[2], t[1], bld->mkImm(halfSize * 8)); + i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]); + + if (highResult) { + Value *r[3]; + Value *imm = bld->loadImm(NULL, 1 << (halfSize * 8)); + c[0] = bld->getSSA(1, FILE_FLAGS); + c[1] = bld->getSSA(1, FILE_FLAGS); + for (int j = 0; j < 3; ++j) + r[j] = bld->getSSA(fullSize); + + i[8] = bld->mkOp2(OP_SHR, fTy, r[0], t[1], bld->mkImm(halfSize * 8)); + i[6] = bld->mkOp2(OP_ADD, fTy, r[1], r[0], imm); + bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[0]); + i[5] = bld->mkOp3(OP_MAD, fTy, mul->getDef(0), a[1], b[1], r[2]); + + // set carry defs / sources + 
i[3]->setFlagsDef(1, c[0]); + i[4]->setFlagsDef(0, c[1]); // actual result not required, just the carry + i[6]->setPredicate(CC_C, c[0]); + i[5]->setFlagsSrc(3, c[1]); + } else { + bld->mkMov(mul->getDef(0), t[3]); + } + delete_Instruction(bld->getProgram(), mul); + + for (int j = 2; j <= (highResult ? 5 : 4); ++j) + if (i[j]) + i[j]->sType = hTy; + + return true; +} + +#define QOP_ADD 0 +#define QOP_SUBR 1 +#define QOP_SUB 2 +#define QOP_MOV2 3 + +// UL UR LL LR +#define QUADOP(q, r, s, t) \ + ((QOP_##q << 6) | (QOP_##r << 4) | \ + (QOP_##s << 2) | (QOP_##t << 0)) + +class NV50LegalizePostRA : public Pass +{ +private: + virtual bool visit(Function *); + virtual bool visit(BasicBlock *); + + void handlePRERET(FlowInstruction *); + void replaceZero(Instruction *); + + LValue *r63; +}; + +bool +NV50LegalizePostRA::visit(Function *fn) +{ + Program *prog = fn->getProgram(); + + r63 = new_LValue(fn, FILE_GPR); + r63->reg.data.id = 63; + + // this is actually per-program, but we can do it all on visiting main() + std::list<Instruction *> *outWrites = + reinterpret_cast<std::list<Instruction *> *>(prog->targetPriv); + + if (outWrites) { + for (std::list<Instruction *>::iterator it = outWrites->begin(); + it != outWrites->end(); ++it) + (*it)->getSrc(1)->defs.front()->getInsn()->setDef(0, (*it)->getSrc(0)); + // instructions will be deleted on exit + outWrites->clear(); + } + + return true; +} + +void +NV50LegalizePostRA::replaceZero(Instruction *i) +{ + for (int s = 0; i->srcExists(s); ++s) { + ImmediateValue *imm = i->getSrc(s)->asImm(); + if (imm && imm->reg.data.u64 == 0) + i->setSrc(s, r63); + } +} + +// Emulate PRERET: jump to the target and call to the origin from there +// +// WARNING: atm only works if BBs are affected by at most a single PRERET +// +// BB:0 +// preret BB:3 +// (...) +// BB:3 +// (...) +// ---> +// BB:0 +// bra BB:3 + n0 (directly to the call; move to beginning of BB and fixate) +// (...) 
+// BB:3 +// bra BB:3 + n1 (skip the call) +// call BB:0 + n2 (skip bra at beginning of BB:0) +// (...) +void +NV50LegalizePostRA::handlePRERET(FlowInstruction *pre) +{ + BasicBlock *bbE = pre->bb; + BasicBlock *bbT = pre->target.bb; + + pre->subOp = NV50_IR_SUBOP_EMU_PRERET + 0; + bbE->remove(pre); + bbE->insertHead(pre); + + Instruction *skip = new_FlowInstruction(func, OP_PRERET, bbT); + Instruction *call = new_FlowInstruction(func, OP_PRERET, bbE); + + bbT->insertHead(call); + bbT->insertHead(skip); + + // NOTE: maybe split blocks to prevent the instructions from moving ? + + skip->subOp = NV50_IR_SUBOP_EMU_PRERET + 1; + call->subOp = NV50_IR_SUBOP_EMU_PRERET + 2; +} + +bool +NV50LegalizePostRA::visit(BasicBlock *bb) +{ + Instruction *i, *next; + + // remove pseudo operations and non-fixed no-ops, split 64 bit operations + for (i = bb->getFirst(); i; i = next) { + next = i->next; + if (i->isNop()) { + bb->remove(i); + } else + if (i->op == OP_PRERET && prog->getTarget()->getChipset() < 0xa0) { + handlePRERET(i->asFlow()); + } else { + // TODO: We will want to do this before register allocation, + // since we have to use a $c register for the carry flag. 
+ if (typeSizeof(i->dType) == 8) { + Instruction *hi = BuildUtil::split64BitOpPostRA(func, i, r63, NULL); + if (hi) + next = hi; + } + + if (i->op != OP_MOV && i->op != OP_PFETCH && + i->op != OP_BAR && + (!i->defExists(0) || i->def(0).getFile() != FILE_ADDRESS)) + replaceZero(i); + } + } + if (!bb->getEntry()) + return true; + + return true; +} + +class NV50LegalizeSSA : public Pass +{ +public: + NV50LegalizeSSA(Program *); + + virtual bool visit(BasicBlock *bb); + +private: + void propagateWriteToOutput(Instruction *); + void handleDIV(Instruction *); + void handleMOD(Instruction *); + void handleMUL(Instruction *); + void handleAddrDef(Instruction *); + + inline bool isARL(const Instruction *) const; + + BuildUtil bld; + + std::list<Instruction *> *outWrites; +}; + +NV50LegalizeSSA::NV50LegalizeSSA(Program *prog) +{ + bld.setProgram(prog); + + if (prog->optLevel >= 2 && + (prog->getType() == Program::TYPE_GEOMETRY || + prog->getType() == Program::TYPE_VERTEX)) + outWrites = + reinterpret_cast<std::list<Instruction *> *>(prog->targetPriv); + else + outWrites = NULL; +} + +void +NV50LegalizeSSA::propagateWriteToOutput(Instruction *st) +{ + if (st->src(0).isIndirect(0) || st->getSrc(1)->refCount() != 1) + return; + + // check def instruction can store + Instruction *di = st->getSrc(1)->defs.front()->getInsn(); + + // TODO: move exports (if beneficial) in common opt pass + if (di->isPseudo() || isTextureOp(di->op) || di->defCount(0xff, true) > 1) + return; + for (int s = 0; di->srcExists(s); ++s) + if (di->src(s).getFile() == FILE_IMMEDIATE) + return; + + // We cannot set defs to non-lvalues before register allocation, so + // save & remove (to save registers) the exports and replace later. 
+ outWrites->push_back(st); + st->bb->remove(st); +} + +bool +NV50LegalizeSSA::isARL(const Instruction *i) const +{ + ImmediateValue imm; + + if (i->op != OP_SHL || i->src(0).getFile() != FILE_GPR) + return false; + if (!i->src(1).getImmediate(imm)) + return false; + return imm.isInteger(0); +} + +void +NV50LegalizeSSA::handleAddrDef(Instruction *i) +{ + Instruction *arl; + + i->getDef(0)->reg.size = 2; // $aX are only 16 bit + + // only ADDR <- SHL(GPR, IMM) and ADDR <- ADD(ADDR, IMM) are valid + if (i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE) { + if (i->op == OP_SHL && i->src(0).getFile() == FILE_GPR) + return; + if (i->op == OP_ADD && i->src(0).getFile() == FILE_ADDRESS) + return; + } + + // turn $a sources into $r sources (can't operate on $a) + for (int s = 0; i->srcExists(s); ++s) { + Value *a = i->getSrc(s); + Value *r; + if (a->reg.file == FILE_ADDRESS) { + if (a->getInsn() && isARL(a->getInsn())) { + i->setSrc(s, a->getInsn()->getSrc(0)); + } else { + bld.setPosition(i, false); + r = bld.getSSA(); + bld.mkMov(r, a); + i->setSrc(s, r); + } + } + } + if (i->op == OP_SHL && i->src(1).getFile() == FILE_IMMEDIATE) + return; + + // turn result back into $a + bld.setPosition(i, true); + arl = bld.mkOp2(OP_SHL, TYPE_U32, i->getDef(0), bld.getSSA(), bld.mkImm(0)); + i->setDef(0, arl->getSrc(0)); +} + +void +NV50LegalizeSSA::handleMUL(Instruction *mul) +{ + if (isFloatType(mul->sType) || typeSizeof(mul->sType) <= 2) + return; + Value *def = mul->getDef(0); + Value *pred = mul->getPredicate(); + CondCode cc = mul->cc; + if (pred) + mul->setPredicate(CC_ALWAYS, NULL); + + if (mul->op == OP_MAD) { + Instruction *add = mul; + bld.setPosition(add, false); + Value *res = cloneShallow(func, mul->getDef(0)); + mul = bld.mkOp2(OP_MUL, add->sType, res, add->getSrc(0), add->getSrc(1)); + add->op = OP_ADD; + add->setSrc(0, mul->getDef(0)); + add->setSrc(1, add->getSrc(2)); + for (int s = 2; add->srcExists(s); ++s) + add->setSrc(s, NULL); + mul->subOp = add->subOp; 
+ add->subOp = 0; + } + expandIntegerMUL(&bld, mul); + if (pred) + def->getInsn()->setPredicate(cc, pred); +} + +// Use f32 division: first compute an approximate result, use it to reduce +// the dividend, which should then be representable as f32, divide the reduced +// dividend, and add the quotients. +void +NV50LegalizeSSA::handleDIV(Instruction *div) +{ + const DataType ty = div->sType; + + if (ty != TYPE_U32 && ty != TYPE_S32) + return; + + Value *q, *q0, *qf, *aR, *aRf, *qRf, *qR, *t, *s, *m, *cond; + + bld.setPosition(div, false); + + Value *a, *af = bld.getSSA(); + Value *b, *bf = bld.getSSA(); + + bld.mkCvt(OP_CVT, TYPE_F32, af, ty, div->getSrc(0)); + bld.mkCvt(OP_CVT, TYPE_F32, bf, ty, div->getSrc(1)); + + if (isSignedType(ty)) { + af->getInsn()->src(0).mod = Modifier(NV50_IR_MOD_ABS); + bf->getInsn()->src(0).mod = Modifier(NV50_IR_MOD_ABS); + a = bld.getSSA(); + b = bld.getSSA(); + bld.mkOp1(OP_ABS, ty, a, div->getSrc(0)); + bld.mkOp1(OP_ABS, ty, b, div->getSrc(1)); + } else { + a = div->getSrc(0); + b = div->getSrc(1); + } + + bf = bld.mkOp1v(OP_RCP, TYPE_F32, bld.getSSA(), bf); + bf = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), bf, bld.mkImm(-2)); + + bld.mkOp2(OP_MUL, TYPE_F32, (qf = bld.getSSA()), af, bf)->rnd = ROUND_Z; + bld.mkCvt(OP_CVT, ty, (q0 = bld.getSSA()), TYPE_F32, qf)->rnd = ROUND_Z; + + // get error of 1st result + expandIntegerMUL(&bld, + bld.mkOp2(OP_MUL, TYPE_U32, (t = bld.getSSA()), q0, b)); + bld.mkOp2(OP_SUB, TYPE_U32, (aRf = bld.getSSA()), a, t); + + bld.mkCvt(OP_CVT, TYPE_F32, (aR = bld.getSSA()), TYPE_U32, aRf); + + bld.mkOp2(OP_MUL, TYPE_F32, (qRf = bld.getSSA()), aR, bf)->rnd = ROUND_Z; + bld.mkCvt(OP_CVT, TYPE_U32, (qR = bld.getSSA()), TYPE_F32, qRf) + ->rnd = ROUND_Z; + bld.mkOp2(OP_ADD, ty, (q = bld.getSSA()), q0, qR); // add quotients + + // correction: if modulus >= divisor, add 1 + expandIntegerMUL(&bld, + bld.mkOp2(OP_MUL, TYPE_U32, (t = bld.getSSA()), q, b)); + bld.mkOp2(OP_SUB, TYPE_U32, (m = bld.getSSA()), a, t); + 
bld.mkCmp(OP_SET, CC_GE, TYPE_U32, (s = bld.getSSA()), m, b); + if (!isSignedType(ty)) { + div->op = OP_SUB; + div->setSrc(0, q); + div->setSrc(1, s); + } else { + t = q; + bld.mkOp2(OP_SUB, TYPE_U32, (q = bld.getSSA()), t, s); + s = bld.getSSA(); + t = bld.getSSA(); + // fix the sign + bld.mkOp2(OP_XOR, TYPE_U32, NULL, div->getSrc(0), div->getSrc(1)) + ->setFlagsDef(0, (cond = bld.getSSA(1, FILE_FLAGS))); + bld.mkOp1(OP_NEG, ty, s, q)->setPredicate(CC_S, cond); + bld.mkOp1(OP_MOV, ty, t, q)->setPredicate(CC_NS, cond); + + div->op = OP_UNION; + div->setSrc(0, s); + div->setSrc(1, t); + } +} + +void +NV50LegalizeSSA::handleMOD(Instruction *mod) +{ + if (mod->dType != TYPE_U32 && mod->dType != TYPE_S32) + return; + bld.setPosition(mod, false); + + Value *q = bld.getSSA(); + Value *m = bld.getSSA(); + + bld.mkOp2(OP_DIV, mod->dType, q, mod->getSrc(0), mod->getSrc(1)); + handleDIV(q->getInsn()); + + bld.setPosition(mod, false); + expandIntegerMUL(&bld, bld.mkOp2(OP_MUL, TYPE_U32, m, q, mod->getSrc(1))); + + mod->op = OP_SUB; + mod->setSrc(1, m); +} + +bool +NV50LegalizeSSA::visit(BasicBlock *bb) +{ + Instruction *insn, *next; + // skipping PHIs (don't pass them to handleAddrDef) ! 
+ for (insn = bb->getEntry(); insn; insn = next) { + next = insn->next; + + switch (insn->op) { + case OP_EXPORT: + if (outWrites) + propagateWriteToOutput(insn); + break; + case OP_DIV: + handleDIV(insn); + break; + case OP_MOD: + handleMOD(insn); + break; + case OP_MAD: + case OP_MUL: + handleMUL(insn); + break; + default: + break; + } + + if (insn->defExists(0) && insn->getDef(0)->reg.file == FILE_ADDRESS) + handleAddrDef(insn); + } + return true; +} + +class NV50LoweringPreSSA : public Pass +{ +public: + NV50LoweringPreSSA(Program *); + +private: + virtual bool visit(Instruction *); + virtual bool visit(Function *); + + bool handleRDSV(Instruction *); + bool handleWRSV(Instruction *); + + bool handleEXPORT(Instruction *); + + bool handleDIV(Instruction *); + bool handleSQRT(Instruction *); + bool handlePOW(Instruction *); + + bool handleSET(Instruction *); + bool handleSLCT(CmpInstruction *); + bool handleSELP(Instruction *); + + bool handleTEX(TexInstruction *); + bool handleTXB(TexInstruction *); // I really + bool handleTXL(TexInstruction *); // hate + bool handleTXD(TexInstruction *); // these 3 + + bool handleCALL(Instruction *); + bool handlePRECONT(Instruction *); + bool handleCONT(Instruction *); + + void checkPredicate(Instruction *); + +private: + const Target *const targ; + + BuildUtil bld; + + Value *tid; +}; + +NV50LoweringPreSSA::NV50LoweringPreSSA(Program *prog) : + targ(prog->getTarget()), tid(NULL) +{ + bld.setProgram(prog); +} + +bool +NV50LoweringPreSSA::visit(Function *f) +{ + BasicBlock *root = BasicBlock::get(func->cfg.getRoot()); + + if (prog->getType() == Program::TYPE_COMPUTE) { + // Add implicit "thread id" argument in $r0 to the function + Value *arg = new_LValue(func, FILE_GPR); + arg->reg.data.id = 0; + f->ins.push_back(arg); + + bld.setPosition(root, false); + tid = bld.mkMov(bld.getScratch(), arg, TYPE_U32)->getDef(0); + } + + return true; +} + +bool +NV50LoweringPreSSA::handleTEX(TexInstruction *i) +{ + const int arg = 
i->tex.target.getArgCount(); + const int dref = arg; + const int lod = i->tex.target.isShadow() ? (arg + 1) : arg; + + // dref comes before bias/lod + if (i->tex.target.isShadow()) + if (i->op == OP_TXB || i->op == OP_TXL) + i->swapSources(dref, lod); + + // array index must be converted to u32 + if (i->tex.target.isArray()) { + Value *layer = i->getSrc(arg - 1); + LValue *src = new_LValue(func, FILE_GPR); + bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer); + bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511)); + i->setSrc(arg - 1, src); + + if (i->tex.target.isCube()) { + std::vector<Value *> acube, a2d; + int c; + + acube.resize(4); + for (c = 0; c < 4; ++c) + acube[c] = i->getSrc(c); + a2d.resize(4); + for (c = 0; c < 3; ++c) + a2d[c] = new_LValue(func, FILE_GPR); + a2d[3] = NULL; + + bld.mkTex(OP_TEXPREP, TEX_TARGET_CUBE_ARRAY, i->tex.r, i->tex.s, + a2d, acube)->asTex()->tex.mask = 0x7; + + for (c = 0; c < 3; ++c) + i->setSrc(c, a2d[c]); + i->setSrc(c, NULL); + for (; i->srcExists(c + 1); ++c) + i->setSrc(c, i->getSrc(c + 1)); + + i->tex.target = i->tex.target.isShadow() ? + TEX_TARGET_2D_ARRAY_SHADOW : TEX_TARGET_2D_ARRAY; + } + } + + // texel offsets are 3 immediate fields in the instruction, + // nv50 cannot do textureGatherOffsets + assert(i->tex.useOffsets <= 1); + + return true; +} + +// Bias must be equal for all threads of a quad or lod calculation will fail. +// +// The lanes of a quad are grouped by the bit in the condition register they +// have set, which is selected by differing bias values. +// Move the input values for TEX into a new register set for each group and +// execute TEX only for a specific group. +// We always need to use 4 new registers for the inputs/outputs because the +// implicitly calculated derivatives must be correct. 
+// +// TODO: move to SSA phase so we can easily determine whether bias is constant +bool +NV50LoweringPreSSA::handleTXB(TexInstruction *i) +{ + const CondCode cc[4] = { CC_EQU, CC_S, CC_C, CC_O }; + int l, d; + + handleTEX(i); + Value *bias = i->getSrc(i->tex.target.getArgCount()); + if (bias->isUniform()) + return true; + + Instruction *cond = bld.mkOp1(OP_UNION, TYPE_U32, bld.getScratch(), + bld.loadImm(NULL, 1)); + bld.setPosition(cond, false); + + for (l = 1; l < 4; ++l) { + const uint8_t qop = QUADOP(SUBR, SUBR, SUBR, SUBR); + Value *bit = bld.getSSA(); + Value *pred = bld.getScratch(1, FILE_FLAGS); + Value *imm = bld.loadImm(NULL, (1 << l)); + bld.mkQuadop(qop, pred, l, bias, bias)->flagsDef = 0; + bld.mkMov(bit, imm)->setPredicate(CC_EQ, pred); + cond->setSrc(l, bit); + } + Value *flags = bld.getScratch(1, FILE_FLAGS); + bld.setPosition(cond, true); + bld.mkCvt(OP_CVT, TYPE_U8, flags, TYPE_U32, cond->getDef(0)); + + Instruction *tex[4]; + for (l = 0; l < 4; ++l) { + (tex[l] = cloneForward(func, i))->setPredicate(cc[l], flags); + bld.insert(tex[l]); + } + + Value *res[4][4]; + for (d = 0; i->defExists(d); ++d) + res[0][d] = tex[0]->getDef(d); + for (l = 1; l < 4; ++l) { + for (d = 0; tex[l]->defExists(d); ++d) { + res[l][d] = cloneShallow(func, res[0][d]); + bld.mkMov(res[l][d], tex[l]->getDef(d))->setPredicate(cc[l], flags); + } + } + + for (d = 0; i->defExists(d); ++d) { + Instruction *dst = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(d)); + for (l = 0; l < 4; ++l) + dst->setSrc(l, res[l][d]); + } + delete_Instruction(prog, i); + return true; +} + +// LOD must be equal for all threads of a quad. +// Unlike with TXB, here we can just diverge since there's no LOD calculation +// that would require all 4 threads' sources to be set up properly. 
+bool +NV50LoweringPreSSA::handleTXL(TexInstruction *i) +{ + handleTEX(i); + Value *lod = i->getSrc(i->tex.target.getArgCount()); + if (lod->isUniform()) + return true; + + BasicBlock *currBB = i->bb; + BasicBlock *texiBB = i->bb->splitBefore(i, false); + BasicBlock *joinBB = i->bb->splitAfter(i); + + bld.setPosition(currBB, true); + currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL); + + for (int l = 0; l <= 3; ++l) { + const uint8_t qop = QUADOP(SUBR, SUBR, SUBR, SUBR); + Value *pred = bld.getScratch(1, FILE_FLAGS); + bld.setPosition(currBB, true); + bld.mkQuadop(qop, pred, l, lod, lod)->flagsDef = 0; + bld.mkFlow(OP_BRA, texiBB, CC_EQ, pred)->fixed = 1; + currBB->cfg.attach(&texiBB->cfg, Graph::Edge::FORWARD); + if (l <= 2) { + BasicBlock *laneBB = new BasicBlock(func); + currBB->cfg.attach(&laneBB->cfg, Graph::Edge::TREE); + currBB = laneBB; + } + } + bld.setPosition(joinBB, false); + bld.mkOp(OP_JOIN, TYPE_NONE, NULL); + return true; +} + +bool +NV50LoweringPreSSA::handleTXD(TexInstruction *i) +{ + static const uint8_t qOps[4][2] = + { + { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 + { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 + { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 + { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 + }; + Value *def[4][4]; + Value *crd[3]; + Instruction *tex; + Value *zero = bld.loadImm(bld.getSSA(), 0); + int l, c; + const int dim = i->tex.target.getDim(); + + handleTEX(i); + i->op = OP_TEX; // no need to clone dPdx/dPdy later + + for (c = 0; c < dim; ++c) + crd[c] = bld.getScratch(); + + bld.mkOp(OP_QUADON, TYPE_NONE, NULL); + for (l = 0; l < 4; ++l) { + // mov coordinates from lane l to all lanes + for (c = 0; c < dim; ++c) + bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero); + // add dPdx from lane l to lanes dx + for (c = 0; c < dim; ++c) + bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]); + // add 
dPdy from lane l to lanes dy + for (c = 0; c < dim; ++c) + bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); + // texture + bld.insert(tex = cloneForward(func, i)); + for (c = 0; c < dim; ++c) + tex->setSrc(c, crd[c]); + // save results + for (c = 0; i->defExists(c); ++c) { + Instruction *mov; + def[c][l] = bld.getSSA(); + mov = bld.mkMov(def[c][l], tex->getDef(c)); + mov->fixed = 1; + mov->lanes = 1 << l; + } + } + bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); + + for (c = 0; i->defExists(c); ++c) { + Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); + for (l = 0; l < 4; ++l) + u->setSrc(l, def[c][l]); + } + + i->bb->remove(i); + return true; +} + +bool +NV50LoweringPreSSA::handleSET(Instruction *i) +{ + if (i->dType == TYPE_F32) { + bld.setPosition(i, true); + i->dType = TYPE_U32; + bld.mkOp1(OP_ABS, TYPE_S32, i->getDef(0), i->getDef(0)); + bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), TYPE_S32, i->getDef(0)); + } + return true; +} + +bool +NV50LoweringPreSSA::handleSLCT(CmpInstruction *i) +{ + Value *src0 = bld.getSSA(); + Value *src1 = bld.getSSA(); + Value *pred = bld.getScratch(1, FILE_FLAGS); + + Value *v0 = i->getSrc(0); + Value *v1 = i->getSrc(1); + // XXX: these probably shouldn't be immediates in the first place ... 
+ if (v0->asImm()) + v0 = bld.mkMov(bld.getSSA(), v0)->getDef(0); + if (v1->asImm()) + v1 = bld.mkMov(bld.getSSA(), v1)->getDef(0); + + bld.setPosition(i, true); + bld.mkMov(src0, v0)->setPredicate(CC_NE, pred); + bld.mkMov(src1, v1)->setPredicate(CC_EQ, pred); + bld.mkOp2(OP_UNION, i->dType, i->getDef(0), src0, src1); + + bld.setPosition(i, false); + i->op = OP_SET; + i->setFlagsDef(0, pred); + i->dType = TYPE_U8; + i->setSrc(0, i->getSrc(2)); + i->setSrc(2, NULL); + i->setSrc(1, bld.loadImm(NULL, 0)); + + return true; +} + +bool +NV50LoweringPreSSA::handleSELP(Instruction *i) +{ + Value *src0 = bld.getSSA(); + Value *src1 = bld.getSSA(); + + Value *v0 = i->getSrc(0); + Value *v1 = i->getSrc(1); + if (v0->asImm()) + v0 = bld.mkMov(bld.getSSA(), v0)->getDef(0); + if (v1->asImm()) + v1 = bld.mkMov(bld.getSSA(), v1)->getDef(0); + + bld.mkMov(src0, v0)->setPredicate(CC_NE, i->getSrc(2)); + bld.mkMov(src1, v1)->setPredicate(CC_EQ, i->getSrc(2)); + bld.mkOp2(OP_UNION, i->dType, i->getDef(0), src0, src1); + delete_Instruction(prog, i); + return true; +} + +bool +NV50LoweringPreSSA::handleWRSV(Instruction *i) +{ + Symbol *sym = i->getSrc(0)->asSym(); + + // these are all shader outputs, $sreg are not writeable + uint32_t addr = targ->getSVAddress(FILE_SHADER_OUTPUT, sym); + if (addr >= 0x400) + return false; + sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr); + + bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0), i->getSrc(1)); + + bld.getBB()->remove(i); + return true; +} + +bool +NV50LoweringPreSSA::handleCALL(Instruction *i) +{ + if (prog->getType() == Program::TYPE_COMPUTE) { + // Add implicit "thread id" argument in $r0 to the function + i->setSrc(i->srcCount(), tid); + } + return true; +} + +bool +NV50LoweringPreSSA::handlePRECONT(Instruction *i) +{ + delete_Instruction(prog, i); + return true; +} + +bool +NV50LoweringPreSSA::handleCONT(Instruction *i) +{ + i->op = OP_BRA; + return true; +} + +bool +NV50LoweringPreSSA::handleRDSV(Instruction *i) +{ 
+ Symbol *sym = i->getSrc(0)->asSym(); + uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym); + Value *def = i->getDef(0); + SVSemantic sv = sym->reg.data.sv.sv; + int idx = sym->reg.data.sv.index; + + if (addr >= 0x400) // mov $sreg + return true; + + switch (sv) { + case SV_POSITION: + assert(prog->getType() == Program::TYPE_FRAGMENT); + bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL); + break; + case SV_FACE: + bld.mkInterp(NV50_IR_INTERP_FLAT, def, addr, NULL); + if (i->dType == TYPE_F32) { + bld.mkOp2(OP_AND, TYPE_U32, def, def, bld.mkImm(0x80000000)); + bld.mkOp2(OP_XOR, TYPE_U32, def, def, bld.mkImm(0xbf800000)); + } + break; + case SV_NCTAID: + case SV_CTAID: + case SV_NTID: + if ((sv == SV_NCTAID && idx >= 2) || + (sv == SV_NTID && idx >= 3)) { + bld.mkMov(def, bld.mkImm(1)); + } else if (sv == SV_CTAID && idx >= 2) { + bld.mkMov(def, bld.mkImm(0)); + } else { + Value *x = bld.getSSA(2); + bld.mkOp1(OP_LOAD, TYPE_U16, x, + bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U16, addr)); + bld.mkCvt(OP_CVT, TYPE_U32, def, TYPE_U16, x); + } + break; + case SV_TID: + if (idx == 0) { + bld.mkOp2(OP_AND, TYPE_U32, def, tid, bld.mkImm(0x0000ffff)); + } else if (idx == 1) { + bld.mkOp2(OP_AND, TYPE_U32, def, tid, bld.mkImm(0x03ff0000)); + bld.mkOp2(OP_SHR, TYPE_U32, def, def, bld.mkImm(16)); + } else if (idx == 2) { + bld.mkOp2(OP_SHR, TYPE_U32, def, tid, bld.mkImm(26)); + } else { + bld.mkMov(def, bld.mkImm(0)); + } + break; + default: + bld.mkFetch(i->getDef(0), i->dType, + FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), NULL); + break; + } + bld.getBB()->remove(i); + return true; +} + +bool +NV50LoweringPreSSA::handleDIV(Instruction *i) +{ + if (!isFloatType(i->dType)) + return true; + bld.setPosition(i, false); + Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1)); + i->op = OP_MUL; + i->setSrc(1, rcp->getDef(0)); + return true; +} + +bool +NV50LoweringPreSSA::handleSQRT(Instruction *i) +{ + Instruction *rsq = bld.mkOp1(OP_RSQ, 
TYPE_F32, + bld.getSSA(), i->getSrc(0)); + i->op = OP_MUL; + i->setSrc(1, rsq->getDef(0)); + + return true; +} + +bool +NV50LoweringPreSSA::handlePOW(Instruction *i) +{ + LValue *val = bld.getScratch(); + + bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0)); + bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1; + bld.mkOp1(OP_PREEX2, TYPE_F32, val, val); + + i->op = OP_EX2; + i->setSrc(0, val); + i->setSrc(1, NULL); + + return true; +} + +bool +NV50LoweringPreSSA::handleEXPORT(Instruction *i) +{ + if (prog->getType() == Program::TYPE_FRAGMENT) { + if (i->getIndirect(0, 0)) { + // TODO: redirect to l[] here, load to GPRs at exit + return false; + } else { + int id = i->getSrc(0)->reg.data.offset / 4; // in 32 bit reg units + + i->op = OP_MOV; + i->subOp = NV50_IR_SUBOP_MOV_FINAL; + i->src(0).set(i->src(1)); + i->setSrc(1, NULL); + i->setDef(0, new_LValue(func, FILE_GPR)); + i->getDef(0)->reg.data.id = id; + + prog->maxGPR = MAX2(prog->maxGPR, id); + } + } + return true; +} + +// Set flags according to predicate and make the instruction read $cX. 
+void +NV50LoweringPreSSA::checkPredicate(Instruction *insn) +{ + Value *pred = insn->getPredicate(); + Value *cdst; + + if (!pred || pred->reg.file == FILE_FLAGS) + return; + cdst = bld.getSSA(1, FILE_FLAGS); + + bld.mkCmp(OP_SET, CC_NEU, insn->dType, cdst, bld.loadImm(NULL, 0), pred); + + insn->setPredicate(insn->cc, cdst); +} + +// +// - add quadop dance for texturing +// - put FP outputs in GPRs +// - convert instruction sequences +// +bool +NV50LoweringPreSSA::visit(Instruction *i) +{ + bld.setPosition(i, false); + + if (i->cc != CC_ALWAYS) + checkPredicate(i); + + switch (i->op) { + case OP_TEX: + case OP_TXF: + case OP_TXG: + return handleTEX(i->asTex()); + case OP_TXB: + return handleTXB(i->asTex()); + case OP_TXL: + return handleTXL(i->asTex()); + case OP_TXD: + return handleTXD(i->asTex()); + case OP_EX2: + bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0)); + i->setSrc(0, i->getDef(0)); + break; + case OP_SET: + return handleSET(i); + case OP_SLCT: + return handleSLCT(i->asCmp()); + case OP_SELP: + return handleSELP(i); + case OP_POW: + return handlePOW(i); + case OP_DIV: + return handleDIV(i); + case OP_SQRT: + return handleSQRT(i); + case OP_EXPORT: + return handleEXPORT(i); + case OP_RDSV: + return handleRDSV(i); + case OP_WRSV: + return handleWRSV(i); + case OP_CALL: + return handleCALL(i); + case OP_PRECONT: + return handlePRECONT(i); + case OP_CONT: + return handleCONT(i); + default: + break; + } + return true; +} + +bool +TargetNV50::runLegalizePass(Program *prog, CGStage stage) const +{ + bool ret = false; + + if (stage == CG_STAGE_PRE_SSA) { + NV50LoweringPreSSA pass(prog); + ret = pass.run(prog, false, true); + } else + if (stage == CG_STAGE_SSA) { + if (!prog->targetPriv) + prog->targetPriv = new std::list<Instruction *>(); + NV50LegalizeSSA pass(prog); + ret = pass.run(prog, false, true); + } else + if (stage == CG_STAGE_POST_RA) { + NV50LegalizePostRA pass; + ret = pass.run(prog, false, true); + if (prog->targetPriv) + delete 
reinterpret_cast<std::list<Instruction *> *>(prog->targetPriv); + } + return ret; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp new file mode 100644 index 00000000000..8d94dd1ce5d --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -0,0 +1,1597 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_build_util.h" + +#include "codegen/nv50_ir_target_nvc0.h" + +#include <limits> + +namespace nv50_ir { + +#define QOP_ADD 0 +#define QOP_SUBR 1 +#define QOP_SUB 2 +#define QOP_MOV2 3 + +// UL UR LL LR +#define QUADOP(q, r, s, t) \ + ((QOP_##q << 6) | (QOP_##r << 4) | \ + (QOP_##s << 2) | (QOP_##t << 0)) + +class NVC0LegalizeSSA : public Pass +{ +private: + virtual bool visit(BasicBlock *); + virtual bool visit(Function *); + + // we want to insert calls to the builtin library only after optimization + void handleDIV(Instruction *); // integer division, modulus + void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt + +private: + BuildUtil bld; +}; + +void +NVC0LegalizeSSA::handleDIV(Instruction *i) +{ + FlowInstruction *call; + int builtin; + Value *def[2]; + + bld.setPosition(i, false); + def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0); + def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0); + switch (i->dType) { + case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break; + case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break; + default: + return; + } + call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL); + bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]); + bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2); + bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 
0xf : 0x3, 0); + + call->fixed = 1; + call->absolute = call->builtin = 1; + call->target.builtin = builtin; + delete_Instruction(prog, i); +} + +void +NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) +{ + // TODO +} + +bool +NVC0LegalizeSSA::visit(Function *fn) +{ + bld.setProgram(fn->getProgram()); + return true; +} + +bool +NVC0LegalizeSSA::visit(BasicBlock *bb) +{ + Instruction *next; + for (Instruction *i = bb->getEntry(); i; i = next) { + next = i->next; + if (i->dType == TYPE_F32) + continue; + switch (i->op) { + case OP_DIV: + case OP_MOD: + handleDIV(i); + break; + case OP_RCP: + case OP_RSQ: + if (i->dType == TYPE_F64) + handleRCPRSQ(i); + break; + default: + break; + } + } + return true; +} + +class NVC0LegalizePostRA : public Pass +{ +public: + NVC0LegalizePostRA(const Program *); + +private: + virtual bool visit(Function *); + virtual bool visit(BasicBlock *); + + void replaceZero(Instruction *); + bool tryReplaceContWithBra(BasicBlock *); + void propagateJoin(BasicBlock *); + + struct TexUse + { + TexUse(Instruction *use, const Instruction *tex) + : insn(use), tex(tex), level(-1) { } + Instruction *insn; + const Instruction *tex; // or split / mov + int level; + }; + struct Limits + { + Limits() { } + Limits(int min, int max) : min(min), max(max) { } + int min, max; + }; + bool insertTextureBarriers(Function *); + inline bool insnDominatedBy(const Instruction *, const Instruction *) const; + void findFirstUses(const Instruction *tex, const Instruction *def, + std::list<TexUse>&); + void findOverwritingDefs(const Instruction *tex, Instruction *insn, + const BasicBlock *term, + std::list<TexUse>&); + void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *); + const Instruction *recurseDef(const Instruction *); + +private: + LValue *rZero; + LValue *carry; + const bool needTexBar; +}; + +NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog) + : rZero(NULL), + carry(NULL), + needTexBar(prog->getTarget()->getChipset() >= 0xe0) +{ +} + +bool 
NVC0LegalizePostRA::insnDominatedBy(const Instruction *later,
                                    const Instruction *early) const
{
   // same block: compare schedule order; otherwise use BB dominance
   if (early->bb == later->bb)
      return early->serial < later->serial;
   return later->bb->dominatedBy(early->bb);
}

// Record a use of a TEX result, keeping the list minimal: drop the new use
// if an already-recorded use dominates it, and evict recorded uses that the
// new one dominates.
void
NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses,
                              Instruction *usei, const Instruction *insn)
{
   bool add = true;
   for (std::list<TexUse>::iterator it = uses.begin();
        it != uses.end();) {
      if (insnDominatedBy(usei, it->insn)) {
         add = false;
         break;
      }
      if (insnDominatedBy(it->insn, usei))
         it = uses.erase(it);
      else
         ++it;
   }
   if (add)
      uses.push_back(TexUse(usei, insn));
}

// Find definitions that overwrite registers holding texi's results (post-RA
// the defs of a PHI/UNION source may clobber the TEX output registers), and
// record them as points needing a barrier.
void
NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi,
                                        Instruction *insn,
                                        const BasicBlock *term,
                                        std::list<TexUse> &uses)
{
   // skip no-op register-preserving MOVs back to their source def
   while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0)))
      insn = insn->getSrc(0)->getUniqueInsn();

   // NOTE(review): the loop above dereferences insn before this NULL check;
   // this assumes getUniqueInsn() can only return NULL on the last step —
   // confirm against Value::getUniqueInsn's contract.
   if (!insn || !insn->bb->reachableBy(texi->bb, term))
      return;

   switch (insn->op) {
   /* Values not connected to the tex's definition through any of these should
    * not be conflicting.
    */
   case OP_SPLIT:
   case OP_MERGE:
   case OP_PHI:
   case OP_UNION:
      /* recurse again */
      for (int s = 0; insn->srcExists(s); ++s)
         findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term,
                             uses);
      break;
   default:
      // if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ?
      addTexUse(uses, insn, texi);
      break;
   }
}

// Collect the first real (machine-code) uses of texi's results, looking
// through pseudo ops (SPLIT/MERGE/PHI/UNION) and register-preserving MOVs.
void
NVC0LegalizePostRA::findFirstUses(const Instruction *texi,
                                  const Instruction *insn,
                                  std::list<TexUse> &uses)
{
   for (int d = 0; insn->defExists(d); ++d) {
      Value *v = insn->getDef(d);
      for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) {
         Instruction *usei = (*u)->getInsn();

         if (usei->op == OP_PHI || usei->op == OP_UNION) {
            // need a barrier before WAW cases
            for (int s = 0; usei->srcExists(s); ++s) {
               Instruction *defi = usei->getSrc(s)->getUniqueInsn();
               if (defi && &usei->src(s) != *u)
                  findOverwritingDefs(texi, defi, usei->bb, uses);
            }
         }

         if (usei->op == OP_SPLIT ||
             usei->op == OP_MERGE ||
             usei->op == OP_PHI ||
             usei->op == OP_UNION) {
            // these uses don't manifest in the machine code
            findFirstUses(texi, usei, uses);
         } else
         if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) &&
             usei->subOp != NV50_IR_SUBOP_MOV_FINAL) {
            findFirstUses(texi, usei, uses);
         } else {
            addTexUse(uses, usei, insn);
         }
      }
   }
}

// Texture barriers:
// This pass is a bit long and ugly and can probably be optimized.
//
// 1. obtain a list of TEXes and their outputs' first use(s)
// 2. calculate the barrier level of each first use (minimal number of TEXes,
//    over all paths, between the TEX and the use in question)
// 3.
for each barrier, if all paths from the source TEX to that barrier +// contain a barrier of lesser level, it can be culled +bool +NVC0LegalizePostRA::insertTextureBarriers(Function *fn) +{ + std::list<TexUse> *uses; + std::vector<Instruction *> texes; + std::vector<int> bbFirstTex; + std::vector<int> bbFirstUse; + std::vector<int> texCounts; + std::vector<TexUse> useVec; + ArrayList insns; + + fn->orderInstructions(insns); + + texCounts.resize(fn->allBBlocks.getSize(), 0); + bbFirstTex.resize(fn->allBBlocks.getSize(), insns.getSize()); + bbFirstUse.resize(fn->allBBlocks.getSize(), insns.getSize()); + + // tag BB CFG nodes by their id for later + for (ArrayList::Iterator i = fn->allBBlocks.iterator(); !i.end(); i.next()) { + BasicBlock *bb = reinterpret_cast<BasicBlock *>(i.get()); + if (bb) + bb->cfg.tag = bb->getId(); + } + + // gather the first uses for each TEX + for (int i = 0; i < insns.getSize(); ++i) { + Instruction *tex = reinterpret_cast<Instruction *>(insns.get(i)); + if (isTextureOp(tex->op)) { + texes.push_back(tex); + if (!texCounts.at(tex->bb->getId())) + bbFirstTex[tex->bb->getId()] = texes.size() - 1; + texCounts[tex->bb->getId()]++; + } + } + insns.clear(); + if (texes.empty()) + return false; + uses = new std::list<TexUse>[texes.size()]; + if (!uses) + return false; + for (size_t i = 0; i < texes.size(); ++i) + findFirstUses(texes[i], texes[i], uses[i]); + + // determine the barrier level at each use + for (size_t i = 0; i < texes.size(); ++i) { + for (std::list<TexUse>::iterator u = uses[i].begin(); u != uses[i].end(); + ++u) { + BasicBlock *tb = texes[i]->bb; + BasicBlock *ub = u->insn->bb; + if (tb == ub) { + u->level = 0; + for (size_t j = i + 1; j < texes.size() && + texes[j]->bb == tb && texes[j]->serial < u->insn->serial; + ++j) + u->level++; + } else { + u->level = fn->cfg.findLightestPathWeight(&tb->cfg, + &ub->cfg, texCounts); + if (u->level < 0) { + WARN("Failed to find path TEX -> TEXBAR\n"); + u->level = 0; + continue; + } + // this 
counted all TEXes in the origin block, correct that + u->level -= i - bbFirstTex.at(tb->getId()) + 1 /* this TEX */; + // and did not count the TEXes in the destination block, add those + for (size_t j = bbFirstTex.at(ub->getId()); j < texes.size() && + texes[j]->bb == ub && texes[j]->serial < u->insn->serial; + ++j) + u->level++; + } + assert(u->level >= 0); + useVec.push_back(*u); + } + } + delete[] uses; + uses = NULL; + + // insert the barriers + for (size_t i = 0; i < useVec.size(); ++i) { + Instruction *prev = useVec[i].insn->prev; + if (useVec[i].level < 0) + continue; + if (prev && prev->op == OP_TEXBAR) { + if (prev->subOp > useVec[i].level) + prev->subOp = useVec[i].level; + prev->setSrc(prev->srcCount(), useVec[i].tex->getDef(0)); + } else { + Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE); + bar->fixed = 1; + bar->subOp = useVec[i].level; + // make use explicit to ease latency calculation + bar->setSrc(bar->srcCount(), useVec[i].tex->getDef(0)); + useVec[i].insn->bb->insertBefore(useVec[i].insn, bar); + } + } + + if (fn->getProgram()->optLevel < 3) { + if (uses) + delete[] uses; + return true; + } + + std::vector<Limits> limitT, limitB, limitS; // entry, exit, single + + limitT.resize(fn->allBBlocks.getSize(), Limits(0, 0)); + limitB.resize(fn->allBBlocks.getSize(), Limits(0, 0)); + limitS.resize(fn->allBBlocks.getSize()); + + // cull unneeded barriers (should do that earlier, but for simplicity) + IteratorRef bi = fn->cfg.iteratorCFG(); + // first calculate min/max outstanding TEXes for each BB + for (bi->reset(); !bi->end(); bi->next()) { + Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); + BasicBlock *bb = BasicBlock::get(n); + int min = 0; + int max = std::numeric_limits<int>::max(); + for (Instruction *i = bb->getFirst(); i; i = i->next) { + if (isTextureOp(i->op)) { + min++; + if (max < std::numeric_limits<int>::max()) + max++; + } else + if (i->op == OP_TEXBAR) { + min = MIN2(min, i->subOp); + max = MIN2(max, 
i->subOp); + } + } + // limits when looking at an isolated block + limitS[bb->getId()].min = min; + limitS[bb->getId()].max = max; + } + // propagate the min/max values + for (unsigned int l = 0; l <= fn->loopNestingBound; ++l) { + for (bi->reset(); !bi->end(); bi->next()) { + Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); + BasicBlock *bb = BasicBlock::get(n); + const int bbId = bb->getId(); + for (Graph::EdgeIterator ei = n->incident(); !ei.end(); ei.next()) { + BasicBlock *in = BasicBlock::get(ei.getNode()); + const int inId = in->getId(); + limitT[bbId].min = MAX2(limitT[bbId].min, limitB[inId].min); + limitT[bbId].max = MAX2(limitT[bbId].max, limitB[inId].max); + } + // I just hope this is correct ... + if (limitS[bbId].max == std::numeric_limits<int>::max()) { + // no barrier + limitB[bbId].min = limitT[bbId].min + limitS[bbId].min; + limitB[bbId].max = limitT[bbId].max + limitS[bbId].min; + } else { + // block contained a barrier + limitB[bbId].min = MIN2(limitS[bbId].max, + limitT[bbId].min + limitS[bbId].min); + limitB[bbId].max = MIN2(limitS[bbId].max, + limitT[bbId].max + limitS[bbId].min); + } + } + } + // finally delete unnecessary barriers + for (bi->reset(); !bi->end(); bi->next()) { + Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); + BasicBlock *bb = BasicBlock::get(n); + Instruction *prev = NULL; + Instruction *next; + int max = limitT[bb->getId()].max; + for (Instruction *i = bb->getFirst(); i; i = next) { + next = i->next; + if (i->op == OP_TEXBAR) { + if (i->subOp >= max) { + delete_Instruction(prog, i); + i = NULL; + } else { + max = i->subOp; + if (prev && prev->op == OP_TEXBAR && prev->subOp >= max) { + delete_Instruction(prog, prev); + prev = NULL; + } + } + } else + if (isTextureOp(i->op)) { + max++; + } + if (i && !i->isNop()) + prev = i; + } + } + if (uses) + delete[] uses; + return true; +} + +bool +NVC0LegalizePostRA::visit(Function *fn) +{ + if (needTexBar) + insertTextureBarriers(fn); + + rZero = 
new_LValue(fn, FILE_GPR);
   carry = new_LValue(fn, FILE_FLAGS);

   // $r63 (one past the allocatable GPRs) reads as zero on this hardware
   rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR);
   carry->reg.data.id = 0;

   return true;
}

// Replace immediate-zero sources by the dedicated zero register (post-RA,
// most ops cannot encode an immediate operand).
void
NVC0LegalizePostRA::replaceZero(Instruction *i)
{
   for (int s = 0; i->srcExists(s); ++s) {
      // SUCLAMP's 3rd operand must stay an immediate
      if (s == 2 && i->op == OP_SUCLAMP)
         continue;
      ImmediateValue *imm = i->getSrc(s)->asImm();
      if (imm && imm->reg.data.u64 == 0)
         i->setSrc(s, rZero);
   }
}

// replace CONT with BRA for single unconditional continue
bool
NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb)
{
   if (bb->cfg.incidentCount() != 2 || bb->getEntry()->op != OP_PRECONT)
      return false;
   // find the single back edge into the loop header
   Graph::EdgeIterator ei = bb->cfg.incident();
   if (ei.getType() != Graph::Edge::BACK)
      ei.next();
   if (ei.getType() != Graph::Edge::BACK)
      return false;
   BasicBlock *contBB = BasicBlock::get(ei.getNode());

   if (!contBB->getExit() || contBB->getExit()->op != OP_CONT ||
       contBB->getExit()->getPredicate())
      return false;
   contBB->getExit()->op = OP_BRA;
   bb->remove(bb->getEntry()); // delete PRECONT

   ei.next();
   assert(ei.end() || ei.getType() != Graph::Edge::BACK);
   return true;
}

// replace branches to join blocks with join ops
void
NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
{
   if (bb->getEntry()->op != OP_JOIN || bb->getEntry()->asFlow()->limit)
      return;
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      BasicBlock *in = BasicBlock::get(ei.getNode());
      Instruction *exit = in->getExit();
      if (!exit) {
         in->insertTail(new FlowInstruction(func, OP_JOIN, bb));
         // there should always be a terminator instruction
         WARN("inserted missing terminator in BB:%i\n", in->getId());
      } else
      if (exit->op == OP_BRA) {
         exit->op = OP_JOIN;
         exit->asFlow()->limit = 1; // must-not-propagate marker
      }
   }
   bb->remove(bb->getEntry());
}

bool
NVC0LegalizePostRA::visit(BasicBlock *bb)
{
   Instruction *i, *next;

   // remove pseudo operations and non-fixed no-ops, split 64 bit operations
   for (i = bb->getFirst(); i; i = next) {
      next = i->next;
      if (i->op == OP_EMIT || i->op == OP_RESTART) {
         if (!i->getDef(0)->refCount())
            i->setDef(0, NULL);
         if (i->src(0).getFile() == FILE_IMMEDIATE)
            i->setSrc(0, rZero); // initial value must be 0
      } else
      if (i->isNop()) {
         bb->remove(i);
      } else {
         // TODO: Move this to before register allocation for operations that
         // need the $c register !
         if (typeSizeof(i->dType) == 8) {
            Instruction *hi;
            hi = BuildUtil::split64BitOpPostRA(func, i, rZero, carry);
            if (hi)
               next = hi; // revisit the freshly inserted high-half op
         }

         if (i->op != OP_MOV && i->op != OP_PFETCH)
            replaceZero(i);
      }
   }
   if (!bb->getEntry())
      return true;

   if (!tryReplaceContWithBra(bb))
      propagateJoin(bb);

   return true;
}

// Lowering pass: turn generic IR ops into the forms the nvc0/nve4 hardware
// actually supports (texture fixups, surface access, sysval reads, etc.).
class NVC0LoweringPass : public Pass
{
public:
   NVC0LoweringPass(Program *);

private:
   virtual bool visit(Function *);
   virtual bool visit(BasicBlock *);
   virtual bool visit(Instruction *);

   bool handleRDSV(Instruction *);
   bool handleWRSV(Instruction *);
   bool handleEXPORT(Instruction *);
   bool handleOUT(Instruction *);
   bool handleDIV(Instruction *);
   bool handleMOD(Instruction *);
   bool handleSQRT(Instruction *);
   bool handlePOW(Instruction *);
   bool handleTEX(TexInstruction *);
   bool handleTXD(TexInstruction *);
   bool handleTXQ(TexInstruction *);
   bool handleManualTXD(TexInstruction *);
   bool handleATOM(Instruction *);
   bool handleCasExch(Instruction *, bool needCctl);
   void handleSurfaceOpNVE4(TexInstruction *);

   void checkPredicate(Instruction *);

   void readTessCoord(LValue *dst, int c);

   Value *loadResInfo32(Value *ptr, uint32_t off);
   Value *loadMsInfo32(Value *ptr, uint32_t off);
   Value *loadTexHandle(Value *ptr, unsigned int slot);

   void adjustCoordinatesMS(TexInstruction *);
   void processSurfaceCoordsNVE4(TexInstruction *);

private:
   const Target *const targ;

   BuildUtil bld;

   Symbol *gMemBase;
   LValue *gpEmitAddress;
};

NVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget())
{
   bld.setProgram(prog);
   gMemBase = NULL;
}

bool
NVC0LoweringPass::visit(Function *fn)
{
   if (prog->getType() == Program::TYPE_GEOMETRY) {
      assert(!strncmp(fn->getName(), "MAIN", 4));
      // TODO: when we generate actual functions pass this value along somehow
      bld.setPosition(BasicBlock::get(fn->cfg.getRoot()), false);
      gpEmitAddress = bld.loadImm(NULL, 0)->asLValue();
      if (fn->cfgExit) {
         bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false);
         bld.mkMovToReg(0, gpEmitAddress);
      }
   }
   return true;
}

bool
NVC0LoweringPass::visit(BasicBlock *bb)
{
   return true;
}

// Load a texture/sampler handle word for the given slot from the driver's
// resource-info constant buffer.
inline Value *
NVC0LoweringPass::loadTexHandle(Value *ptr, unsigned int slot)
{
   uint8_t b = prog->driver->io.resInfoCBSlot;
   uint32_t off = prog->driver->io.texBindBase + slot * 4;
   return bld.
      mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
}

// move array source to first slot, convert to u16, add indirections
bool
NVC0LoweringPass::handleTEX(TexInstruction *i)
{
   const int dim = i->tex.target.getDim() + i->tex.target.isCube();
   const int arg = i->tex.target.getArgCount();
   const int lyr = arg - (i->tex.target.isMS() ? 2 : 1);

   if (prog->getTarget()->getChipset() >= NVISA_GK104_CHIPSET) {
      // (nve4) TIC/TSC are combined into a single handle operand
      if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
         WARN("indirect TEX not implemented\n");
      }
      if (i->tex.r == i->tex.s) {
         i->tex.r += prog->driver->io.texBindBase / 4;
         i->tex.s = 0; // only a single cX[] value possible here
      } else {
         // combine separate resource/sampler handles into one value
         Value *hnd = bld.getScratch();
         Value *rHnd = loadTexHandle(NULL, i->tex.r);
         Value *sHnd = loadTexHandle(NULL, i->tex.s);

         bld.mkOp3(OP_INSBF, TYPE_U32, hnd, rHnd, bld.mkImm(0x1400), sHnd);

         i->tex.r = 0; // not used for indirect tex
         i->tex.s = 0;
         i->setIndirectR(hnd);
      }
      if (i->tex.target.isArray()) {
         // layer index goes first, converted to u16 (saturated for TXF)
         LValue *layer = new_LValue(func, FILE_GPR);
         Value *src = i->getSrc(lyr);
         const int sat = (i->op == OP_TXF) ? 1 : 0;
         DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
         bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
         for (int s = dim; s >= 1; --s)
            i->setSrc(s, i->getSrc(s - 1));
         i->setSrc(0, layer);
      }
   } else
   // (nvc0) generate and move the tsc/tic/array source to the front
   if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
      LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa

      Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL;
      for (int s = dim; s >= 1; --s)
         i->setSrc(s, i->getSrc(s - 1));
      i->setSrc(0, arrayIndex);

      Value *ticRel = i->getIndirectR();
      Value *tscRel = i->getIndirectS();

      if (arrayIndex) {
         int sat = (i->op == OP_TXF) ? 1 : 0;
         DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
         bld.mkCvt(OP_CVT, TYPE_U16, src, sTy, arrayIndex)->saturate = sat;
      } else {
         bld.loadImm(src, 0);
      }

      // insert indirect TIC/TSC indices into their bitfields
      if (ticRel) {
         i->setSrc(i->tex.rIndirectSrc, NULL);
         bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src);
      }
      if (tscRel) {
         i->setSrc(i->tex.sIndirectSrc, NULL);
         bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src);
      }

      i->setSrc(0, src);
   }

   // offset is last source (lod 1st, dc 2nd)
   if (i->tex.useOffsets) {
      uint32_t value = 0;
      int n, c;
      int s = i->srcCount(0xff, true);
      if (i->srcExists(s)) // move potential predicate out of the way
         i->moveSources(s, 1);
      // pack all offsets, 4 bits per component, 12 bits per offset
      for (n = 0; n < i->tex.useOffsets; ++n)
         for (c = 0; c < 3; ++c)
            value |= (i->tex.offset[n][c] & 0xf) << (n * 12 + c * 4);
      i->setSrc(s, bld.loadImm(NULL, value));
   }

   if (prog->getTarget()->getChipset() >= NVISA_GK104_CHIPSET) {
      //
      // If TEX requires more than 4 sources, the 2nd register tuple must be
      // aligned to 4, even if it consists of just a single 4-byte register.
      //
      // XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case.
      //
      int s = i->srcCount(0xff, true);
      if (s > 4 && s < 7) {
         if (i->srcExists(s)) // move potential predicate out of the way
            i->moveSources(s, 7 - s);
         while (s < 7)
            i->setSrc(s++, bld.loadImm(NULL, 0));
      }
   }

   return true;
}

// Emulate TXD by running a TEX per quad lane with manually applied
// derivatives, then re-uniting the per-lane results.
bool
NVC0LoweringPass::handleManualTXD(TexInstruction *i)
{
   static const uint8_t qOps[4][2] =
   {
      { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) }, // l0
      { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD,  ADD) }, // l1
      { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
      { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
   };
   Value *def[4][4];
   Value *crd[3];
   Instruction *tex;
   Value *zero = bld.loadImm(bld.getSSA(), 0);
   int l, c;
   const int dim = i->tex.target.getDim();

   i->op = OP_TEX; // no need to clone dPdx/dPdy later

   for (c = 0; c < dim; ++c)
      crd[c] = bld.getScratch();

   bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
   for (l = 0; l < 4; ++l) {
      // mov coordinates from lane l to all lanes
      for (c = 0; c < dim; ++c)
         bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
      // add dPdx from lane l to lanes dx
      for (c = 0; c < dim; ++c)
         bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
      // add dPdy from lane l to lanes dy
      for (c = 0; c < dim; ++c)
         bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
      // texture
      bld.insert(tex = cloneForward(func, i));
      for (c = 0; c < dim; ++c)
         tex->setSrc(c, crd[c]);
      // save results
      for (c = 0; i->defExists(c); ++c) {
         Instruction *mov;
         def[c][l] = bld.getSSA();
         mov = bld.mkMov(def[c][l], tex->getDef(c));
         mov->fixed = 1;
         mov->lanes = 1 << l;
      }
   }
   bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);

   // merge the per-lane results back into the original defs
   for (c = 0; i->defExists(c); ++c) {
      Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
      for (l = 0; l < 4; ++l)
         u->setSrc(l, def[c][l]);
   }

   i->bb->remove(i);
   return true;
}

bool
NVC0LoweringPass::handleTXD(TexInstruction *txd)
{
   int dim
= txd->tex.target.getDim();
   int arg = txd->tex.target.getArgCount();

   handleTEX(txd);
   while (txd->srcExists(arg))
      ++arg;

   txd->tex.derivAll = true;
   // hardware TXD is limited; fall back to the manual per-lane emulation
   if (dim > 2 ||
       txd->tex.target.isCube() ||
       arg > 4 ||
       txd->tex.target.isShadow())
      return handleManualTXD(txd);

   // append dPdx/dPdy as interleaved extra sources
   for (int c = 0; c < dim; ++c) {
      txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]);
      txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]);
      txd->dPdx[c].set(NULL);
      txd->dPdy[c].set(NULL);
   }
   return true;
}

bool
NVC0LoweringPass::handleTXQ(TexInstruction *txq)
{
   // TODO: indirect resource/sampler index
   return true;
}

// Rewrite local/shared atomics as global atomics relative to the l[]/s[]
// window base system value.
bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
   SVSemantic sv;

   switch (atom->src(0).getFile()) {
   case FILE_MEMORY_LOCAL:
      sv = SV_LBASE;
      break;
   case FILE_MEMORY_SHARED:
      sv = SV_SBASE;
      break;
   default:
      assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
      return true;
   }
   Value *base =
      bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0));
   Value *ptr = atom->getIndirect(0, 0);

   atom->setSrc(0, cloneShallow(func, atom->getSrc(0)));
   atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
   if (ptr)
      base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr);
   atom->setIndirect(0, 0, base);

   return true;
}

// Fix up CAS/EXCH atomics: optionally invalidate caches first, and merge the
// CAS compare/swap operands into the double register the hardware expects.
bool
NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
{
   if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS &&
       cas->subOp != NV50_IR_SUBOP_ATOM_EXCH)
      return false;
   bld.setPosition(cas, true);

   if (needCctl) {
      Instruction *cctl = bld.mkOp1(OP_CCTL, TYPE_NONE, NULL, cas->getSrc(0));
      cctl->setIndirect(0, 0, cas->getIndirect(0, 0));
      cctl->fixed = 1;
      cctl->subOp = NV50_IR_SUBOP_CCTL_IV;
      if (cas->isPredicated())
         cctl->setPredicate(cas->cc, cas->getPredicate());
   }

   if (cas->defExists(0) && cas->subOp == NV50_IR_SUBOP_ATOM_CAS) {
      // CAS is crazy. It's 2nd source is a double reg, and the 3rd source
      // should be set to the high part of the double reg or bad things will
      // happen elsewhere in the universe.
      // Also, it sometimes returns the new value instead of the old one
      // under mysterious circumstances.
      Value *dreg = bld.getSSA(8);
      bld.setPosition(cas, false);
      bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2));
      cas->setSrc(1, dreg);
   }

   return true;
}

// Load a 32-bit word of surface info from the driver's resource-info cbuf.
inline Value *
NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
{
   uint8_t b = prog->driver->io.resInfoCBSlot;
   off += prog->driver->io.suInfoBase;
   return bld.
      mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
}

// Load a 32-bit word of multisample info from the driver's ms-info cbuf.
inline Value *
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
{
   uint8_t b = prog->driver->io.msInfoCBSlot;
   off += prog->driver->io.msInfoBase;
   return bld.
      mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
}

/* On nvc0, surface info is obtained via the surface binding points passed
 * to the SULD/SUST instructions.
 * On nve4, surface info is stored in c[] and is used by various special
 * instructions, e.g. for clamping coordinates or generating an address.
 * They couldn't just have added an equivalent to TIC now, couldn't they ?
 */
// Per-surface info record layout in c[] (NVE4_SU_INFO__STRIDE bytes each):
#define NVE4_SU_INFO_ADDR   0x00
#define NVE4_SU_INFO_FMT    0x04
#define NVE4_SU_INFO_DIM_X  0x08
#define NVE4_SU_INFO_PITCH  0x0c
#define NVE4_SU_INFO_DIM_Y  0x10
#define NVE4_SU_INFO_ARRAY  0x14
#define NVE4_SU_INFO_DIM_Z  0x18
#define NVE4_SU_INFO_UNK1C  0x1c
#define NVE4_SU_INFO_WIDTH  0x20
#define NVE4_SU_INFO_HEIGHT 0x24
#define NVE4_SU_INFO_DEPTH  0x28
#define NVE4_SU_INFO_TARGET 0x2c
#define NVE4_SU_INFO_CALL   0x30
#define NVE4_SU_INFO_RAW_X  0x34
#define NVE4_SU_INFO_MS_X   0x38
#define NVE4_SU_INFO_MS_Y   0x3c

#define NVE4_SU_INFO__STRIDE 0x40

// indexed accessors for per-dimension fields
#define NVE4_SU_INFO_DIM(i)  (0x08 + (i) * 8)
#define NVE4_SU_INFO_SIZE(i) (0x20 + (i) * 4)
#define NVE4_SU_INFO_MS(i)   (0x38 + (i) * 4)

// Pick the SUCLAMP variant for component c of a surface access, depending on
// the surface target's dimensionality/layout.
static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
{
   switch (su->tex.target.getEnum()) {
   case TEX_TARGET_BUFFER:      return NV50_IR_SUBOP_SUCLAMP_PL(0, 1);
   case TEX_TARGET_RECT:        return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
   case TEX_TARGET_1D:          return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
   case TEX_TARGET_1D_ARRAY:    return (c == 1) ?
                                   NV50_IR_SUBOP_SUCLAMP_PL(0, 2) :
                                   NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
   case TEX_TARGET_2D:          return NV50_IR_SUBOP_SUCLAMP_BL(0, 2);
   case TEX_TARGET_2D_MS:       return NV50_IR_SUBOP_SUCLAMP_BL(0, 2);
   case TEX_TARGET_2D_ARRAY:    return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
   case TEX_TARGET_2D_MS_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
   case TEX_TARGET_3D:          return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
   case TEX_TARGET_CUBE:        return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
   case TEX_TARGET_CUBE_ARRAY:  return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);
   default:
      assert(0);
      return 0;
   }
}

// Convert a multisampled surface access into a non-MS one: scale x/y by the
// per-surface sample factors and add the sample's offset from the ms-info
// table; the sample-index source is then dropped.
void
NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
{
   const uint16_t base = tex->tex.r * NVE4_SU_INFO__STRIDE;
   const int arg = tex->tex.target.getArgCount();

   if (tex->tex.target == TEX_TARGET_2D_MS)
      tex->tex.target = TEX_TARGET_2D;
   else
   if (tex->tex.target == TEX_TARGET_2D_MS_ARRAY)
      tex->tex.target = TEX_TARGET_2D_ARRAY;
   else
      return;

   Value *x = tex->getSrc(0);
   Value *y = tex->getSrc(1);
   Value *s = tex->getSrc(arg - 1); // sample index

   Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();

   Value *ms_x = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(0));
   Value *ms_y = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(1));

   bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
   bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);

   // ts = (s & 7) * 8: byte offset of this sample's (dx, dy) pair
   s = bld.mkOp2v(OP_AND, TYPE_U32, ts, s, bld.loadImm(NULL, 0x7));
   s = bld.mkOp2v(OP_SHL, TYPE_U32, ts, ts, bld.mkImm(3));

   Value *dx = loadMsInfo32(ts, 0x0);
   Value *dy = loadMsInfo32(ts, 0x4);

   bld.mkOp2(OP_ADD, TYPE_U32, tx, tx, dx);
   bld.mkOp2(OP_ADD, TYPE_U32, ty, ty, dy);

   tex->setSrc(0, tx);
   tex->setSrc(1, ty);
   tex->moveSources(arg, -1); // drop the sample-index source
}

// Sets 64-bit "generic address", predicate and format sources for SULD/SUST.
// They're computed from the coordinates using the surface info in c[] space.
+void +NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) +{ + Instruction *insn; + const bool atom = su->op == OP_SUREDB || su->op == OP_SUREDP; + const bool raw = + su->op == OP_SULDB || su->op == OP_SUSTB || su->op == OP_SUREDB; + const int idx = su->tex.r; + const int dim = su->tex.target.getDim(); + const int arg = dim + (su->tex.target.isArray() ? 1 : 0); + const uint16_t base = idx * NVE4_SU_INFO__STRIDE; + int c; + Value *zero = bld.mkImm(0); + Value *p1 = NULL; + Value *v; + Value *src[3]; + Value *bf, *eau, *off; + Value *addr, *pred; + + off = bld.getScratch(4); + bf = bld.getScratch(4); + addr = bld.getSSA(8); + pred = bld.getScratch(1, FILE_PREDICATE); + + bld.setPosition(su, false); + + adjustCoordinatesMS(su); + + // calculate clamped coordinates + for (c = 0; c < arg; ++c) { + src[c] = bld.getScratch(); + if (c == 0 && raw) + v = loadResInfo32(NULL, base + NVE4_SU_INFO_RAW_X); + else + v = loadResInfo32(NULL, base + NVE4_SU_INFO_DIM(c)); + bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero) + ->subOp = getSuClampSubOp(su, c); + } + for (; c < 3; ++c) + src[c] = zero; + + // set predicate output + if (su->tex.target == TEX_TARGET_BUFFER) { + src[0]->getInsn()->setFlagsDef(1, pred); + } else + if (su->tex.target.isArray()) { + p1 = bld.getSSA(1, FILE_PREDICATE); + src[dim]->getInsn()->setFlagsDef(1, p1); + } + + // calculate pixel offset + if (dim == 1) { + if (su->tex.target != TEX_TARGET_BUFFER) + bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff)); + } else + if (dim == 3) { + v = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C); + bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1]) + ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l + + v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH); + bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0]) + ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l + } else { + assert(dim == 2); + v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH); + bld.mkOp3(OP_MADSP, 
TYPE_U32, off, src[1], v, src[0]) + ->subOp = su->tex.target.isArray() ? + NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l + } + + // calculate effective address part 1 + if (su->tex.target == TEX_TARGET_BUFFER) { + if (raw) { + bf = src[0]; + } else { + v = loadResInfo32(NULL, base + NVE4_SU_INFO_FMT); + bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero) + ->subOp = NV50_IR_SUBOP_V1(7,6,8|2); + } + } else { + Value *y = src[1]; + Value *z = src[2]; + uint16_t subOp = 0; + + switch (dim) { + case 1: + y = zero; + z = zero; + break; + case 2: + z = off; + if (!su->tex.target.isArray()) { + z = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C); + subOp = NV50_IR_SUBOP_SUBFM_3D; + } + break; + default: + subOp = NV50_IR_SUBOP_SUBFM_3D; + assert(dim == 3); + break; + } + insn = bld.mkOp3(OP_SUBFM, TYPE_U32, bf, src[0], y, z); + insn->subOp = subOp; + insn->setFlagsDef(1, pred); + } + + // part 2 + v = loadResInfo32(NULL, base + NVE4_SU_INFO_ADDR); + + if (su->tex.target == TEX_TARGET_BUFFER) { + eau = v; + } else { + eau = bld.mkOp3v(OP_SUEAU, TYPE_U32, bld.getScratch(4), off, bf, v); + } + // add array layer offset + if (su->tex.target.isArray()) { + v = loadResInfo32(NULL, base + NVE4_SU_INFO_ARRAY); + if (dim == 1) + bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau) + ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32 + else + bld.mkOp3(OP_MADSP, TYPE_U32, eau, v, src[2], eau) + ->subOp = NV50_IR_SUBOP_MADSP(0,0,0); // u32 u24 u32 + // combine predicates + assert(p1); + bld.mkOp2(OP_OR, TYPE_U8, pred, pred, p1); + } + + if (atom) { + Value *lo = bf; + if (su->tex.target == TEX_TARGET_BUFFER) { + lo = zero; + bld.mkMov(off, bf); + } + // bf == g[] address & 0xff + // eau == g[] address >> 8 + bld.mkOp3(OP_PERMT, TYPE_U32, bf, lo, bld.loadImm(NULL, 0x6540), eau); + bld.mkOp3(OP_PERMT, TYPE_U32, eau, zero, bld.loadImm(NULL, 0x0007), eau); + } else + if (su->op == OP_SULDP && su->tex.target == TEX_TARGET_BUFFER) { + // Convert from u32 to u8 
address format, which is what the library code + // doing SULDP currently uses. + // XXX: can SUEAU do this ? + // XXX: does it matter that we don't mask high bytes in bf ? + // Grrr. + bld.mkOp2(OP_SHR, TYPE_U32, off, bf, bld.mkImm(8)); + bld.mkOp2(OP_ADD, TYPE_U32, eau, eau, off); + } + + bld.mkOp2(OP_MERGE, TYPE_U64, addr, bf, eau); + + if (atom && su->tex.target == TEX_TARGET_BUFFER) + bld.mkOp2(OP_ADD, TYPE_U64, addr, addr, off); + + // let's just set it 0 for raw access and hope it works + v = raw ? + bld.mkImm(0) : loadResInfo32(NULL, base + NVE4_SU_INFO_FMT); + + // get rid of old coordinate sources, make space for fmt info and predicate + su->moveSources(arg, 3 - arg); + // set 64 bit address and 32-bit format sources + su->setSrc(0, addr); + su->setSrc(1, v); + su->setSrc(2, pred); +} + +void +NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) +{ + processSurfaceCoordsNVE4(su); + + // Who do we hate more ? The person who decided that nvc0's SULD doesn't + // have to support conversion or the person who decided that, in OpenCL, + // you don't have to specify the format here like you do in OpenGL ? + + if (su->op == OP_SULDP) { + // We don't patch shaders. Ever. + // You get an indirect call to our library blob here. + // But at least it's uniform. + FlowInstruction *call; + LValue *p[3]; + LValue *r[5]; + uint16_t base = su->tex.r * NVE4_SU_INFO__STRIDE + NVE4_SU_INFO_CALL; + + for (int i = 0; i < 4; ++i) + (r[i] = bld.getScratch(4, FILE_GPR))->reg.data.id = i; + for (int i = 0; i < 3; ++i) + (p[i] = bld.getScratch(1, FILE_PREDICATE))->reg.data.id = i; + (r[4] = bld.getScratch(8, FILE_GPR))->reg.data.id = 4; + + bld.mkMov(p[1], bld.mkImm((su->cache == CACHE_CA) ? 1 : 0), TYPE_U8); + bld.mkMov(p[2], bld.mkImm((su->cache == CACHE_CG) ? 
1 : 0), TYPE_U8); + bld.mkMov(p[0], su->getSrc(2), TYPE_U8); + bld.mkMov(r[4], su->getSrc(0), TYPE_U64); + bld.mkMov(r[2], su->getSrc(1), TYPE_U32); + + call = bld.mkFlow(OP_CALL, NULL, su->cc, su->getPredicate()); + + call->indirect = 1; + call->absolute = 1; + call->setSrc(0, bld.mkSymbol(FILE_MEMORY_CONST, + prog->driver->io.resInfoCBSlot, TYPE_U32, + prog->driver->io.suInfoBase + base)); + call->setSrc(1, r[2]); + call->setSrc(2, r[4]); + for (int i = 0; i < 3; ++i) + call->setSrc(3 + i, p[i]); + for (int i = 0; i < 4; ++i) { + call->setDef(i, r[i]); + bld.mkMov(su->getDef(i), r[i]); + } + call->setDef(4, p[1]); + delete_Instruction(bld.getProgram(), su); + } + + if (su->op == OP_SUREDB || su->op == OP_SUREDP) { + // FIXME: for out of bounds access, destination value will be undefined ! + Value *pred = su->getSrc(2); + CondCode cc = CC_NOT_P; + if (su->getPredicate()) { + pred = bld.getScratch(1, FILE_PREDICATE); + cc = su->cc; + if (cc == CC_NOT_P) { + bld.mkOp2(OP_OR, TYPE_U8, pred, su->getPredicate(), su->getSrc(2)); + } else { + bld.mkOp2(OP_AND, TYPE_U8, pred, su->getPredicate(), su->getSrc(2)); + pred->getInsn()->src(1).mod = Modifier(NV50_IR_MOD_NOT); + } + } + Instruction *red = bld.mkOp(OP_ATOM, su->dType, su->getDef(0)); + red->subOp = su->subOp; + if (!gMemBase) + gMemBase = bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, 0); + red->setSrc(0, gMemBase); + red->setSrc(1, su->getSrc(3)); + if (su->subOp == NV50_IR_SUBOP_ATOM_CAS) + red->setSrc(2, su->getSrc(4)); + red->setIndirect(0, 0, su->getSrc(0)); + red->setPredicate(cc, pred); + delete_Instruction(bld.getProgram(), su); + handleCasExch(red, true); + } else { + su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? 
TYPE_U32 : TYPE_U8; + } +} + +bool +NVC0LoweringPass::handleWRSV(Instruction *i) +{ + Instruction *st; + Symbol *sym; + uint32_t addr; + + // must replace, $sreg are not writeable + addr = targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym()); + if (addr >= 0x400) + return false; + sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr); + + st = bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0), + i->getSrc(1)); + st->perPatch = i->perPatch; + + bld.getBB()->remove(i); + return true; +} + +void +NVC0LoweringPass::readTessCoord(LValue *dst, int c) +{ + Value *laneid = bld.getSSA(); + Value *x, *y; + + bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0)); + + if (c == 0) { + x = dst; + y = NULL; + } else + if (c == 1) { + x = NULL; + y = dst; + } else { + assert(c == 2); + x = bld.getSSA(); + y = bld.getSSA(); + } + if (x) + bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid); + if (y) + bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid); + + if (c == 2) { + bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y); + bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst); + } +} + +bool +NVC0LoweringPass::handleRDSV(Instruction *i) +{ + Symbol *sym = i->getSrc(0)->asSym(); + const SVSemantic sv = sym->reg.data.sv.sv; + Value *vtx = NULL; + Instruction *ld; + uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym); + + if (addr >= 0x400) { + // mov $sreg + if (sym->reg.data.sv.index == 3) { + // TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID + i->op = OP_MOV; + i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 
1 : 0)); + } + return true; + } + + switch (sv) { + case SV_POSITION: + assert(prog->getType() == Program::TYPE_FRAGMENT); + bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL); + break; + case SV_FACE: + { + Value *face = i->getDef(0); + bld.mkInterp(NV50_IR_INTERP_FLAT, face, addr, NULL); + if (i->dType == TYPE_F32) { + bld.mkOp2(OP_AND, TYPE_U32, face, face, bld.mkImm(0x80000000)); + bld.mkOp2(OP_XOR, TYPE_U32, face, face, bld.mkImm(0xbf800000)); + } + } + break; + case SV_TESS_COORD: + assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL); + readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index); + break; + case SV_NTID: + case SV_NCTAID: + case SV_GRIDID: + assert(targ->getChipset() >= NVISA_GK104_CHIPSET); // mov $sreg otherwise + if (sym->reg.data.sv.index == 3) { + i->op = OP_MOV; + i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1)); + return true; + } + addr += prog->driver->prop.cp.gridInfoBase; + bld.mkLoad(TYPE_U32, i->getDef(0), + bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL); + break; + default: + if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) + vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0)); + ld = bld.mkFetch(i->getDef(0), i->dType, + FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx); + ld->perPatch = i->perPatch; + break; + } + bld.getBB()->remove(i); + return true; +} + +bool +NVC0LoweringPass::handleDIV(Instruction *i) +{ + if (!isFloatType(i->dType)) + return true; + bld.setPosition(i, false); + Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1)); + i->op = OP_MUL; + i->setSrc(1, rcp->getDef(0)); + return true; +} + +bool +NVC0LoweringPass::handleMOD(Instruction *i) +{ + if (i->dType != TYPE_F32) + return true; + LValue *value = bld.getScratch(); + bld.mkOp1(OP_RCP, TYPE_F32, value, i->getSrc(1)); + bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(0), value); + bld.mkOp1(OP_TRUNC, TYPE_F32, value, value); + bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(1), 
value); + i->op = OP_SUB; + i->setSrc(1, value); + return true; +} + +bool +NVC0LoweringPass::handleSQRT(Instruction *i) +{ + Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32, + bld.getSSA(), i->getSrc(0)); + i->op = OP_MUL; + i->setSrc(1, rsq->getDef(0)); + + return true; +} + +bool +NVC0LoweringPass::handlePOW(Instruction *i) +{ + LValue *val = bld.getScratch(); + + bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0)); + bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1; + bld.mkOp1(OP_PREEX2, TYPE_F32, val, val); + + i->op = OP_EX2; + i->setSrc(0, val); + i->setSrc(1, NULL); + + return true; +} + +bool +NVC0LoweringPass::handleEXPORT(Instruction *i) +{ + if (prog->getType() == Program::TYPE_FRAGMENT) { + int id = i->getSrc(0)->reg.data.offset / 4; + + if (i->src(0).isIndirect(0)) // TODO, ugly + return false; + i->op = OP_MOV; + i->subOp = NV50_IR_SUBOP_MOV_FINAL; + i->src(0).set(i->src(1)); + i->setSrc(1, NULL); + i->setDef(0, new_LValue(func, FILE_GPR)); + i->getDef(0)->reg.data.id = id; + + prog->maxGPR = MAX2(prog->maxGPR, id); + } else + if (prog->getType() == Program::TYPE_GEOMETRY) { + i->setIndirect(0, 1, gpEmitAddress); + } + return true; +} + +bool +NVC0LoweringPass::handleOUT(Instruction *i) +{ + if (i->op == OP_RESTART && i->prev && i->prev->op == OP_EMIT) { + i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART; + delete_Instruction(prog, i); + } else { + assert(gpEmitAddress); + i->setDef(0, gpEmitAddress); + if (i->srcExists(0)) + i->setSrc(1, i->getSrc(0)); + i->setSrc(0, gpEmitAddress); + } + return true; +} + +// Generate a binary predicate if an instruction is predicated by +// e.g. an f32 value. 
// Ensure the instruction's predicate lives in a predicate register: if it is
// predicated by e.g. an f32 value, emit a SET $p = (pred != 0) and re-predicate.
void
NVC0LoweringPass::checkPredicate(Instruction *insn)
{
   Value *pred = insn->getPredicate();
   Value *pdst;

   // already a predicate register (or not predicated at all) -> nothing to do
   if (!pred || pred->reg.file == FILE_PREDICATE)
      return;
   pdst = new_LValue(func, FILE_PREDICATE);

   // CAUTION: don't use pdst->getInsn, the definition might not be unique,
   // delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass

   bld.mkCmp(OP_SET, CC_NEU, insn->dType, pdst, bld.mkImm(0), pred);

   insn->setPredicate(insn->cc, pdst);
}

//
// - add quadop dance for texturing
// - put FP outputs in GPRs
// - convert instruction sequences
//
bool
NVC0LoweringPass::visit(Instruction *i)
{
   bld.setPosition(i, false);

   if (i->cc != CC_ALWAYS)
      checkPredicate(i);

   switch (i->op) {
   case OP_TEX:
   case OP_TXB:
   case OP_TXL:
   case OP_TXF:
   case OP_TXG:
      return handleTEX(i->asTex());
   case OP_TXD:
      return handleTXD(i->asTex());
   case OP_TXQ:
      return handleTXQ(i->asTex());
   case OP_EX2:
      // EX2 needs its argument pre-scaled by PREEX2
      bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
      i->setSrc(0, i->getDef(0));
      break;
   case OP_POW:
      return handlePOW(i);
   case OP_DIV:
      return handleDIV(i);
   case OP_MOD:
      return handleMOD(i);
   case OP_SQRT:
      return handleSQRT(i);
   case OP_EXPORT:
      return handleEXPORT(i);
   case OP_EMIT:
   case OP_RESTART:
      return handleOUT(i);
   case OP_RDSV:
      return handleRDSV(i);
   case OP_WRSV:
      return handleWRSV(i);
   case OP_LOAD:
      if (i->src(0).getFile() == FILE_SHADER_INPUT) {
         // compute shaders read their "inputs" from constant memory slot 0;
         // other stages fetch attributes with VFETCH (fragment uses INTERP)
         if (prog->getType() == Program::TYPE_COMPUTE) {
            i->getSrc(0)->reg.file = FILE_MEMORY_CONST;
            i->getSrc(0)->reg.fileIndex = 0;
         } else {
            i->op = OP_VFETCH;
            assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
         }
      }
      break;
   case OP_ATOM:
   {
      // global-memory atomics may need a cache-control workaround
      const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL;
      handleATOM(i);
      handleCasExch(i, cctl);
   }
      break;
   case OP_SULDB:
   case OP_SULDP:
   case OP_SUSTB:
   case OP_SUSTP:
   case OP_SUREDB:
   case OP_SUREDP:
      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
         handleSurfaceOpNVE4(i->asTex());
      break;
   default:
      break;
   }
   return true;
}

// Entry point: pick the legalization pass appropriate for the codegen stage.
bool
TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const
{
   if (stage == CG_STAGE_PRE_SSA) {
      NVC0LoweringPass pass(prog);
      return pass.run(prog, false, true);
   } else
   if (stage == CG_STAGE_POST_RA) {
      NVC0LegalizePostRA pass(prog);
      return pass.run(prog, false, true);
   } else
   if (stage == CG_STAGE_SSA) {
      NVC0LegalizeSSA pass;
      return pass.run(prog, false, true);
   }
   return false;
}

} // namespace nv50_ir
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
new file mode 100644
index 00000000000..99bd2bf1a4d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -0,0 +1,2464 @@
/*
 * Copyright 2011 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_target.h"
#include "codegen/nv50_ir_build_util.h"

extern "C" {
#include "util/u_math.h"
}

namespace nv50_ir {

// An instruction is a no-op if removing it cannot change program behavior:
// pure IR bookkeeping ops, unassigned (unused) results, or moves onto self.
bool
Instruction::isNop() const
{
   if (op == OP_PHI || op == OP_SPLIT || op == OP_MERGE || op == OP_CONSTRAINT)
      return true;
   if (terminator || join) // XXX: should terminator imply flow ?
      return false;
   if (op == OP_ATOM)
      return false;
   if (!fixed && op == OP_NOP)
      return true;

   // result never got a register assigned -> nobody reads it
   if (defExists(0) && def(0).rep()->reg.data.id < 0) {
      for (int d = 1; defExists(d); ++d)
         if (def(d).rep()->reg.data.id >= 0)
            WARN("part of vector result is unused !\n");
      return true;
   }

   if (op == OP_MOV || op == OP_UNION) {
      if (!getDef(0)->equals(getSrc(0)))
         return false;
      if (op == OP_UNION)
         if (!def(0).rep()->equals(getSrc(1)))
            return false;
      return true;
   }

   return false;
}

// An instruction is dead if it has no side effects and none of its results
// are referenced or bound to a register.
bool Instruction::isDead() const
{
   if (op == OP_STORE ||
       op == OP_EXPORT ||
       op == OP_ATOM ||
       op == OP_SUSTB || op == OP_SUSTP || op == OP_SUREDP || op == OP_SUREDB ||
       op == OP_WRSV)
      return false;

   for (int d = 0; defExists(d); ++d)
      if (getDef(d)->refCount() || getDef(d)->reg.data.id >= 0)
         return false;

   if (terminator || asFlow())
      return false;
   if (fixed)
      return false;

   return true;
};

// =============================================================================

class CopyPropagation : public Pass
{
private:
   virtual bool visit(BasicBlock *);
};

// Propagate all MOVs forward to make subsequent optimization easier, except if
// the sources stem from a phi, in which case we don't want to mess up potential
// swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def.
// Replace each use of an unpredicated MOV's destination with its source,
// then delete the MOV (unless the source is defined by a phi — see above).
bool
CopyPropagation::visit(BasicBlock *bb)
{
   Instruction *mov, *si, *next;

   for (mov = bb->getEntry(); mov; mov = next) {
      next = mov->next;
      if (mov->op != OP_MOV || mov->fixed || !mov->getSrc(0)->asLValue())
         continue;
      if (mov->getPredicate())
         continue;
      if (mov->def(0).getFile() != mov->src(0).getFile())
         continue;
      si = mov->getSrc(0)->getInsn();
      if (mov->getDef(0)->reg.data.id < 0 && si && si->op != OP_PHI) {
         // propagate
         mov->def(0).replace(mov->getSrc(0), false);
         delete_Instruction(prog, mov);
      }
   }
   return true;
}

// =============================================================================

// Fold memory loads / immediates directly into the sources of their users,
// swapping commutative sources first so the foldable operand ends up where
// the hardware can encode it.
class LoadPropagation : public Pass
{
private:
   virtual bool visit(BasicBlock *);

   void checkSwapSrc01(Instruction *);

   bool isCSpaceLoad(Instruction *);
   bool isImmd32Load(Instruction *);
   bool isAttribOrSharedLoad(Instruction *);
};

// True if @ld loads from constant memory (can be folded into most ALU srcs).
bool
LoadPropagation::isCSpaceLoad(Instruction *ld)
{
   return ld && ld->op == OP_LOAD && ld->src(0).getFile() == FILE_MEMORY_CONST;
}

// True if @ld materializes a 32-bit immediate via MOV.
bool
LoadPropagation::isImmd32Load(Instruction *ld)
{
   if (!ld || (ld->op != OP_MOV) || (typeSizeof(ld->dType) != 4))
      return false;
   return ld->src(0).getFile() == FILE_IMMEDIATE;
}

// True if @ld reads a vertex attribute or shared memory.
bool
LoadPropagation::isAttribOrSharedLoad(Instruction *ld)
{
   return ld &&
      (ld->op == OP_VFETCH ||
       (ld->op == OP_LOAD &&
        (ld->src(0).getFile() == FILE_SHADER_INPUT ||
         ld->src(0).getFile() == FILE_MEMORY_SHARED)));
}

// Move the "more foldable" operand into src1 (preference: const space >
// 32-bit immediate > attrib/shared).  For SET/SLCT (not commutative) the
// comparison condition is adjusted afterwards to compensate for the swap.
void
LoadPropagation::checkSwapSrc01(Instruction *insn)
{
   if (!prog->getTarget()->getOpInfo(insn).commutative)
      if (insn->op != OP_SET && insn->op != OP_SLCT)
         return;
   if (insn->src(1).getFile() != FILE_GPR)
      return;

   Instruction *i0 = insn->getSrc(0)->getInsn();
   Instruction *i1 = insn->getSrc(1)->getInsn();

   if (isCSpaceLoad(i0)) {
      if (!isCSpaceLoad(i1))
         insn->swapSources(0, 1);
      else
         return;
   } else
   if (isImmd32Load(i0)) {
      if (!isCSpaceLoad(i1) && !isImmd32Load(i1))
         insn->swapSources(0, 1);
      else
         return;
   } else
   if (isAttribOrSharedLoad(i1)) {
      if (!isAttribOrSharedLoad(i0))
         insn->swapSources(0, 1);
      else
         return;
   } else {
      return;
   }

   if (insn->op == OP_SET)
      insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond);
   else
   if (insn->op == OP_SLCT)
      insn->asCmp()->setCond = inverseCondCode(insn->asCmp()->setCond);
}

// For each source defined by a LOAD/MOV, fold the memory reference or
// immediate into the user when the target can encode it, deleting the
// producer once its result is no longer referenced.
bool
LoadPropagation::visit(BasicBlock *bb)
{
   const Target *targ = prog->getTarget();
   Instruction *next;

   for (Instruction *i = bb->getEntry(); i; i = next) {
      next = i->next;

      if (i->op == OP_CALL) // calls have args as sources, they must be in regs
         continue;

      if (i->srcExists(1))
         checkSwapSrc01(i);

      for (int s = 0; i->srcExists(s); ++s) {
         Instruction *ld = i->getSrc(s)->getInsn();

         if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV))
            continue;
         if (!targ->insnCanLoad(i, s, ld))
            continue;

         // propagate !
         i->setSrc(s, ld->getSrc(0));
         if (ld->src(0).isIndirect(0))
            i->setIndirect(s, 0, ld->getIndirect(0, 0));

         if (ld->getDef(0)->refCount() == 0)
            delete_Instruction(prog, ld);
      }
   }
   return true;
}

// =============================================================================

// Evaluate constant expressions.
+class ConstantFolding : public Pass +{ +public: + bool foldAll(Program *); + +private: + virtual bool visit(BasicBlock *); + + void expr(Instruction *, ImmediateValue&, ImmediateValue&); + void opnd(Instruction *, ImmediateValue&, int s); + + void unary(Instruction *, const ImmediateValue&); + + void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&); + + // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET + CmpInstruction *findOriginForTestWithZero(Value *); + + unsigned int foldCount; + + BuildUtil bld; +}; + +// TODO: remember generated immediates and only revisit these +bool +ConstantFolding::foldAll(Program *prog) +{ + unsigned int iterCount = 0; + do { + foldCount = 0; + if (!run(prog)) + return false; + } while (foldCount && ++iterCount < 2); + return true; +} + +bool +ConstantFolding::visit(BasicBlock *bb) +{ + Instruction *i, *next; + + for (i = bb->getEntry(); i; i = next) { + next = i->next; + if (i->op == OP_MOV || i->op == OP_CALL) + continue; + + ImmediateValue src0, src1; + + if (i->srcExists(1) && + i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1)) + expr(i, src0, src1); + else + if (i->srcExists(0) && i->src(0).getImmediate(src0)) + opnd(i, src0, 0); + else + if (i->srcExists(1) && i->src(1).getImmediate(src1)) + opnd(i, src1, 1); + } + return true; +} + +CmpInstruction * +ConstantFolding::findOriginForTestWithZero(Value *value) +{ + if (!value) + return NULL; + Instruction *insn = value->getInsn(); + + while (insn && insn->op != OP_SET) { + Instruction *next = NULL; + switch (insn->op) { + case OP_NEG: + case OP_ABS: + case OP_CVT: + next = insn->getSrc(0)->getInsn(); + if (insn->sType != next->dType) + return NULL; + break; + case OP_MOV: + next = insn->getSrc(0)->getInsn(); + break; + default: + return NULL; + } + insn = next; + } + return insn ? insn->asCmp() : NULL; +} + +void +Modifier::applyTo(ImmediateValue& imm) const +{ + if (!bits) // avoid failure if imm.reg.type is unhandled (e.g. 
b128) + return; + switch (imm.reg.type) { + case TYPE_F32: + if (bits & NV50_IR_MOD_ABS) + imm.reg.data.f32 = fabsf(imm.reg.data.f32); + if (bits & NV50_IR_MOD_NEG) + imm.reg.data.f32 = -imm.reg.data.f32; + if (bits & NV50_IR_MOD_SAT) { + if (imm.reg.data.f32 < 0.0f) + imm.reg.data.f32 = 0.0f; + else + if (imm.reg.data.f32 > 1.0f) + imm.reg.data.f32 = 1.0f; + } + assert(!(bits & NV50_IR_MOD_NOT)); + break; + + case TYPE_S8: // NOTE: will be extended + case TYPE_S16: + case TYPE_S32: + case TYPE_U8: // NOTE: treated as signed + case TYPE_U16: + case TYPE_U32: + if (bits & NV50_IR_MOD_ABS) + imm.reg.data.s32 = (imm.reg.data.s32 >= 0) ? + imm.reg.data.s32 : -imm.reg.data.s32; + if (bits & NV50_IR_MOD_NEG) + imm.reg.data.s32 = -imm.reg.data.s32; + if (bits & NV50_IR_MOD_NOT) + imm.reg.data.s32 = ~imm.reg.data.s32; + break; + + case TYPE_F64: + if (bits & NV50_IR_MOD_ABS) + imm.reg.data.f64 = fabs(imm.reg.data.f64); + if (bits & NV50_IR_MOD_NEG) + imm.reg.data.f64 = -imm.reg.data.f64; + if (bits & NV50_IR_MOD_SAT) { + if (imm.reg.data.f64 < 0.0) + imm.reg.data.f64 = 0.0; + else + if (imm.reg.data.f64 > 1.0) + imm.reg.data.f64 = 1.0; + } + assert(!(bits & NV50_IR_MOD_NOT)); + break; + + default: + assert(!"invalid/unhandled type"); + imm.reg.data.u64 = 0; + break; + } +} + +operation +Modifier::getOp() const +{ + switch (bits) { + case NV50_IR_MOD_ABS: return OP_ABS; + case NV50_IR_MOD_NEG: return OP_NEG; + case NV50_IR_MOD_SAT: return OP_SAT; + case NV50_IR_MOD_NOT: return OP_NOT; + case 0: + return OP_MOV; + default: + return OP_CVT; + } +} + +void +ConstantFolding::expr(Instruction *i, + ImmediateValue &imm0, ImmediateValue &imm1) +{ + struct Storage *const a = &imm0.reg, *const b = &imm1.reg; + struct Storage res; + + memset(&res.data, 0, sizeof(res.data)); + + switch (i->op) { + case OP_MAD: + case OP_FMA: + case OP_MUL: + if (i->dnz && i->dType == TYPE_F32) { + if (!isfinite(a->data.f32)) + a->data.f32 = 0.0f; + if (!isfinite(b->data.f32)) + b->data.f32 = 0.0f; + } 
+ switch (i->dType) { + case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break; + case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break; + case TYPE_S32: + case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break; + default: + return; + } + break; + case OP_DIV: + if (b->data.u32 == 0) + break; + switch (i->dType) { + case TYPE_F32: res.data.f32 = a->data.f32 / b->data.f32; break; + case TYPE_F64: res.data.f64 = a->data.f64 / b->data.f64; break; + case TYPE_S32: res.data.s32 = a->data.s32 / b->data.s32; break; + case TYPE_U32: res.data.u32 = a->data.u32 / b->data.u32; break; + default: + return; + } + break; + case OP_ADD: + switch (i->dType) { + case TYPE_F32: res.data.f32 = a->data.f32 + b->data.f32; break; + case TYPE_F64: res.data.f64 = a->data.f64 + b->data.f64; break; + case TYPE_S32: + case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break; + default: + return; + } + break; + case OP_POW: + switch (i->dType) { + case TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break; + case TYPE_F64: res.data.f64 = pow(a->data.f64, b->data.f64); break; + default: + return; + } + break; + case OP_MAX: + switch (i->dType) { + case TYPE_F32: res.data.f32 = MAX2(a->data.f32, b->data.f32); break; + case TYPE_F64: res.data.f64 = MAX2(a->data.f64, b->data.f64); break; + case TYPE_S32: res.data.s32 = MAX2(a->data.s32, b->data.s32); break; + case TYPE_U32: res.data.u32 = MAX2(a->data.u32, b->data.u32); break; + default: + return; + } + break; + case OP_MIN: + switch (i->dType) { + case TYPE_F32: res.data.f32 = MIN2(a->data.f32, b->data.f32); break; + case TYPE_F64: res.data.f64 = MIN2(a->data.f64, b->data.f64); break; + case TYPE_S32: res.data.s32 = MIN2(a->data.s32, b->data.s32); break; + case TYPE_U32: res.data.u32 = MIN2(a->data.u32, b->data.u32); break; + default: + return; + } + break; + case OP_AND: + res.data.u64 = a->data.u64 & b->data.u64; + break; + case OP_OR: + res.data.u64 = a->data.u64 | b->data.u64; + break; + case OP_XOR: + 
res.data.u64 = a->data.u64 ^ b->data.u64; + break; + case OP_SHL: + res.data.u32 = a->data.u32 << b->data.u32; + break; + case OP_SHR: + switch (i->dType) { + case TYPE_S32: res.data.s32 = a->data.s32 >> b->data.u32; break; + case TYPE_U32: res.data.u32 = a->data.u32 >> b->data.u32; break; + default: + return; + } + break; + case OP_SLCT: + if (a->data.u32 != b->data.u32) + return; + res.data.u32 = a->data.u32; + break; + default: + return; + } + ++foldCount; + + i->src(0).mod = Modifier(0); + i->src(1).mod = Modifier(0); + + i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); + i->setSrc(1, NULL); + + i->getSrc(0)->reg.data = res.data; + + if (i->op == OP_MAD || i->op == OP_FMA) { + i->op = OP_ADD; + + i->setSrc(1, i->getSrc(0)); + i->src(1).mod = i->src(2).mod; + i->setSrc(0, i->getSrc(2)); + i->setSrc(2, NULL); + + ImmediateValue src0; + if (i->src(0).getImmediate(src0)) + expr(i, src0, *i->getSrc(1)->asImm()); + } else { + i->op = OP_MOV; + } +} + +void +ConstantFolding::unary(Instruction *i, const ImmediateValue &imm) +{ + Storage res; + + if (i->dType != TYPE_F32) + return; + switch (i->op) { + case OP_NEG: res.data.f32 = -imm.reg.data.f32; break; + case OP_ABS: res.data.f32 = fabsf(imm.reg.data.f32); break; + case OP_RCP: res.data.f32 = 1.0f / imm.reg.data.f32; break; + case OP_RSQ: res.data.f32 = 1.0f / sqrtf(imm.reg.data.f32); break; + case OP_LG2: res.data.f32 = log2f(imm.reg.data.f32); break; + case OP_EX2: res.data.f32 = exp2f(imm.reg.data.f32); break; + case OP_SIN: res.data.f32 = sinf(imm.reg.data.f32); break; + case OP_COS: res.data.f32 = cosf(imm.reg.data.f32); break; + case OP_SQRT: res.data.f32 = sqrtf(imm.reg.data.f32); break; + case OP_PRESIN: + case OP_PREEX2: + // these should be handled in subsequent OP_SIN/COS/EX2 + res.data.f32 = imm.reg.data.f32; + break; + default: + return; + } + i->op = OP_MOV; + i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.f32)); + i->src(0).mod = Modifier(0); +} + +void 
+ConstantFolding::tryCollapseChainedMULs(Instruction *mul2, + const int s, ImmediateValue& imm2) +{ + const int t = s ? 0 : 1; + Instruction *insn; + Instruction *mul1 = NULL; // mul1 before mul2 + int e = 0; + float f = imm2.reg.data.f32; + ImmediateValue imm1; + + assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32); + + if (mul2->getSrc(t)->refCount() == 1) { + insn = mul2->getSrc(t)->getInsn(); + if (!mul2->src(t).mod && insn->op == OP_MUL && insn->dType == TYPE_F32) + mul1 = insn; + if (mul1 && !mul1->saturate) { + int s1; + + if (mul1->src(s1 = 0).getImmediate(imm1) || + mul1->src(s1 = 1).getImmediate(imm1)) { + bld.setPosition(mul1, false); + // a = mul r, imm1 + // d = mul a, imm2 -> d = mul r, (imm1 * imm2) + mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32)); + mul1->src(s1).mod = Modifier(0); + mul2->def(0).replace(mul1->getDef(0), false); + } else + if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) { + // c = mul a, b + // d = mul c, imm -> d = mul_x_imm a, b + mul1->postFactor = e; + mul2->def(0).replace(mul1->getDef(0), false); + if (f < 0) + mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG); + } + mul1->saturate = mul2->saturate; + return; + } + } + if (mul2->getDef(0)->refCount() == 1 && !mul2->saturate) { + // b = mul a, imm + // d = mul b, c -> d = mul_x_imm a, c + int s2, t2; + insn = mul2->getDef(0)->uses.front()->getInsn(); + if (!insn) + return; + mul1 = mul2; + mul2 = NULL; + s2 = insn->getSrc(0) == mul1->getDef(0) ? 0 : 1; + t2 = s2 ? 
0 : 1; + if (insn->op == OP_MUL && insn->dType == TYPE_F32) + if (!insn->src(s2).mod && !insn->src(t2).getImmediate(imm1)) + mul2 = insn; + if (mul2 && prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) { + mul2->postFactor = e; + mul2->setSrc(s2, mul1->src(t)); + if (f < 0) + mul2->src(s2).mod *= Modifier(NV50_IR_MOD_NEG); + } + } +} + +void +ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) +{ + const int t = !s; + const operation op = i->op; + + switch (i->op) { + case OP_MUL: + if (i->dType == TYPE_F32) + tryCollapseChainedMULs(i, s, imm0); + + if (imm0.isInteger(0)) { + i->op = OP_MOV; + i->setSrc(0, new_ImmediateValue(prog, 0u)); + i->src(0).mod = Modifier(0); + i->setSrc(1, NULL); + } else + if (imm0.isInteger(1) || imm0.isInteger(-1)) { + if (imm0.isNegative()) + i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); + i->op = i->src(t).mod.getOp(); + if (s == 0) { + i->setSrc(0, i->getSrc(1)); + i->src(0).mod = i->src(1).mod; + i->src(1).mod = 0; + } + if (i->op != OP_CVT) + i->src(0).mod = 0; + i->setSrc(1, NULL); + } else + if (imm0.isInteger(2) || imm0.isInteger(-2)) { + if (imm0.isNegative()) + i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); + i->op = OP_ADD; + i->setSrc(s, i->getSrc(t)); + i->src(s).mod = i->src(t).mod; + } else + if (!isFloatType(i->sType) && !imm0.isNegative() && imm0.isPow2()) { + i->op = OP_SHL; + imm0.applyLog2(); + i->setSrc(0, i->getSrc(t)); + i->src(0).mod = i->src(t).mod; + i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32)); + i->src(1).mod = 0; + } + break; + case OP_ADD: + if (i->usesFlags()) + break; + if (imm0.isInteger(0)) { + if (s == 0) { + i->setSrc(0, i->getSrc(1)); + i->src(0).mod = i->src(1).mod; + } + i->setSrc(1, NULL); + i->op = i->src(0).mod.getOp(); + if (i->op != OP_CVT) + i->src(0).mod = Modifier(0); + } + break; + + case OP_DIV: + if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32)) + break; + bld.setPosition(i, false); + if (imm0.reg.data.u32 == 0) { + 
break; + } else + if (imm0.reg.data.u32 == 1) { + i->op = OP_MOV; + i->setSrc(1, NULL); + } else + if (i->dType == TYPE_U32 && imm0.isPow2()) { + i->op = OP_SHR; + i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32))); + } else + if (i->dType == TYPE_U32) { + Instruction *mul; + Value *tA, *tB; + const uint32_t d = imm0.reg.data.u32; + uint32_t m; + int r, s; + uint32_t l = util_logbase2(d); + if (((uint32_t)1 << l) < d) + ++l; + m = (((uint64_t)1 << 32) * (((uint64_t)1 << l) - d)) / d + 1; + r = l ? 1 : 0; + s = l ? (l - 1) : 0; + + tA = bld.getSSA(); + tB = bld.getSSA(); + mul = bld.mkOp2(OP_MUL, TYPE_U32, tA, i->getSrc(0), + bld.loadImm(NULL, m)); + mul->subOp = NV50_IR_SUBOP_MUL_HIGH; + bld.mkOp2(OP_SUB, TYPE_U32, tB, i->getSrc(0), tA); + tA = bld.getSSA(); + if (r) + bld.mkOp2(OP_SHR, TYPE_U32, tA, tB, bld.mkImm(r)); + else + tA = tB; + tB = s ? bld.getSSA() : i->getDef(0); + bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA); + if (s) + bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s)); + + delete_Instruction(prog, i); + } else + if (imm0.reg.data.s32 == -1) { + i->op = OP_NEG; + i->setSrc(1, NULL); + } else { + LValue *tA, *tB; + LValue *tD; + const int32_t d = imm0.reg.data.s32; + int32_t m; + int32_t l = util_logbase2(static_cast<unsigned>(abs(d))); + if ((1 << l) < abs(d)) + ++l; + if (!l) + l = 1; + m = ((uint64_t)1 << (32 + l - 1)) / abs(d) + 1 - ((uint64_t)1 << 32); + + tA = bld.getSSA(); + tB = bld.getSSA(); + bld.mkOp3(OP_MAD, TYPE_S32, tA, i->getSrc(0), bld.loadImm(NULL, m), + i->getSrc(0))->subOp = NV50_IR_SUBOP_MUL_HIGH; + if (l > 1) + bld.mkOp2(OP_SHR, TYPE_S32, tB, tA, bld.mkImm(l - 1)); + else + tB = tA; + tA = bld.getSSA(); + bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, i->getSrc(0), bld.mkImm(0)); + tD = (d < 0) ? 
bld.getSSA() : i->getDef(0)->asLValue(); + bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA); + if (d < 0) + bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB); + + delete_Instruction(prog, i); + } + break; + + case OP_MOD: + if (i->sType == TYPE_U32 && imm0.isPow2()) { + bld.setPosition(i, false); + i->op = OP_AND; + i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1)); + } + break; + + case OP_SET: // TODO: SET_AND,OR,XOR + { + CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t)); + CondCode cc, ccZ; + if (i->src(t).mod != Modifier(0)) + return; + if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET) + return; + cc = si->setCond; + ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U); + if (s == 0) + ccZ = reverseCondCode(ccZ); + switch (ccZ) { + case CC_LT: cc = CC_FL; break; + case CC_GE: cc = CC_TR; break; + case CC_EQ: cc = inverseCondCode(cc); break; + case CC_LE: cc = inverseCondCode(cc); break; + case CC_GT: break; + case CC_NE: break; + default: + return; + } + i->asCmp()->setCond = cc; + i->setSrc(0, si->src(0)); + i->setSrc(1, si->src(1)); + i->sType = si->sType; + } + break; + + case OP_SHL: + { + if (s != 1 || i->src(0).mod != Modifier(0)) + break; + // try to concatenate shifts + Instruction *si = i->getSrc(0)->getInsn(); + if (!si || si->op != OP_SHL) + break; + ImmediateValue imm1; + if (si->src(1).getImmediate(imm1)) { + bld.setPosition(i, false); + i->setSrc(0, si->getSrc(0)); + i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32)); + } + } + break; + + case OP_ABS: + case OP_NEG: + case OP_LG2: + case OP_RCP: + case OP_SQRT: + case OP_RSQ: + case OP_PRESIN: + case OP_SIN: + case OP_COS: + case OP_PREEX2: + case OP_EX2: + unary(i, imm0); + break; + default: + return; + } + if (i->op != op) + foldCount++; +} + +// ============================================================================= + +// Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed. 
class ModifierFolding : public Pass
{
private:
   virtual bool visit(BasicBlock *);
};

// Fold ABS/NEG/NOT producer instructions into the source modifiers of their
// users when the target supports the combined modifier on that source.
bool
ModifierFolding::visit(BasicBlock *bb)
{
   const Target *target = prog->getTarget();

   Instruction *i, *next, *mi;
   Modifier mod;

   for (i = bb->getEntry(); i; i = next) {
      next = i->next;

      if (0 && i->op == OP_SUB) {
         // turn "sub" into "add neg" (do we really want this ?)
         i->op = OP_ADD;
         i->src(0).mod = i->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
      }

      for (int s = 0; s < 3 && i->srcExists(s); ++s) {
         mi = i->getSrc(s)->getInsn();
         // skip predicated producers or heavily shared results
         if (!mi ||
             mi->predSrc >= 0 || mi->getDef(0)->refCount() > 8)
            continue;
         if (i->sType == TYPE_U32 && mi->dType == TYPE_S32) {
            // allow s32 ABS/NEG feeding u32 ADD/MUL despite type mismatch
            if ((i->op != OP_ADD &&
                 i->op != OP_MUL) ||
                (mi->op != OP_ABS &&
                 mi->op != OP_NEG))
               continue;
         } else
         if (i->sType != mi->dType) {
            continue;
         }
         if ((mod = Modifier(mi->op)) == Modifier(0))
            continue;
         mod *= mi->src(0).mod;

         if ((i->op == OP_ABS) || i->src(s).mod.abs()) {
            // abs neg [abs] = abs
            mod = mod & Modifier(~(NV50_IR_MOD_NEG | NV50_IR_MOD_ABS));
         } else
         if ((i->op == OP_NEG) && mod.neg()) {
            assert(s == 0);
            // neg as both opcode and modifier on same insn is prohibited
            // neg neg abs = abs, neg neg = identity
            mod = mod & Modifier(~NV50_IR_MOD_NEG);
            i->op = mod.getOp();
            mod = mod & Modifier(~NV50_IR_MOD_ABS);
            if (mod == Modifier(0))
               i->op = OP_MOV;
         }

         if (target->isModSupported(i, s, mod)) {
            i->setSrc(s, mi->getSrc(0));
            i->src(s).mod *= mod;
         }
      }

      // fold a standalone SAT into its (single-use) producer if supported
      if (i->op == OP_SAT) {
         mi = i->getSrc(0)->getInsn();
         if (mi &&
             mi->getDef(0)->refCount() <= 1 && target->isSatSupported(mi)) {
            mi->saturate = 1;
            mi->setDef(0, i->getDef(0));
            delete_Instruction(prog, i);
         }
      }
   }

   return true;
}

// =============================================================================

// MUL + ADD -> MAD/FMA
// MIN/MAX(a, a) -> a, etc.
// SLCT(a, b, const) -> cc(const) ?
a : b +// RCP(RCP(a)) -> a +// MUL(MUL(a, b), const) -> MUL_Xconst(a, b) +class AlgebraicOpt : public Pass +{ +private: + virtual bool visit(BasicBlock *); + + void handleABS(Instruction *); + bool handleADD(Instruction *); + bool tryADDToMADOrSAD(Instruction *, operation toOp); + void handleMINMAX(Instruction *); + void handleRCP(Instruction *); + void handleSLCT(Instruction *); + void handleLOGOP(Instruction *); + void handleCVT(Instruction *); + void handleSUCLAMP(Instruction *); + + BuildUtil bld; +}; + +void +AlgebraicOpt::handleABS(Instruction *abs) +{ + Instruction *sub = abs->getSrc(0)->getInsn(); + DataType ty; + if (!sub || + !prog->getTarget()->isOpSupported(OP_SAD, abs->dType)) + return; + // expect not to have mods yet, if we do, bail + if (sub->src(0).mod || sub->src(1).mod) + return; + // hidden conversion ? + ty = intTypeToSigned(sub->dType); + if (abs->dType != abs->sType || ty != abs->sType) + return; + + if ((sub->op != OP_ADD && sub->op != OP_SUB) || + sub->src(0).getFile() != FILE_GPR || sub->src(0).mod || + sub->src(1).getFile() != FILE_GPR || sub->src(1).mod) + return; + + Value *src0 = sub->getSrc(0); + Value *src1 = sub->getSrc(1); + + if (sub->op == OP_ADD) { + Instruction *neg = sub->getSrc(1)->getInsn(); + if (neg && neg->op != OP_NEG) { + neg = sub->getSrc(0)->getInsn(); + src0 = sub->getSrc(1); + } + if (!neg || neg->op != OP_NEG || + neg->dType != neg->sType || neg->sType != ty) + return; + src1 = neg->getSrc(0); + } + + // found ABS(SUB)) + abs->moveSources(1, 2); // move sources >=1 up by 2 + abs->op = OP_SAD; + abs->setType(sub->dType); + abs->setSrc(0, src0); + abs->setSrc(1, src1); + bld.setPosition(abs, false); + abs->setSrc(2, bld.loadImm(bld.getSSA(typeSizeof(ty)), 0)); +} + +bool +AlgebraicOpt::handleADD(Instruction *add) +{ + Value *src0 = add->getSrc(0); + Value *src1 = add->getSrc(1); + + if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) + return false; + + bool changed = false; + if (!changed && 
prog->getTarget()->isOpSupported(OP_MAD, add->dType)) + changed = tryADDToMADOrSAD(add, OP_MAD); + if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType)) + changed = tryADDToMADOrSAD(add, OP_SAD); + return changed; +} + +// ADD(SAD(a,b,0), c) -> SAD(a,b,c) +// ADD(MUL(a,b), c) -> MAD(a,b,c) +bool +AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp) +{ + Value *src0 = add->getSrc(0); + Value *src1 = add->getSrc(1); + Value *src; + int s; + const operation srcOp = toOp == OP_SAD ? OP_SAD : OP_MUL; + const Modifier modBad = Modifier(~((toOp == OP_MAD) ? NV50_IR_MOD_NEG : 0)); + Modifier mod[4]; + + if (src0->refCount() == 1 && + src0->getUniqueInsn() && src0->getUniqueInsn()->op == srcOp) + s = 0; + else + if (src1->refCount() == 1 && + src1->getUniqueInsn() && src1->getUniqueInsn()->op == srcOp) + s = 1; + else + return false; + + if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) || + (src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb)) + return false; + + src = add->getSrc(s); + + if (src->getInsn()->postFactor) + return false; + if (toOp == OP_SAD) { + ImmediateValue imm; + if (!src->getInsn()->src(2).getImmediate(imm)) + return false; + if (!imm.isInteger(0)) + return false; + } + + mod[0] = add->src(0).mod; + mod[1] = add->src(1).mod; + mod[2] = src->getUniqueInsn()->src(0).mod; + mod[3] = src->getUniqueInsn()->src(1).mod; + + if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad) + return false; + + add->op = toOp; + add->subOp = src->getInsn()->subOp; // potentially mul-high + + add->setSrc(2, add->src(s ? 
0 : 1)); + + add->setSrc(0, src->getInsn()->getSrc(0)); + add->src(0).mod = mod[2] ^ mod[s]; + add->setSrc(1, src->getInsn()->getSrc(1)); + add->src(1).mod = mod[3]; + + return true; +} + +void +AlgebraicOpt::handleMINMAX(Instruction *minmax) +{ + Value *src0 = minmax->getSrc(0); + Value *src1 = minmax->getSrc(1); + + if (src0 != src1 || src0->reg.file != FILE_GPR) + return; + if (minmax->src(0).mod == minmax->src(1).mod) { + if (minmax->def(0).mayReplace(minmax->src(0))) { + minmax->def(0).replace(minmax->src(0), false); + minmax->bb->remove(minmax); + } else { + minmax->op = OP_CVT; + minmax->setSrc(1, NULL); + } + } else { + // TODO: + // min(x, -x) = -abs(x) + // min(x, -abs(x)) = -abs(x) + // min(x, abs(x)) = x + // max(x, -abs(x)) = x + // max(x, abs(x)) = abs(x) + // max(x, -x) = abs(x) + } +} + +void +AlgebraicOpt::handleRCP(Instruction *rcp) +{ + Instruction *si = rcp->getSrc(0)->getUniqueInsn(); + + if (si && si->op == OP_RCP) { + Modifier mod = rcp->src(0).mod * si->src(0).mod; + rcp->op = mod.getOp(); + rcp->setSrc(0, si->getSrc(0)); + } +} + +void +AlgebraicOpt::handleSLCT(Instruction *slct) +{ + if (slct->getSrc(2)->reg.file == FILE_IMMEDIATE) { + if (slct->getSrc(2)->asImm()->compare(slct->asCmp()->setCond, 0.0f)) + slct->setSrc(0, slct->getSrc(1)); + } else + if (slct->getSrc(0) != slct->getSrc(1)) { + return; + } + slct->op = OP_MOV; + slct->setSrc(1, NULL); + slct->setSrc(2, NULL); +} + +void +AlgebraicOpt::handleLOGOP(Instruction *logop) +{ + Value *src0 = logop->getSrc(0); + Value *src1 = logop->getSrc(1); + + if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) + return; + + if (src0 == src1) { + if ((logop->op == OP_AND || logop->op == OP_OR) && + logop->def(0).mayReplace(logop->src(0))) { + logop->def(0).replace(logop->src(0), false); + delete_Instruction(prog, logop); + } + } else { + // try AND(SET, SET) -> SET_AND(SET) + Instruction *set0 = src0->getInsn(); + Instruction *set1 = src1->getInsn(); + + if (!set0 || set0->fixed || 
!set1 || set1->fixed) + return; + if (set1->op != OP_SET) { + Instruction *xchg = set0; + set0 = set1; + set1 = xchg; + if (set1->op != OP_SET) + return; + } + operation redOp = (logop->op == OP_AND ? OP_SET_AND : + logop->op == OP_XOR ? OP_SET_XOR : OP_SET_OR); + if (!prog->getTarget()->isOpSupported(redOp, set1->sType)) + return; + if (set0->op != OP_SET && + set0->op != OP_SET_AND && + set0->op != OP_SET_OR && + set0->op != OP_SET_XOR) + return; + if (set0->getDef(0)->refCount() > 1 && + set1->getDef(0)->refCount() > 1) + return; + if (set0->getPredicate() || set1->getPredicate()) + return; + // check that they don't source each other + for (int s = 0; s < 2; ++s) + if (set0->getSrc(s) == set1->getDef(0) || + set1->getSrc(s) == set0->getDef(0)) + return; + + set0 = cloneForward(func, set0); + set1 = cloneShallow(func, set1); + logop->bb->insertAfter(logop, set1); + logop->bb->insertAfter(logop, set0); + + set0->dType = TYPE_U8; + set0->getDef(0)->reg.file = FILE_PREDICATE; + set0->getDef(0)->reg.size = 1; + set1->setSrc(2, set0->getDef(0)); + set1->op = redOp; + set1->setDef(0, logop->getDef(0)); + delete_Instruction(prog, logop); + } +} + +// F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0 +// nv50: +// F2I(NEG(I2F(ABS(SET)))) +void +AlgebraicOpt::handleCVT(Instruction *cvt) +{ + if (cvt->sType != TYPE_F32 || + cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0)) + return; + Instruction *insn = cvt->getSrc(0)->getInsn(); + if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32) + return; + if (insn->src(0).mod != Modifier(0)) + return; + insn = insn->getSrc(0)->getInsn(); + + // check for nv50 SET(-1,0) -> SET(1.0f/0.0f) chain and nvc0's f32 SET + if (insn && insn->op == OP_CVT && + insn->dType == TYPE_F32 && + insn->sType == TYPE_S32) { + insn = insn->getSrc(0)->getInsn(); + if (!insn || insn->op != OP_ABS || insn->sType != TYPE_S32 || + insn->src(0).mod) + return; + insn = insn->getSrc(0)->getInsn(); + if (!insn || insn->op != OP_SET || 
insn->dType != TYPE_U32) + return; + } else + if (!insn || insn->op != OP_SET || insn->dType != TYPE_F32) { + return; + } + + Instruction *bset = cloneShallow(func, insn); + bset->dType = TYPE_U32; + bset->setDef(0, cvt->getDef(0)); + cvt->bb->insertAfter(cvt, bset); + delete_Instruction(prog, cvt); +} + +// SUCLAMP dst, (ADD b imm), k, 0 -> SUCLAMP dst, b, k, imm (if imm fits s6) +void +AlgebraicOpt::handleSUCLAMP(Instruction *insn) +{ + ImmediateValue imm; + int32_t val = insn->getSrc(2)->asImm()->reg.data.s32; + int s; + Instruction *add; + + assert(insn->srcExists(0) && insn->src(0).getFile() == FILE_GPR); + + // look for ADD (TODO: only count references by non-SUCLAMP) + if (insn->getSrc(0)->refCount() > 1) + return; + add = insn->getSrc(0)->getInsn(); + if (!add || add->op != OP_ADD || + (add->dType != TYPE_U32 && + add->dType != TYPE_S32)) + return; + + // look for immediate + for (s = 0; s < 2; ++s) + if (add->src(s).getImmediate(imm)) + break; + if (s >= 2) + return; + s = s ? 0 : 1; + // determine if immediate fits + val += imm.reg.data.s32; + if (val > 31 || val < -32) + return; + // determine if other addend fits + if (add->src(s).getFile() != FILE_GPR || add->src(s).mod != Modifier(0)) + return; + + bld.setPosition(insn, false); // make sure bld is init'ed + // replace sources + insn->setSrc(2, bld.mkImm(val)); + insn->setSrc(0, add->getSrc(s)); +} + +bool +AlgebraicOpt::visit(BasicBlock *bb) +{ + Instruction *next; + for (Instruction *i = bb->getEntry(); i; i = next) { + next = i->next; + switch (i->op) { + case OP_ABS: + handleABS(i); + break; + case OP_ADD: + handleADD(i); + break; + case OP_RCP: + handleRCP(i); + break; + case OP_MIN: + case OP_MAX: + handleMINMAX(i); + break; + case OP_SLCT: + handleSLCT(i); + break; + case OP_AND: + case OP_OR: + case OP_XOR: + handleLOGOP(i); + break; + case OP_CVT: + handleCVT(i); + break; + case OP_SUCLAMP: + handleSUCLAMP(i); + break; + default: + break; + } + } + + return true; +} + +// 
============================================================================= + +static inline void +updateLdStOffset(Instruction *ldst, int32_t offset, Function *fn) +{ + if (offset != ldst->getSrc(0)->reg.data.offset) { + if (ldst->getSrc(0)->refCount() > 1) + ldst->setSrc(0, cloneShallow(fn, ldst->getSrc(0))); + ldst->getSrc(0)->reg.data.offset = offset; + } +} + +// Combine loads and stores, forward stores to loads where possible. +class MemoryOpt : public Pass +{ +private: + class Record + { + public: + Record *next; + Instruction *insn; + const Value *rel[2]; + const Value *base; + int32_t offset; + int8_t fileIndex; + uint8_t size; + bool locked; + Record *prev; + + bool overlaps(const Instruction *ldst) const; + + inline void link(Record **); + inline void unlink(Record **); + inline void set(const Instruction *ldst); + }; + +public: + MemoryOpt(); + + Record *loads[DATA_FILE_COUNT]; + Record *stores[DATA_FILE_COUNT]; + + MemoryPool recordPool; + +private: + virtual bool visit(BasicBlock *); + bool runOpt(BasicBlock *); + + Record **getList(const Instruction *); + + Record *findRecord(const Instruction *, bool load, bool& isAdjacent) const; + + // merge @insn into load/store instruction from @rec + bool combineLd(Record *rec, Instruction *ld); + bool combineSt(Record *rec, Instruction *st); + + bool replaceLdFromLd(Instruction *ld, Record *ldRec); + bool replaceLdFromSt(Instruction *ld, Record *stRec); + bool replaceStFromSt(Instruction *restrict st, Record *stRec); + + void addRecord(Instruction *ldst); + void purgeRecords(Instruction *const st, DataFile); + void lockStores(Instruction *const ld); + void reset(); + +private: + Record *prevRecord; +}; + +MemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record), 6) +{ + for (int i = 0; i < DATA_FILE_COUNT; ++i) { + loads[i] = NULL; + stores[i] = NULL; + } + prevRecord = NULL; +} + +void +MemoryOpt::reset() +{ + for (unsigned int i = 0; i < DATA_FILE_COUNT; ++i) { + Record *it, *next; + for (it = 
loads[i]; it; it = next) { + next = it->next; + recordPool.release(it); + } + loads[i] = NULL; + for (it = stores[i]; it; it = next) { + next = it->next; + recordPool.release(it); + } + stores[i] = NULL; + } +} + +bool +MemoryOpt::combineLd(Record *rec, Instruction *ld) +{ + int32_t offRc = rec->offset; + int32_t offLd = ld->getSrc(0)->reg.data.offset; + int sizeRc = rec->size; + int sizeLd = typeSizeof(ld->dType); + int size = sizeRc + sizeLd; + int d, j; + + if (!prog->getTarget()-> + isAccessSupported(ld->getSrc(0)->reg.file, typeOfSize(size))) + return false; + // no unaligned loads + if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) || + ((size == 0xc) && (MIN2(offLd, offRc) & 0xf))) + return false; + + assert(sizeRc + sizeLd <= 16 && offRc != offLd); + + for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j); + + if (offLd < offRc) { + int sz; + for (sz = 0, d = 0; sz < sizeLd; sz += ld->getDef(d)->reg.size, ++d); + // d: nr of definitions in ld + // j: nr of definitions in rec->insn, move: + for (d = d + j - 1; j > 0; --j, --d) + rec->insn->setDef(d, rec->insn->getDef(j - 1)); + + if (rec->insn->getSrc(0)->refCount() > 1) + rec->insn->setSrc(0, cloneShallow(func, rec->insn->getSrc(0))); + rec->offset = rec->insn->getSrc(0)->reg.data.offset = offLd; + + d = 0; + } else { + d = j; + } + // move definitions of @ld to @rec->insn + for (j = 0; sizeLd; ++j, ++d) { + sizeLd -= ld->getDef(j)->reg.size; + rec->insn->setDef(d, ld->getDef(j)); + } + + rec->size = size; + rec->insn->getSrc(0)->reg.size = size; + rec->insn->setType(typeOfSize(size)); + + delete_Instruction(prog, ld); + + return true; +} + +bool +MemoryOpt::combineSt(Record *rec, Instruction *st) +{ + int32_t offRc = rec->offset; + int32_t offSt = st->getSrc(0)->reg.data.offset; + int sizeRc = rec->size; + int sizeSt = typeSizeof(st->dType); + int s = sizeSt / 4; + int size = sizeRc + sizeSt; + int j, k; + Value *src[4]; // no modifiers in ValueRef allowed for st + Value *extra[3]; + + if 
(!prog->getTarget()-> + isAccessSupported(st->getSrc(0)->reg.file, typeOfSize(size))) + return false; + if (size == 8 && MIN2(offRc, offSt) & 0x7) + return false; + + st->takeExtraSources(0, extra); // save predicate and indirect address + + if (offRc < offSt) { + // save values from @st + for (s = 0; sizeSt; ++s) { + sizeSt -= st->getSrc(s + 1)->reg.size; + src[s] = st->getSrc(s + 1); + } + // set record's values as low sources of @st + for (j = 1; sizeRc; ++j) { + sizeRc -= rec->insn->getSrc(j)->reg.size; + st->setSrc(j, rec->insn->getSrc(j)); + } + // set saved values as high sources of @st + for (k = j, j = 0; j < s; ++j) + st->setSrc(k++, src[j]); + + updateLdStOffset(st, offRc, func); + } else { + for (j = 1; sizeSt; ++j) + sizeSt -= st->getSrc(j)->reg.size; + for (s = 1; sizeRc; ++j, ++s) { + sizeRc -= rec->insn->getSrc(s)->reg.size; + st->setSrc(j, rec->insn->getSrc(s)); + } + rec->offset = offSt; + } + st->putExtraSources(0, extra); // restore pointer and predicate + + delete_Instruction(prog, rec->insn); + rec->insn = st; + rec->size = size; + rec->insn->getSrc(0)->reg.size = size; + rec->insn->setType(typeOfSize(size)); + return true; +} + +void +MemoryOpt::Record::set(const Instruction *ldst) +{ + const Symbol *mem = ldst->getSrc(0)->asSym(); + fileIndex = mem->reg.fileIndex; + rel[0] = ldst->getIndirect(0, 0); + rel[1] = ldst->getIndirect(0, 1); + offset = mem->reg.data.offset; + base = mem->getBase(); + size = typeSizeof(ldst->sType); +} + +void +MemoryOpt::Record::link(Record **list) +{ + next = *list; + if (next) + next->prev = this; + prev = NULL; + *list = this; +} + +void +MemoryOpt::Record::unlink(Record **list) +{ + if (next) + next->prev = prev; + if (prev) + prev->next = next; + else + *list = next; +} + +MemoryOpt::Record ** +MemoryOpt::getList(const Instruction *insn) +{ + if (insn->op == OP_LOAD || insn->op == OP_VFETCH) + return &loads[insn->src(0).getFile()]; + return &stores[insn->src(0).getFile()]; +} + +void 
+MemoryOpt::addRecord(Instruction *i) +{ + Record **list = getList(i); + Record *it = reinterpret_cast<Record *>(recordPool.allocate()); + + it->link(list); + it->set(i); + it->insn = i; + it->locked = false; +} + +MemoryOpt::Record * +MemoryOpt::findRecord(const Instruction *insn, bool load, bool& isAdj) const +{ + const Symbol *sym = insn->getSrc(0)->asSym(); + const int size = typeSizeof(insn->sType); + Record *rec = NULL; + Record *it = load ? loads[sym->reg.file] : stores[sym->reg.file]; + + for (; it; it = it->next) { + if (it->locked && insn->op != OP_LOAD) + continue; + if ((it->offset >> 4) != (sym->reg.data.offset >> 4) || + it->rel[0] != insn->getIndirect(0, 0) || + it->fileIndex != sym->reg.fileIndex || + it->rel[1] != insn->getIndirect(0, 1)) + continue; + + if (it->offset < sym->reg.data.offset) { + if (it->offset + it->size >= sym->reg.data.offset) { + isAdj = (it->offset + it->size == sym->reg.data.offset); + if (!isAdj) + return it; + if (!(it->offset & 0x7)) + rec = it; + } + } else { + isAdj = it->offset != sym->reg.data.offset; + if (size <= it->size && !isAdj) + return it; + else + if (!(sym->reg.data.offset & 0x7)) + if (it->offset - size <= sym->reg.data.offset) + rec = it; + } + } + return rec; +} + +bool +MemoryOpt::replaceLdFromSt(Instruction *ld, Record *rec) +{ + Instruction *st = rec->insn; + int32_t offSt = rec->offset; + int32_t offLd = ld->getSrc(0)->reg.data.offset; + int d, s; + + for (s = 1; offSt != offLd && st->srcExists(s); ++s) + offSt += st->getSrc(s)->reg.size; + if (offSt != offLd) + return false; + + for (d = 0; ld->defExists(d) && st->srcExists(s); ++d, ++s) { + if (ld->getDef(d)->reg.size != st->getSrc(s)->reg.size) + return false; + if (st->getSrc(s)->reg.file != FILE_GPR) + return false; + ld->def(d).replace(st->src(s), false); + } + ld->bb->remove(ld); + return true; +} + +bool +MemoryOpt::replaceLdFromLd(Instruction *ldE, Record *rec) +{ + Instruction *ldR = rec->insn; + int32_t offR = rec->offset; + int32_t offE = 
ldE->getSrc(0)->reg.data.offset; + int dR, dE; + + assert(offR <= offE); + for (dR = 0; offR < offE && ldR->defExists(dR); ++dR) + offR += ldR->getDef(dR)->reg.size; + if (offR != offE) + return false; + + for (dE = 0; ldE->defExists(dE) && ldR->defExists(dR); ++dE, ++dR) { + if (ldE->getDef(dE)->reg.size != ldR->getDef(dR)->reg.size) + return false; + ldE->def(dE).replace(ldR->getDef(dR), false); + } + + delete_Instruction(prog, ldE); + return true; +} + +bool +MemoryOpt::replaceStFromSt(Instruction *restrict st, Record *rec) +{ + const Instruction *const ri = rec->insn; + Value *extra[3]; + + int32_t offS = st->getSrc(0)->reg.data.offset; + int32_t offR = rec->offset; + int32_t endS = offS + typeSizeof(st->dType); + int32_t endR = offR + typeSizeof(ri->dType); + + rec->size = MAX2(endS, endR) - MIN2(offS, offR); + + st->takeExtraSources(0, extra); + + if (offR < offS) { + Value *vals[10]; + int s, n; + int k = 0; + // get non-replaced sources of ri + for (s = 1; offR < offS; offR += ri->getSrc(s)->reg.size, ++s) + vals[k++] = ri->getSrc(s); + n = s; + // get replaced sources of st + for (s = 1; st->srcExists(s); offS += st->getSrc(s)->reg.size, ++s) + vals[k++] = st->getSrc(s); + // skip replaced sources of ri + for (s = n; offR < endS; offR += ri->getSrc(s)->reg.size, ++s); + // get non-replaced sources after values covered by st + for (; offR < endR; offR += ri->getSrc(s)->reg.size, ++s) + vals[k++] = ri->getSrc(s); + assert((unsigned int)k <= Elements(vals)); + for (s = 0; s < k; ++s) + st->setSrc(s + 1, vals[s]); + st->setSrc(0, ri->getSrc(0)); + } else + if (endR > endS) { + int j, s; + for (j = 1; offR < endS; offR += ri->getSrc(j++)->reg.size); + for (s = 1; offS < endS; offS += st->getSrc(s++)->reg.size); + for (; offR < endR; offR += ri->getSrc(j++)->reg.size) + st->setSrc(s++, ri->getSrc(j)); + } + st->putExtraSources(0, extra); + + delete_Instruction(prog, rec->insn); + + rec->insn = st; + rec->offset = st->getSrc(0)->reg.data.offset; + + 
st->setType(typeOfSize(rec->size)); + + return true; +} + +bool +MemoryOpt::Record::overlaps(const Instruction *ldst) const +{ + Record that; + that.set(ldst); + + if (this->fileIndex != that.fileIndex) + return false; + + if (this->rel[0] || that.rel[0]) + return this->base == that.base; + return + (this->offset < that.offset + that.size) && + (this->offset + this->size > that.offset); +} + +// We must not eliminate stores that affect the result of @ld if +// we find later stores to the same location, and we may no longer +// merge them with later stores. +// The stored value can, however, still be used to determine the value +// returned by future loads. +void +MemoryOpt::lockStores(Instruction *const ld) +{ + for (Record *r = stores[ld->src(0).getFile()]; r; r = r->next) + if (!r->locked && r->overlaps(ld)) + r->locked = true; +} + +// Prior loads from the location of @st are no longer valid. +// Stores to the location of @st may no longer be used to derive +// the value at it nor be coalesced into later stores. +void +MemoryOpt::purgeRecords(Instruction *const st, DataFile f) +{ + if (st) + f = st->src(0).getFile(); + + for (Record *r = loads[f]; r; r = r->next) + if (!st || r->overlaps(st)) + r->unlink(&loads[f]); + + for (Record *r = stores[f]; r; r = r->next) + if (!st || r->overlaps(st)) + r->unlink(&stores[f]); +} + +bool +MemoryOpt::visit(BasicBlock *bb) +{ + bool ret = runOpt(bb); + // Run again, one pass won't combine 4 32 bit ld/st to a single 128 bit ld/st + // where 96 bit memory operations are forbidden. 
+ if (ret) + ret = runOpt(bb); + return ret; +} + +bool +MemoryOpt::runOpt(BasicBlock *bb) +{ + Instruction *ldst, *next; + Record *rec; + bool isAdjacent = true; + + for (ldst = bb->getEntry(); ldst; ldst = next) { + bool keep = true; + bool isLoad = true; + next = ldst->next; + + if (ldst->op == OP_LOAD || ldst->op == OP_VFETCH) { + if (ldst->isDead()) { + // might have been produced by earlier optimization + delete_Instruction(prog, ldst); + continue; + } + } else + if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) { + isLoad = false; + } else { + // TODO: maybe have all fixed ops act as barrier ? + if (ldst->op == OP_CALL || + ldst->op == OP_BAR || + ldst->op == OP_MEMBAR) { + purgeRecords(NULL, FILE_MEMORY_LOCAL); + purgeRecords(NULL, FILE_MEMORY_GLOBAL); + purgeRecords(NULL, FILE_MEMORY_SHARED); + purgeRecords(NULL, FILE_SHADER_OUTPUT); + } else + if (ldst->op == OP_ATOM || ldst->op == OP_CCTL) { + if (ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) { + purgeRecords(NULL, FILE_MEMORY_LOCAL); + purgeRecords(NULL, FILE_MEMORY_GLOBAL); + purgeRecords(NULL, FILE_MEMORY_SHARED); + } else { + purgeRecords(NULL, ldst->src(0).getFile()); + } + } else + if (ldst->op == OP_EMIT || ldst->op == OP_RESTART) { + purgeRecords(NULL, FILE_SHADER_OUTPUT); + } + continue; + } + if (ldst->getPredicate()) // TODO: handle predicated ld/st + continue; + + if (isLoad) { + DataFile file = ldst->src(0).getFile(); + + // if ld l[]/g[] look for previous store to eliminate the reload + if (file == FILE_MEMORY_GLOBAL || file == FILE_MEMORY_LOCAL) { + // TODO: shared memory ? + rec = findRecord(ldst, false, isAdjacent); + if (rec && !isAdjacent) + keep = !replaceLdFromSt(ldst, rec); + } + + // or look for ld from the same location and replace this one + rec = keep ? 
findRecord(ldst, true, isAdjacent) : NULL; + if (rec) { + if (!isAdjacent) + keep = !replaceLdFromLd(ldst, rec); + else + // or combine a previous load with this one + keep = !combineLd(rec, ldst); + } + if (keep) + lockStores(ldst); + } else { + rec = findRecord(ldst, false, isAdjacent); + if (rec) { + if (!isAdjacent) + keep = !replaceStFromSt(ldst, rec); + else + keep = !combineSt(rec, ldst); + } + if (keep) + purgeRecords(ldst, DATA_FILE_COUNT); + } + if (keep) + addRecord(ldst); + } + reset(); + + return true; +} + +// ============================================================================= + +// Turn control flow into predicated instructions (after register allocation !). +// TODO: +// Could move this to before register allocation on NVC0 and also handle nested +// constructs. +class FlatteningPass : public Pass +{ +private: + virtual bool visit(BasicBlock *); + + bool tryPredicateConditional(BasicBlock *); + void predicateInstructions(BasicBlock *, Value *pred, CondCode cc); + void tryPropagateBranch(BasicBlock *); + inline bool isConstantCondition(Value *pred); + inline bool mayPredicate(const Instruction *, const Value *pred) const; + inline void removeFlow(Instruction *); +}; + +bool +FlatteningPass::isConstantCondition(Value *pred) +{ + Instruction *insn = pred->getUniqueInsn(); + assert(insn); + if (insn->op != OP_SET || insn->srcExists(2)) + return false; + + for (int s = 0; s < 2 && insn->srcExists(s); ++s) { + Instruction *ld = insn->getSrc(s)->getUniqueInsn(); + DataFile file; + if (ld) { + if (ld->op != OP_MOV && ld->op != OP_LOAD) + return false; + if (ld->src(0).isIndirect(0)) + return false; + file = ld->src(0).getFile(); + } else { + file = insn->src(s).getFile(); + // catch $r63 on NVC0 + if (file == FILE_GPR && insn->getSrc(s)->reg.data.id > prog->maxGPR) + file = FILE_IMMEDIATE; + } + if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST) + return false; + } + return true; +} + +void +FlatteningPass::removeFlow(Instruction *insn) +{ + 
FlowInstruction *term = insn ? insn->asFlow() : NULL; + if (!term) + return; + Graph::Edge::Type ty = term->bb->cfg.outgoing().getType(); + + if (term->op == OP_BRA) { + // TODO: this might get more difficult when we get arbitrary BRAs + if (ty == Graph::Edge::CROSS || ty == Graph::Edge::BACK) + return; + } else + if (term->op != OP_JOIN) + return; + + Value *pred = term->getPredicate(); + + delete_Instruction(prog, term); + + if (pred && pred->refCount() == 0) { + Instruction *pSet = pred->getUniqueInsn(); + pred->join->reg.data.id = -1; // deallocate + if (pSet->isDead()) + delete_Instruction(prog, pSet); + } +} + +void +FlatteningPass::predicateInstructions(BasicBlock *bb, Value *pred, CondCode cc) +{ + for (Instruction *i = bb->getEntry(); i; i = i->next) { + if (i->isNop()) + continue; + assert(!i->getPredicate()); + i->setPredicate(cc, pred); + } + removeFlow(bb->getExit()); +} + +bool +FlatteningPass::mayPredicate(const Instruction *insn, const Value *pred) const +{ + if (insn->isPseudo()) + return true; + // TODO: calls where we don't know which registers are modified + + if (!prog->getTarget()->mayPredicate(insn, pred)) + return false; + for (int d = 0; insn->defExists(d); ++d) + if (insn->getDef(d)->equals(pred)) + return false; + return true; +} + +// If we jump to BRA/RET/EXIT, replace the jump with it. +// NOTE: We do not update the CFG anymore here ! +// +// TODO: Handle cases where we skip over a branch (maybe do that elsewhere ?): +// BB:0 +// @p0 bra BB:2 -> @!p0 bra BB:3 iff (!) BB:2 immediately adjoins BB:1 +// BB1: +// bra BB:3 +// BB2: +// ... +// BB3: +// ... 
+void +FlatteningPass::tryPropagateBranch(BasicBlock *bb) +{ + for (Instruction *i = bb->getExit(); i && i->op == OP_BRA; i = i->prev) { + BasicBlock *bf = i->asFlow()->target.bb; + + if (bf->getInsnCount() != 1) + continue; + + FlowInstruction *bra = i->asFlow(); + FlowInstruction *rep = bf->getExit()->asFlow(); + + if (!rep || rep->getPredicate()) + continue; + if (rep->op != OP_BRA && + rep->op != OP_JOIN && + rep->op != OP_EXIT) + continue; + + // TODO: If there are multiple branches to @rep, only the first would + // be replaced, so only remove them after this pass is done ? + // Also, need to check all incident blocks for fall-through exits and + // add the branch there. + bra->op = rep->op; + bra->target.bb = rep->target.bb; + if (bf->cfg.incidentCount() == 1) + bf->remove(rep); + } +} + +bool +FlatteningPass::visit(BasicBlock *bb) +{ + if (tryPredicateConditional(bb)) + return true; + + // try to attach join to previous instruction + Instruction *insn = bb->getExit(); + if (insn && insn->op == OP_JOIN && !insn->getPredicate()) { + insn = insn->prev; + if (insn && !insn->getPredicate() && + !insn->asFlow() && + insn->op != OP_TEXBAR && + !isTextureOp(insn->op) && // probably just nve4 + !isSurfaceOp(insn->op) && // not confirmed + insn->op != OP_LINTERP && // probably just nve4 + insn->op != OP_PINTERP && // probably just nve4 + ((insn->op != OP_LOAD && insn->op != OP_STORE) || + typeSizeof(insn->dType) <= 4) && + !insn->isNop()) { + insn->join = 1; + bb->remove(bb->getExit()); + return true; + } + } + + tryPropagateBranch(bb); + + return true; +} + +bool +FlatteningPass::tryPredicateConditional(BasicBlock *bb) +{ + BasicBlock *bL = NULL, *bR = NULL; + unsigned int nL = 0, nR = 0, limit = 12; + Instruction *insn; + unsigned int mask; + + mask = bb->initiatesSimpleConditional(); + if (!mask) + return false; + + assert(bb->getExit()); + Value *pred = bb->getExit()->getPredicate(); + assert(pred); + + if (isConstantCondition(pred)) + limit = 4; + + 
Graph::EdgeIterator ei = bb->cfg.outgoing(); + + if (mask & 1) { + bL = BasicBlock::get(ei.getNode()); + for (insn = bL->getEntry(); insn; insn = insn->next, ++nL) + if (!mayPredicate(insn, pred)) + return false; + if (nL > limit) + return false; // too long, do a real branch + } + ei.next(); + + if (mask & 2) { + bR = BasicBlock::get(ei.getNode()); + for (insn = bR->getEntry(); insn; insn = insn->next, ++nR) + if (!mayPredicate(insn, pred)) + return false; + if (nR > limit) + return false; // too long, do a real branch + } + + if (bL) + predicateInstructions(bL, pred, bb->getExit()->cc); + if (bR) + predicateInstructions(bR, pred, inverseCondCode(bb->getExit()->cc)); + + if (bb->joinAt) { + bb->remove(bb->joinAt); + bb->joinAt = NULL; + } + removeFlow(bb->getExit()); // delete the branch/join at the fork point + + // remove potential join operations at the end of the conditional + if (prog->getTarget()->joinAnterior) { + bb = BasicBlock::get((bL ? bL : bR)->cfg.outgoing().getNode()); + if (bb->getEntry() && bb->getEntry()->op == OP_JOIN) + removeFlow(bb->getEntry()); + } + + return true; +} + +// ============================================================================= + +// Common subexpression elimination. Stupid O^2 implementation. 
+class LocalCSE : public Pass +{ +private: + virtual bool visit(BasicBlock *); + + inline bool tryReplace(Instruction **, Instruction *); + + DLList ops[OP_LAST + 1]; +}; + +class GlobalCSE : public Pass +{ +private: + virtual bool visit(BasicBlock *); +}; + +bool +Instruction::isActionEqual(const Instruction *that) const +{ + if (this->op != that->op || + this->dType != that->dType || + this->sType != that->sType) + return false; + if (this->cc != that->cc) + return false; + + if (this->asTex()) { + if (memcmp(&this->asTex()->tex, + &that->asTex()->tex, + sizeof(this->asTex()->tex))) + return false; + } else + if (this->asCmp()) { + if (this->asCmp()->setCond != that->asCmp()->setCond) + return false; + } else + if (this->asFlow()) { + return false; + } else { + if (this->ipa != that->ipa || + this->lanes != that->lanes || + this->perPatch != that->perPatch) + return false; + if (this->postFactor != that->postFactor) + return false; + } + + if (this->subOp != that->subOp || + this->saturate != that->saturate || + this->rnd != that->rnd || + this->ftz != that->ftz || + this->dnz != that->dnz || + this->cache != that->cache || + this->mask != that->mask) + return false; + + return true; +} + +bool +Instruction::isResultEqual(const Instruction *that) const +{ + unsigned int d, s; + + // NOTE: location of discard only affects tex with liveOnly and quadops + if (!this->defExists(0) && this->op != OP_DISCARD) + return false; + + if (!isActionEqual(that)) + return false; + + if (this->predSrc != that->predSrc) + return false; + + for (d = 0; this->defExists(d); ++d) { + if (!that->defExists(d) || + !this->getDef(d)->equals(that->getDef(d), false)) + return false; + } + if (that->defExists(d)) + return false; + + for (s = 0; this->srcExists(s); ++s) { + if (!that->srcExists(s)) + return false; + if (this->src(s).mod != that->src(s).mod) + return false; + if (!this->getSrc(s)->equals(that->getSrc(s), true)) + return false; + } + if (that->srcExists(s)) + return false; + + 
if (op == OP_LOAD || op == OP_VFETCH) { + switch (src(0).getFile()) { + case FILE_MEMORY_CONST: + case FILE_SHADER_INPUT: + return true; + default: + return false; + } + } + + return true; +} + +// pull through common expressions from different in-blocks +bool +GlobalCSE::visit(BasicBlock *bb) +{ + Instruction *phi, *next, *ik; + int s; + + // TODO: maybe do this with OP_UNION, too + + for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = next) { + next = phi->next; + if (phi->getSrc(0)->refCount() > 1) + continue; + ik = phi->getSrc(0)->getInsn(); + if (!ik) + continue; // probably a function input + for (s = 1; phi->srcExists(s); ++s) { + if (phi->getSrc(s)->refCount() > 1) + break; + if (!phi->getSrc(s)->getInsn() || + !phi->getSrc(s)->getInsn()->isResultEqual(ik)) + break; + } + if (!phi->srcExists(s)) { + Instruction *entry = bb->getEntry(); + ik->bb->remove(ik); + if (!entry || entry->op != OP_JOIN) + bb->insertHead(ik); + else + bb->insertAfter(entry, ik); + ik->setDef(0, phi->getDef(0)); + delete_Instruction(prog, phi); + } + } + + return true; +} + +bool +LocalCSE::tryReplace(Instruction **ptr, Instruction *i) +{ + Instruction *old = *ptr; + + // TODO: maybe relax this later (causes trouble with OP_UNION) + if (i->isPredicated()) + return false; + + if (!old->isResultEqual(i)) + return false; + + for (int d = 0; old->defExists(d); ++d) + old->def(d).replace(i->getDef(d), false); + delete_Instruction(prog, old); + *ptr = NULL; + return true; +} + +bool +LocalCSE::visit(BasicBlock *bb) +{ + unsigned int replaced; + + do { + Instruction *ir, *next; + + replaced = 0; + + // will need to know the order of instructions + int serial = 0; + for (ir = bb->getFirst(); ir; ir = ir->next) + ir->serial = serial++; + + for (ir = bb->getEntry(); ir; ir = next) { + int s; + Value *src = NULL; + + next = ir->next; + + if (ir->fixed) { + ops[ir->op].insert(ir); + continue; + } + + for (s = 0; ir->srcExists(s); ++s) + if (ir->getSrc(s)->asLValue()) + if (!src || 
ir->getSrc(s)->refCount() < src->refCount()) + src = ir->getSrc(s); + + if (src) { + for (Value::UseIterator it = src->uses.begin(); + it != src->uses.end(); ++it) { + Instruction *ik = (*it)->getInsn(); + if (ik && ik->bb == ir->bb && ik->serial < ir->serial) + if (tryReplace(&ir, ik)) + break; + } + } else { + DLLIST_FOR_EACH(&ops[ir->op], iter) + { + Instruction *ik = reinterpret_cast<Instruction *>(iter.get()); + if (tryReplace(&ir, ik)) + break; + } + } + + if (ir) + ops[ir->op].insert(ir); + else + ++replaced; + } + for (unsigned int i = 0; i <= OP_LAST; ++i) + ops[i].clear(); + + } while (replaced); + + return true; +} + +// ============================================================================= + +// Remove computations of unused values. +class DeadCodeElim : public Pass +{ +public: + bool buryAll(Program *); + +private: + virtual bool visit(BasicBlock *); + + void checkSplitLoad(Instruction *ld); // for partially dead loads + + unsigned int deadCount; +}; + +bool +DeadCodeElim::buryAll(Program *prog) +{ + do { + deadCount = 0; + if (!this->run(prog, false, false)) + return false; + } while (deadCount); + + return true; +} + +bool +DeadCodeElim::visit(BasicBlock *bb) +{ + Instruction *next; + + for (Instruction *i = bb->getFirst(); i; i = next) { + next = i->next; + if (i->isDead()) { + ++deadCount; + delete_Instruction(prog, i); + } else + if (i->defExists(1) && (i->op == OP_VFETCH || i->op == OP_LOAD)) { + checkSplitLoad(i); + } else + if (i->defExists(0) && !i->getDef(0)->refCount()) { + if (i->op == OP_ATOM || + i->op == OP_SUREDP || + i->op == OP_SUREDB) + i->setDef(0, NULL); + } + } + return true; +} + +void +DeadCodeElim::checkSplitLoad(Instruction *ld1) +{ + Instruction *ld2 = NULL; // can get at most 2 loads + Value *def1[4]; + Value *def2[4]; + int32_t addr1, addr2; + int32_t size1, size2; + int d, n1, n2; + uint32_t mask = 0xffffffff; + + for (d = 0; ld1->defExists(d); ++d) + if (!ld1->getDef(d)->refCount() && ld1->getDef(d)->reg.data.id < 
0) + mask &= ~(1 << d); + if (mask == 0xffffffff) + return; + + addr1 = ld1->getSrc(0)->reg.data.offset; + n1 = n2 = 0; + size1 = size2 = 0; + for (d = 0; ld1->defExists(d); ++d) { + if (mask & (1 << d)) { + if (size1 && (addr1 & 0x7)) + break; + def1[n1] = ld1->getDef(d); + size1 += def1[n1++]->reg.size; + } else + if (!n1) { + addr1 += ld1->getDef(d)->reg.size; + } else { + break; + } + } + for (addr2 = addr1 + size1; ld1->defExists(d); ++d) { + if (mask & (1 << d)) { + def2[n2] = ld1->getDef(d); + size2 += def2[n2++]->reg.size; + } else { + assert(!n2); + addr2 += ld1->getDef(d)->reg.size; + } + } + + updateLdStOffset(ld1, addr1, func); + ld1->setType(typeOfSize(size1)); + for (d = 0; d < 4; ++d) + ld1->setDef(d, (d < n1) ? def1[d] : NULL); + + if (!n2) + return; + + ld2 = cloneShallow(func, ld1); + updateLdStOffset(ld2, addr2, func); + ld2->setType(typeOfSize(size2)); + for (d = 0; d < 4; ++d) + ld2->setDef(d, (d < n2) ? def2[d] : NULL); + + ld1->bb->insertAfter(ld1, ld2); +} + +// ============================================================================= + +#define RUN_PASS(l, n, f) \ + if (level >= (l)) { \ + if (dbgFlags & NV50_IR_DEBUG_VERBOSE) \ + INFO("PEEPHOLE: %s\n", #n); \ + n pass; \ + if (!pass.f(this)) \ + return false; \ + } + +bool +Program::optimizeSSA(int level) +{ + RUN_PASS(1, DeadCodeElim, buryAll); + RUN_PASS(1, CopyPropagation, run); + RUN_PASS(2, GlobalCSE, run); + RUN_PASS(1, LocalCSE, run); + RUN_PASS(2, AlgebraicOpt, run); + RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks + RUN_PASS(1, ConstantFolding, foldAll); + RUN_PASS(1, LoadPropagation, run); + RUN_PASS(2, MemoryOpt, run); + RUN_PASS(2, LocalCSE, run); + RUN_PASS(0, DeadCodeElim, buryAll); + + return true; +} + +bool +Program::optimizePostRA(int level) +{ + RUN_PASS(2, FlatteningPass, run); + return true; +} + +} diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp new file mode 
100644 index 00000000000..ee39b3c5880 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -0,0 +1,698 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_target.h" + +#define __STDC_FORMAT_MACROS +#include <inttypes.h> + +namespace nv50_ir { + +enum TextStyle +{ + TXT_DEFAULT, + TXT_GPR, + TXT_REGISTER, + TXT_FLAGS, + TXT_MEM, + TXT_IMMD, + TXT_BRA, + TXT_INSN +}; + +static const char *_colour[8] = +{ + "\x1b[00m", + "\x1b[34m", + "\x1b[35m", + "\x1b[35m", + "\x1b[36m", + "\x1b[33m", + "\x1b[37m", + "\x1b[32m" +}; + +static const char *_nocolour[8] = +{ + "", "", "", "", "", "", "", "" +}; + +static const char **colour; + +static void init_colours() +{ + if (getenv("NV50_PROG_DEBUG_NO_COLORS") != NULL) + colour = _nocolour; + else + colour = _colour; +} + +const char *operationStr[OP_LAST + 1] = +{ + "nop", + "phi", + "union", + "split", + "merge", + "consec", + "mov", + "ld", + "st", + "add", + "sub", + "mul", + "div", + "mod", + "mad", + "fma", + "sad", + "abs", + "neg", + "not", + "and", + "or", + "xor", + "shl", + "shr", + "max", + "min", + "sat", + "ceil", + "floor", + "trunc", + "cvt", + "set and", + "set or", + "set xor", + "set", + "selp", + "slct", + "rcp", + "rsq", + "lg2", + "sin", + "cos", + "ex2", + "exp", + "log", + "presin", + "preex2", + "sqrt", + "pow", + "bra", + "call", + "ret", + "cont", + "break", + "preret", + "precont", + "prebreak", + "brkpt", + "joinat", + "join", + "discard", + "exit", + "membar", + "vfetch", + "pfetch", + "export", + "linterp", + "pinterp", + "emit", + "restart", + "tex", + "texbias", + "texlod", + "texfetch", + "texquery", + "texgrad", + "texgather", + "texcsaa", + "texprep", + "suldb", + "suldp", + "sustb", + "sustp", + "suredb", + "suredp", + "sulea", + "subfm", + "suclamp", + "sueau", + "madsp", + "texbar", + "dfdx", + "dfdy", + "rdsv", + "wrsv", + "quadop", + "quadon", + "quadpop", + "popcnt", + "insbf", + "extbf", + "permt", + "atom", + "bar", + "vadd", + "vavg", + "vmin", + "vmax", + "vsad", + "vset", + "vshr", + "vshl", + "vsel", + "cctl", + "(invalid)" +}; + +static const char *atomSubOpStr[] = +{ + 
"add", "min", "max", "inc", "dec", "and", "or", "xor", "cas", "exch" +}; + +static const char *DataTypeStr[] = +{ + "-", + "u8", "s8", + "u16", "s16", + "u32", "s32", + "u64", "s64", + "f16", "f32", "f64", + "b96", "b128" +}; + +static const char *RoundModeStr[] = +{ + "", "rm", "rz", "rp", "rni", "rmi", "rzi", "rpi" +}; + +static const char *CondCodeStr[] = +{ + "never", + "lt", + "eq", + "le", + "gt", + "ne", + "ge", + "", + "(invalid)", + "ltu", + "equ", + "leu", + "gtu", + "neu", + "geu", + "", + "no", + "nc", + "ns", + "na", + "a", + "s", + "c", + "o" +}; + +static const char *SemanticStr[SV_LAST + 1] = +{ + "POSITION", + "VERTEX_ID", + "INSTANCE_ID", + "INVOCATION_ID", + "PRIMITIVE_ID", + "VERTEX_COUNT", + "LAYER", + "VIEWPORT_INDEX", + "Y_DIR", + "FACE", + "POINT_SIZE", + "POINT_COORD", + "CLIP_DISTANCE", + "SAMPLE_INDEX", + "TESS_FACTOR", + "TESS_COORD", + "TID", + "CTAID", + "NTID", + "GRIDID", + "NCTAID", + "LANEID", + "PHYSID", + "NPHYSID", + "CLOCK", + "LBASE", + "SBASE", + "?", + "(INVALID)" +}; + +static const char *interpStr[16] = +{ + "pass", + "mul", + "flat", + "sc", + "cent pass", + "cent mul", + "cent flat", + "cent sc", + "off pass", + "off mul", + "off flat", + "off sc", + "samp pass", + "samp mul", + "samp flat", + "samp sc" +}; + +#define PRINT(args...) \ + do { \ + pos += snprintf(&buf[pos], size - pos, args); \ + } while(0) + +#define SPACE_PRINT(cond, args...) 
\ + do { \ + if (cond) \ + buf[pos++] = ' '; \ + pos += snprintf(&buf[pos], size - pos, args); \ + } while(0) + +#define SPACE() \ + do { \ + if (pos < size) \ + buf[pos++] = ' '; \ + } while(0) + +int Modifier::print(char *buf, size_t size) const +{ + size_t pos = 0; + + if (bits) + PRINT("%s", colour[TXT_INSN]); + + size_t base = pos; + + if (bits & NV50_IR_MOD_NOT) + PRINT("not"); + if (bits & NV50_IR_MOD_SAT) + SPACE_PRINT(pos > base && pos < size, "sat"); + if (bits & NV50_IR_MOD_NEG) + SPACE_PRINT(pos > base && pos < size, "neg"); + if (bits & NV50_IR_MOD_ABS) + SPACE_PRINT(pos > base && pos < size, "abs"); + + return pos; +} + +int LValue::print(char *buf, size_t size, DataType ty) const +{ + const char *postFix = ""; + size_t pos = 0; + int idx = join->reg.data.id >= 0 ? join->reg.data.id : id; + char p = join->reg.data.id >= 0 ? '$' : '%'; + char r; + int col = TXT_DEFAULT; + + switch (reg.file) { + case FILE_GPR: + r = 'r'; col = TXT_GPR; + if (reg.size == 2) { + if (p == '$') { + postFix = (idx & 1) ? 
"h" : "l"; + idx /= 2; + } else { + postFix = "s"; + } + } else + if (reg.size == 8) { + postFix = "d"; + } else + if (reg.size == 16) { + postFix = "q"; + } else + if (reg.size == 12) { + postFix = "t"; + } + break; + case FILE_PREDICATE: + r = 'p'; col = TXT_REGISTER; + if (reg.size == 2) + postFix = "d"; + else + if (reg.size == 4) + postFix = "q"; + break; + case FILE_FLAGS: + r = 'c'; col = TXT_FLAGS; + break; + case FILE_ADDRESS: + r = 'a'; col = TXT_REGISTER; + break; + default: + assert(!"invalid file for lvalue"); + r = '?'; + break; + } + + PRINT("%s%c%c%i%s", colour[col], p, r, idx, postFix); + + return pos; +} + +int ImmediateValue::print(char *buf, size_t size, DataType ty) const +{ + size_t pos = 0; + + PRINT("%s", colour[TXT_IMMD]); + + switch (ty) { + case TYPE_F32: PRINT("%f", reg.data.f32); break; + case TYPE_F64: PRINT("%f", reg.data.f64); break; + case TYPE_U8: PRINT("0x%02x", reg.data.u8); break; + case TYPE_S8: PRINT("%i", reg.data.s8); break; + case TYPE_U16: PRINT("0x%04x", reg.data.u16); break; + case TYPE_S16: PRINT("%i", reg.data.s16); break; + case TYPE_U32: PRINT("0x%08x", reg.data.u32); break; + case TYPE_S32: PRINT("%i", reg.data.s32); break; + case TYPE_U64: + case TYPE_S64: + default: + PRINT("0x%016"PRIx64, reg.data.u64); + break; + } + return pos; +} + +int Symbol::print(char *buf, size_t size, DataType ty) const +{ + return print(buf, size, NULL, NULL, ty); +} + +int Symbol::print(char *buf, size_t size, + Value *rel, Value *dimRel, DataType ty) const +{ + size_t pos = 0; + char c; + + if (ty == TYPE_NONE) + ty = typeOfSize(reg.size); + + if (reg.file == FILE_SYSTEM_VALUE) { + PRINT("%ssv[%s%s:%i%s", colour[TXT_MEM], + colour[TXT_REGISTER], + SemanticStr[reg.data.sv.sv], reg.data.sv.index, colour[TXT_MEM]); + if (rel) { + PRINT("%s+", colour[TXT_DEFAULT]); + pos += rel->print(&buf[pos], size - pos); + } + PRINT("%s]", colour[TXT_MEM]); + return pos; + } + + switch (reg.file) { + case FILE_MEMORY_CONST: c = 'c'; break; + case 
FILE_SHADER_INPUT: c = 'a'; break; + case FILE_SHADER_OUTPUT: c = 'o'; break; + case FILE_MEMORY_GLOBAL: c = 'g'; break; + case FILE_MEMORY_SHARED: c = 's'; break; + case FILE_MEMORY_LOCAL: c = 'l'; break; + default: + assert(!"invalid file"); + c = '?'; + break; + } + + if (c == 'c') + PRINT("%s%c%i[", colour[TXT_MEM], c, reg.fileIndex); + else + PRINT("%s%c[", colour[TXT_MEM], c); + + if (dimRel) { + pos += dimRel->print(&buf[pos], size - pos, TYPE_S32); + PRINT("%s][", colour[TXT_MEM]); + } + + if (rel) { + pos += rel->print(&buf[pos], size - pos); + PRINT("%s%c", colour[TXT_DEFAULT], (reg.data.offset < 0) ? '-' : '+'); + } else { + assert(reg.data.offset >= 0); + } + PRINT("%s0x%x%s]", colour[TXT_IMMD], abs(reg.data.offset), colour[TXT_MEM]); + + return pos; +} + +void Instruction::print() const +{ + #define BUFSZ 512 + + const size_t size = BUFSZ; + + char buf[BUFSZ]; + int s, d; + size_t pos = 0; + + PRINT("%s", colour[TXT_INSN]); + + if (join) + PRINT("join "); + + if (predSrc >= 0) { + const size_t pre = pos; + if (getSrc(predSrc)->reg.file == FILE_PREDICATE) { + if (cc == CC_NOT_P) + PRINT("not"); + } else { + PRINT("%s", CondCodeStr[cc]); + } + if (pos > pre) + SPACE(); + pos += getSrc(predSrc)->print(&buf[pos], BUFSZ - pos); + PRINT(" %s", colour[TXT_INSN]); + } + + if (saturate) + PRINT("sat "); + + if (asFlow()) { + PRINT("%s", operationStr[op]); + if (asFlow()->indirect) + PRINT(" ind"); + if (asFlow()->absolute) + PRINT(" abs"); + if (op == OP_CALL && asFlow()->builtin) { + PRINT(" %sBUILTIN:%i", colour[TXT_BRA], asFlow()->target.builtin); + } else + if (op == OP_CALL && asFlow()->target.fn) { + PRINT(" %s%s:%i", colour[TXT_BRA], + asFlow()->target.fn->getName(), + asFlow()->target.fn->getLabel()); + } else + if (asFlow()->target.bb) + PRINT(" %sBB:%i", colour[TXT_BRA], asFlow()->target.bb->getId()); + } else { + PRINT("%s ", operationStr[op]); + if (op == OP_LINTERP || op == OP_PINTERP) + PRINT("%s ", interpStr[ipa]); + switch (op) { + case 
OP_SUREDP: + case OP_ATOM: + if (subOp < Elements(atomSubOpStr)) + PRINT("%s ", atomSubOpStr[subOp]); + break; + default: + if (subOp) + PRINT("(SUBOP:%u) ", subOp); + break; + } + if (perPatch) + PRINT("patch "); + if (asTex()) + PRINT("%s %s$r%u $s%u %s", asTex()->tex.target.getName(), + colour[TXT_MEM], asTex()->tex.r, asTex()->tex.s, + colour[TXT_INSN]); + if (postFactor) + PRINT("x2^%i ", postFactor); + PRINT("%s%s", dnz ? "dnz " : (ftz ? "ftz " : ""), DataTypeStr[dType]); + } + + if (rnd != ROUND_N) + PRINT(" %s", RoundModeStr[rnd]); + + if (defExists(1)) + PRINT(" {"); + for (d = 0; defExists(d); ++d) { + SPACE(); + pos += getDef(d)->print(&buf[pos], size - pos); + } + if (d > 1) + PRINT(" %s}", colour[TXT_INSN]); + else + if (!d && !asFlow()) + PRINT(" %s#", colour[TXT_INSN]); + + if (asCmp()) + PRINT(" %s%s", colour[TXT_INSN], CondCodeStr[asCmp()->setCond]); + + if (sType != dType) + PRINT(" %s%s", colour[TXT_INSN], DataTypeStr[sType]); + + for (s = 0; srcExists(s); ++s) { + if (s == predSrc || src(s).usedAsPtr) + continue; + const size_t pre = pos; + SPACE(); + pos += src(s).mod.print(&buf[pos], BUFSZ - pos); + if (pos > pre + 1) + SPACE(); + if (src(s).isIndirect(0) || src(s).isIndirect(1)) + pos += getSrc(s)->asSym()->print(&buf[pos], BUFSZ - pos, + getIndirect(s, 0), + getIndirect(s, 1)); + else + pos += getSrc(s)->print(&buf[pos], BUFSZ - pos, sType); + } + if (exit) + PRINT("%s exit", colour[TXT_INSN]); + + PRINT("%s", colour[TXT_DEFAULT]); + + buf[MIN2(pos, BUFSZ - 1)] = 0; + + INFO("%s (%u)\n", buf, encSize); +} + +class PrintPass : public Pass +{ +public: + PrintPass() : serial(0) { } + + virtual bool visit(Function *); + virtual bool visit(BasicBlock *); + virtual bool visit(Instruction *); + +private: + int serial; +}; + +bool +PrintPass::visit(Function *fn) +{ + char str[16]; + + INFO("\n%s:%i (", fn->getName(), fn->getLabel()); + + if (!fn->outs.empty()) + INFO("out"); + for (std::deque<ValueRef>::iterator it = fn->outs.begin(); + it != 
fn->outs.end(); + ++it) { + it->get()->print(str, sizeof(str), typeOfSize(it->get()->reg.size)); + INFO(" %s", str); + } + + if (!fn->ins.empty()) + INFO("%s%sin", colour[TXT_DEFAULT], fn->outs.empty() ? "" : ", "); + for (std::deque<ValueDef>::iterator it = fn->ins.begin(); + it != fn->ins.end(); + ++it) { + it->get()->print(str, sizeof(str), typeOfSize(it->get()->reg.size)); + INFO(" %s", str); + } + INFO("%s)\n", colour[TXT_DEFAULT]); + + return true; +} + +bool +PrintPass::visit(BasicBlock *bb) +{ +#if 0 + INFO("---\n"); + for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) + INFO(" <- BB:%i (%s)\n", + BasicBlock::get(ei.getNode())->getId(), + ei.getEdge()->typeStr()); +#endif + INFO("BB:%i (%u instructions) - ", bb->getId(), bb->getInsnCount()); + + if (bb->idom()) + INFO("idom = BB:%i, ", bb->idom()->getId()); + + INFO("df = { "); + for (DLList::Iterator df = bb->getDF().iterator(); !df.end(); df.next()) + INFO("BB:%i ", BasicBlock::get(df)->getId()); + + INFO("}\n"); + + for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) + INFO(" -> BB:%i (%s)\n", + BasicBlock::get(ei.getNode())->getId(), + ei.getEdge()->typeStr()); + + return true; +} + +bool +PrintPass::visit(Instruction *insn) +{ + INFO("%3i: ", serial++); + insn->print(); + return true; +} + +void +Function::print() +{ + PrintPass pass; + pass.run(this, true, false); +} + +void +Program::print() +{ + PrintPass pass; + init_colours(); + pass.run(this, true, false); +} + +void +Function::printLiveIntervals() const +{ + INFO("printing live intervals ...\n"); + + for (ArrayList::Iterator it = allLValues.iterator(); !it.end(); it.next()) { + const Value *lval = Value::get(it)->asLValue(); + if (lval && !lval->livei.isEmpty()) { + INFO("livei(%%%i): ", lval->id); + lval->livei.print(); + } + } +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp new file mode 100644 index 
00000000000..d65003ce4eb --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -0,0 +1,2050 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_target.h" + +#include <stack> +#include <limits> + +namespace nv50_ir { + +#define MAX_REGISTER_FILE_SIZE 256 + +class RegisterSet +{ +public: + RegisterSet(const Target *); + + void init(const Target *); + void reset(DataFile, bool resetMax = false); + + void periodicMask(DataFile f, uint32_t lock, uint32_t unlock); + void intersect(DataFile f, const RegisterSet *); + + bool assign(int32_t& reg, DataFile f, unsigned int size); + void release(DataFile f, int32_t reg, unsigned int size); + void occupy(DataFile f, int32_t reg, unsigned int size); + void occupy(const Value *); + void occupyMask(DataFile f, int32_t reg, uint8_t mask); + bool isOccupied(DataFile f, int32_t reg, unsigned int size) const; + bool testOccupy(const Value *); + bool testOccupy(DataFile f, int32_t reg, unsigned int size); + + inline int getMaxAssigned(DataFile f) const { return fill[f]; } + + inline unsigned int getFileSize(DataFile f, uint8_t regSize) const + { + if (restrictedGPR16Range && f == FILE_GPR && regSize == 2) + return (last[f] + 1) / 2; + return last[f] + 1; + } + + inline unsigned int units(DataFile f, unsigned int size) const + { + return size >> unit[f]; + } + // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary) + inline unsigned int idToBytes(const Value *v) const + { + return v->reg.data.id * MIN2(v->reg.size, 4); + } + inline unsigned int idToUnits(const Value *v) const + { + return units(v->reg.file, idToBytes(v)); + } + inline int bytesToId(Value *v, unsigned int bytes) const + { + if (v->reg.size < 4) + return units(v->reg.file, bytes); + return bytes / 4; + } + inline int unitsToId(DataFile f, int u, uint8_t size) const + { + if (u < 0) + return -1; + return (size < 4) ? 
u : ((u << unit[f]) / 4); + } + + void print() const; + +private: + BitSet bits[LAST_REGISTER_FILE + 1]; + + int unit[LAST_REGISTER_FILE + 1]; // log2 of allocation granularity + + int last[LAST_REGISTER_FILE + 1]; + int fill[LAST_REGISTER_FILE + 1]; + + const bool restrictedGPR16Range; +}; + +void +RegisterSet::reset(DataFile f, bool resetMax) +{ + bits[f].fill(0); + if (resetMax) + fill[f] = -1; +} + +void +RegisterSet::init(const Target *targ) +{ + for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) { + DataFile f = static_cast<DataFile>(rf); + last[rf] = targ->getFileSize(f) - 1; + unit[rf] = targ->getFileUnit(f); + fill[rf] = -1; + assert(last[rf] < MAX_REGISTER_FILE_SIZE); + bits[rf].allocate(last[rf] + 1, true); + } +} + +RegisterSet::RegisterSet(const Target *targ) + : restrictedGPR16Range(targ->getChipset() < 0xc0) +{ + init(targ); + for (unsigned int i = 0; i <= LAST_REGISTER_FILE; ++i) + reset(static_cast<DataFile>(i)); +} + +void +RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock) +{ + bits[f].periodicMask32(lock, unlock); +} + +void +RegisterSet::intersect(DataFile f, const RegisterSet *set) +{ + bits[f] |= set->bits[f]; +} + +void +RegisterSet::print() const +{ + INFO("GPR:"); + bits[FILE_GPR].print(); + INFO("\n"); +} + +bool +RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size) +{ + reg = bits[f].findFreeRange(size); + if (reg < 0) + return false; + fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1)); + return true; +} + +bool +RegisterSet::isOccupied(DataFile f, int32_t reg, unsigned int size) const +{ + return bits[f].testRange(reg, size); +} + +void +RegisterSet::occupy(const Value *v) +{ + occupy(v->reg.file, idToUnits(v), v->reg.size >> unit[v->reg.file]); +} + +void +RegisterSet::occupyMask(DataFile f, int32_t reg, uint8_t mask) +{ + bits[f].setMask(reg & ~31, static_cast<uint32_t>(mask) << (reg % 32)); +} + +void +RegisterSet::occupy(DataFile f, int32_t reg, unsigned int size) +{ + bits[f].setRange(reg, size); + 
+ INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size); + + fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1)); +} + +bool +RegisterSet::testOccupy(const Value *v) +{ + return testOccupy(v->reg.file, + idToUnits(v), v->reg.size >> unit[v->reg.file]); +} + +bool +RegisterSet::testOccupy(DataFile f, int32_t reg, unsigned int size) +{ + if (isOccupied(f, reg, size)) + return false; + occupy(f, reg, size); + return true; +} + +void +RegisterSet::release(DataFile f, int32_t reg, unsigned int size) +{ + bits[f].clrRange(reg, size); + + INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %u\n", f, reg, size); +} + +class RegAlloc +{ +public: + RegAlloc(Program *program) : prog(program), sequence(0) { } + + bool exec(); + bool execFunc(); + +private: + class PhiMovesPass : public Pass { + private: + virtual bool visit(BasicBlock *); + inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p); + }; + + class ArgumentMovesPass : public Pass { + private: + virtual bool visit(BasicBlock *); + }; + + class BuildIntervalsPass : public Pass { + private: + virtual bool visit(BasicBlock *); + void collectLiveValues(BasicBlock *); + void addLiveRange(Value *, const BasicBlock *, int end); + }; + + class InsertConstraintsPass : public Pass { + public: + bool exec(Function *func); + private: + virtual bool visit(BasicBlock *); + + bool insertConstraintMoves(); + + void condenseDefs(Instruction *); + void condenseSrcs(Instruction *, const int first, const int last); + + void addHazard(Instruction *i, const ValueRef *src); + void textureMask(TexInstruction *); + void addConstraint(Instruction *, int s, int n); + bool detectConflict(Instruction *, int s); + + // target specific functions, TODO: put in subclass or Target + void texConstraintNV50(TexInstruction *); + void texConstraintNVC0(TexInstruction *); + void texConstraintNVE0(TexInstruction *); + + std::list<Instruction *> constrList; + + const Target *targ; + }; + + bool buildLiveSets(BasicBlock *); + +private: + Program *prog; 
+ Function *func; + + // instructions in control flow / chronological order + ArrayList insns; + + int sequence; // for manual passes through CFG +}; + +typedef std::pair<Value *, Value *> ValuePair; + +class SpillCodeInserter +{ +public: + SpillCodeInserter(Function *fn) : func(fn), stackSize(0), stackBase(0) { } + + bool run(const std::list<ValuePair>&); + + Symbol *assignSlot(const Interval&, const unsigned int size); + inline int32_t getStackSize() const { return stackSize; } + +private: + Function *func; + + struct SpillSlot + { + Interval occup; + std::list<Value *> residents; // needed to recalculate occup + Symbol *sym; + int32_t offset; + inline uint8_t size() const { return sym->reg.size; } + }; + std::list<SpillSlot> slots; + int32_t stackSize; + int32_t stackBase; + + LValue *unspill(Instruction *usei, LValue *, Value *slot); + void spill(Instruction *defi, Value *slot, LValue *); +}; + +void +RegAlloc::BuildIntervalsPass::addLiveRange(Value *val, + const BasicBlock *bb, + int end) +{ + Instruction *insn = val->getUniqueInsn(); + + if (!insn) + insn = bb->getFirst(); + + assert(bb->getFirst()->serial <= bb->getExit()->serial); + assert(bb->getExit()->serial + 1 >= end); + + int begin = insn->serial; + if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial) + begin = bb->getEntry()->serial; + + INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n", + val->id, begin, insn->serial, end); + + if (begin != end) // empty ranges are only added as hazards for fixed regs + val->livei.extend(begin, end); +} + +bool +RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p) +{ + if (b->cfg.incidentCount() <= 1) + return false; + + int n = 0; + for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next()) + if (ei.getType() == Graph::Edge::TREE || + ei.getType() == Graph::Edge::FORWARD) + ++n; + return (n == 2); +} + +// For each operand of each PHI in b, generate a new value by inserting a MOV +// at the end of 
the block it is coming from and replace the operand with its +// result. This eliminates liveness conflicts and enables us to let values be +// copied to the right register if such a conflict exists nonetheless. +// +// These MOVs are also crucial in making sure the live intervals of phi srces +// are extended until the end of the loop, since they are not included in the +// live-in sets. +bool +RegAlloc::PhiMovesPass::visit(BasicBlock *bb) +{ + Instruction *phi, *mov; + BasicBlock *pb, *pn; + + std::stack<BasicBlock *> stack; + + for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { + pb = BasicBlock::get(ei.getNode()); + assert(pb); + if (needNewElseBlock(bb, pb)) + stack.push(pb); + } + while (!stack.empty()) { + pb = stack.top(); + pn = new BasicBlock(func); + stack.pop(); + + pb->cfg.detach(&bb->cfg); + pb->cfg.attach(&pn->cfg, Graph::Edge::TREE); + pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); + + assert(pb->getExit()->op != OP_CALL); + if (pb->getExit()->asFlow()->target.bb == bb) + pb->getExit()->asFlow()->target.bb = pn; + } + + // insert MOVs (phi->src(j) should stem from j-th in-BB) + int j = 0; + for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { + pb = BasicBlock::get(ei.getNode()); + if (!pb->isTerminated()) + pb->insertTail(new_FlowInstruction(func, OP_BRA, bb)); + + for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) { + mov = new_Instruction(func, OP_MOV, TYPE_U32); + + mov->setSrc(0, phi->getSrc(j)); + mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue())); + phi->setSrc(j, mov->getDef(0)); + + pb->insertBefore(pb->getExit(), mov); + } + ++j; + } + + return true; +} + +bool +RegAlloc::ArgumentMovesPass::visit(BasicBlock *bb) +{ + // Bind function call inputs/outputs to the same physical register + // the callee uses, inserting moves as appropriate for the case a + // conflict arises. 
+ for (Instruction *i = bb->getEntry(); i; i = i->next) { + FlowInstruction *cal = i->asFlow(); + // TODO: Handle indirect calls. + // Right now they should only be generated for builtins. + if (!cal || cal->op != OP_CALL || cal->builtin || cal->indirect) + continue; + RegisterSet clobberSet(prog->getTarget()); + + // Bind input values. + for (int s = cal->indirect ? 1 : 0; cal->srcExists(s); ++s) { + const int t = cal->indirect ? (s - 1) : s; + LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue()); + tmp->reg.data.id = cal->target.fn->ins[t].rep()->reg.data.id; + + Instruction *mov = + new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); + mov->setDef(0, tmp); + mov->setSrc(0, cal->getSrc(s)); + cal->setSrc(s, tmp); + + bb->insertBefore(cal, mov); + } + + // Bind output values. + for (int d = 0; cal->defExists(d); ++d) { + LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue()); + tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id; + + Instruction *mov = + new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); + mov->setSrc(0, tmp); + mov->setDef(0, cal->getDef(d)); + cal->setDef(d, tmp); + + bb->insertAfter(cal, mov); + clobberSet.occupy(tmp); + } + + // Bind clobbered values. + for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin(); + it != cal->target.fn->clobbers.end(); + ++it) { + if (clobberSet.testOccupy(*it)) { + Value *tmp = new_LValue(func, (*it)->asLValue()); + tmp->reg.data.id = (*it)->reg.data.id; + cal->setDef(cal->defCount(), tmp); + } + } + } + + // Update the clobber set of the function. + if (BasicBlock::get(func->cfgExit) == bb) { + func->buildDefSets(); + for (unsigned int i = 0; i < bb->defSet.getSize(); ++i) + if (bb->defSet.test(i)) + func->clobbers.push_back(func->getLValue(i)); + } + + return true; +} + +// Build the set of live-in variables of bb. 
+bool +RegAlloc::buildLiveSets(BasicBlock *bb) +{ + Function *f = bb->getFunction(); + BasicBlock *bn; + Instruction *i; + unsigned int s, d; + + INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId()); + + bb->liveSet.allocate(func->allLValues.getSize(), false); + + int n = 0; + for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { + bn = BasicBlock::get(ei.getNode()); + if (bn == bb) + continue; + if (bn->cfg.visit(sequence)) + if (!buildLiveSets(bn)) + return false; + if (n++ || bb->liveSet.marker) + bb->liveSet |= bn->liveSet; + else + bb->liveSet = bn->liveSet; + } + if (!n && !bb->liveSet.marker) + bb->liveSet.fill(0); + bb->liveSet.marker = true; + + if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { + INFO("BB:%i live set of out blocks:\n", bb->getId()); + bb->liveSet.print(); + } + + // if (!bb->getEntry()) + // return true; + + if (bb == BasicBlock::get(f->cfgExit)) { + for (std::deque<ValueRef>::iterator it = f->outs.begin(); + it != f->outs.end(); ++it) { + assert(it->get()->asLValue()); + bb->liveSet.set(it->get()->id); + } + } + + for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) { + for (d = 0; i->defExists(d); ++d) + bb->liveSet.clr(i->getDef(d)->id); + for (s = 0; i->srcExists(s); ++s) + if (i->getSrc(s)->asLValue()) + bb->liveSet.set(i->getSrc(s)->id); + } + for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next) + bb->liveSet.clr(i->getDef(0)->id); + + if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { + INFO("BB:%i live set after propagation:\n", bb->getId()); + bb->liveSet.print(); + } + + return true; +} + +void +RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb) +{ + BasicBlock *bbA = NULL, *bbB = NULL; + + if (bb->cfg.outgoingCount()) { + // trickery to save a loop of OR'ing liveSets + // aliasing works fine with BitSet::setOr + for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { + if (ei.getType() == Graph::Edge::DUMMY) + continue; + if (bbA) { + 
bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet); + bbA = bb; + } else { + bbA = bbB; + } + bbB = BasicBlock::get(ei.getNode()); + } + bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL); + } else + if (bb->cfg.incidentCount()) { + bb->liveSet.fill(0); + } +} + +bool +RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb) +{ + collectLiveValues(bb); + + INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId()); + + // go through out blocks and delete phi sources that do not originate from + // the current block from the live set + for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { + BasicBlock *out = BasicBlock::get(ei.getNode()); + + for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) { + bb->liveSet.clr(i->getDef(0)->id); + + for (int s = 0; i->srcExists(s); ++s) { + assert(i->src(s).getInsn()); + if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ? + bb->liveSet.set(i->getSrc(s)->id); + else + bb->liveSet.clr(i->getSrc(s)->id); + } + } + } + + // remaining live-outs are live until end + if (bb->getExit()) { + for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j) + if (bb->liveSet.test(j)) + addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1); + } + + for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) { + for (int d = 0; i->defExists(d); ++d) { + bb->liveSet.clr(i->getDef(d)->id); + if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs + i->getDef(d)->livei.extend(i->serial, i->serial); + } + + for (int s = 0; i->srcExists(s); ++s) { + if (!i->getSrc(s)->asLValue()) + continue; + if (!bb->liveSet.test(i->getSrc(s)->id)) { + bb->liveSet.set(i->getSrc(s)->id); + addLiveRange(i->getSrc(s), bb, i->serial); + } + } + } + + if (bb == BasicBlock::get(func->cfg.getRoot())) { + for (std::deque<ValueDef>::iterator it = func->ins.begin(); + it != func->ins.end(); ++it) { + if (it->get()->reg.data.id >= 0) // add hazard for fixed regs + 
it->get()->livei.extend(0, 1); + } + } + + return true; +} + + +#define JOIN_MASK_PHI (1 << 0) +#define JOIN_MASK_UNION (1 << 1) +#define JOIN_MASK_MOV (1 << 2) +#define JOIN_MASK_TEX (1 << 3) + +class GCRA +{ +public: + GCRA(Function *, SpillCodeInserter&); + ~GCRA(); + + bool allocateRegisters(ArrayList& insns); + + void printNodeInfo() const; + +private: + class RIG_Node : public Graph::Node + { + public: + RIG_Node(); + + void init(const RegisterSet&, LValue *); + + void addInterference(RIG_Node *); + void addRegPreference(RIG_Node *); + + inline LValue *getValue() const + { + return reinterpret_cast<LValue *>(data); + } + inline void setValue(LValue *lval) { data = lval; } + + inline uint8_t getCompMask() const + { + return ((1 << colors) - 1) << (reg & 7); + } + + static inline RIG_Node *get(const Graph::EdgeIterator& ei) + { + return static_cast<RIG_Node *>(ei.getNode()); + } + + public: + uint32_t degree; + uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable + uint16_t colors; + + DataFile f; + int32_t reg; + + float weight; + + // list pointers for simplify() phase + RIG_Node *next; + RIG_Node *prev; + + // union of the live intervals of all coalesced values (we want to retain + // the separate intervals for testing interference of compound values) + Interval livei; + + std::list<RIG_Node *> prefRegs; + }; + +private: + inline RIG_Node *getNode(const LValue *v) const { return &nodes[v->id]; } + + void buildRIG(ArrayList&); + bool coalesce(ArrayList&); + bool doCoalesce(ArrayList&, unsigned int mask); + void calculateSpillWeights(); + void simplify(); + bool selectRegisters(); + void cleanup(const bool success); + + void simplifyEdge(RIG_Node *, RIG_Node *); + void simplifyNode(RIG_Node *); + + bool coalesceValues(Value *, Value *, bool force); + void resolveSplitsAndMerges(); + void makeCompound(Instruction *, bool isSplit); + + inline void checkInterference(const RIG_Node *, Graph::EdgeIterator&); + + inline void 
insertOrderedTail(std::list<RIG_Node *>&, RIG_Node *); + void checkList(std::list<RIG_Node *>&); + +private: + std::stack<uint32_t> stack; + + // list headers for simplify() phase + RIG_Node lo[2]; + RIG_Node hi; + + Graph RIG; + RIG_Node *nodes; + unsigned int nodeCount; + + Function *func; + Program *prog; + + static uint8_t relDegree[17][17]; + + RegisterSet regs; + + // need to fixup register id for participants of OP_MERGE/SPLIT + std::list<Instruction *> merges; + std::list<Instruction *> splits; + + SpillCodeInserter& spill; + std::list<ValuePair> mustSpill; +}; + +uint8_t GCRA::relDegree[17][17]; + +GCRA::RIG_Node::RIG_Node() : Node(NULL), next(this), prev(this) +{ + colors = 0; +} + +void +GCRA::printNodeInfo() const +{ + for (unsigned int i = 0; i < nodeCount; ++i) { + if (!nodes[i].colors) + continue; + INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X", + i, + nodes[i].f,nodes[i].reg,nodes[i].colors, + nodes[i].weight, + nodes[i].degree, nodes[i].degreeLimit); + + for (Graph::EdgeIterator ei = nodes[i].outgoing(); !ei.end(); ei.next()) + INFO(" %%%i", RIG_Node::get(ei)->getValue()->id); + for (Graph::EdgeIterator ei = nodes[i].incident(); !ei.end(); ei.next()) + INFO(" %%%i", RIG_Node::get(ei)->getValue()->id); + INFO("\n"); + } +} + +void +GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval) +{ + setValue(lval); + if (lval->reg.data.id >= 0) + lval->noSpill = lval->fixedReg = 1; + + colors = regs.units(lval->reg.file, lval->reg.size); + f = lval->reg.file; + reg = -1; + if (lval->reg.data.id >= 0) + reg = regs.idToUnits(lval); + + weight = std::numeric_limits<float>::infinity(); + degree = 0; + degreeLimit = regs.getFileSize(f, lval->reg.size); + + livei.insert(lval->livei); +} + +bool +GCRA::coalesceValues(Value *dst, Value *src, bool force) +{ + LValue *rep = dst->join->asLValue(); + LValue *val = src->join->asLValue(); + + if (!force && val->reg.data.id >= 0) { + rep = src->join->asLValue(); + val = dst->join->asLValue(); + } 
+ RIG_Node *nRep = &nodes[rep->id]; + RIG_Node *nVal = &nodes[val->id]; + + if (src->reg.file != dst->reg.file) { + if (!force) + return false; + WARN("forced coalescing of values in different files !\n"); + } + if (!force && dst->reg.size != src->reg.size) + return false; + + if ((rep->reg.data.id >= 0) && (rep->reg.data.id != val->reg.data.id)) { + if (force) { + if (val->reg.data.id >= 0) + WARN("forced coalescing of values in different fixed regs !\n"); + } else { + if (val->reg.data.id >= 0) + return false; + // make sure that there is no overlap with the fixed register of rep + for (ArrayList::Iterator it = func->allLValues.iterator(); + !it.end(); it.next()) { + Value *reg = reinterpret_cast<Value *>(it.get())->asLValue(); + assert(reg); + if (reg->interfers(rep) && reg->livei.overlaps(nVal->livei)) + return false; + } + } + } + + if (!force && nRep->livei.overlaps(nVal->livei)) + return false; + + INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n", + rep->id, rep->reg.data.id, val->id); + + // set join pointer of all values joined with val + for (Value::DefIterator def = val->defs.begin(); def != val->defs.end(); + ++def) + (*def)->get()->join = rep; + assert(rep->join == rep && val->join == rep); + + // add val's definitions to rep and extend the live interval of its RIG node + rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end()); + nRep->livei.unify(nVal->livei); + return true; +} + +bool +GCRA::coalesce(ArrayList& insns) +{ + bool ret = doCoalesce(insns, JOIN_MASK_PHI); + if (!ret) + return false; + switch (func->getProgram()->getTarget()->getChipset() & ~0xf) { + case 0x50: + case 0x80: + case 0x90: + case 0xa0: + ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX); + break; + case 0xc0: + case 0xd0: + case 0xe0: + ret = doCoalesce(insns, JOIN_MASK_UNION); + break; + default: + break; + } + if (!ret) + return false; + return doCoalesce(insns, JOIN_MASK_MOV); +} + +static inline uint8_t makeCompMask(int compSize, int 
                                            base, int size)
{
   uint8_t m = ((1 << size) - 1) << base;

   // Map the position of a sub-value (at unit @base, @size units wide) inside
   // a compound of @compSize register units to an 8-bit unit mask. The bits
   // are replicated so the mask matches any aligned position the compound can
   // occupy within a row of 8 register units: a 1-unit compound fits anywhere
   // (0xff), a 2-unit compound at any even unit, a 3/4-unit compound in
   // either half; >4 units have a single fixed placement. Note case 3 shares
   // case 4's replication intentionally (alignment is by half-row).
   switch (compSize) {
   case 1:
      return 0xff;
   case 2:
      m |= (m << 2);
      return (m << 4) | m;
   case 3:
   case 4:
      return (m << 4) | m;
   default:
      assert(compSize <= 8);
      return m;
   }
}

// Used when coalescing moves. The non-compound value will become one, e.g.:
// mov b32 $r0 $r2 / merge b64 $r0d { $r0 $r1 }
// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d
static inline void copyCompound(Value *dst, Value *src)
{
   LValue *ldst = dst->asLValue();
   LValue *lsrc = src->asLValue();

   // If dst is already compound but src is not, reverse the copy direction
   // so the compound state ends up on the non-compound value.
   if (ldst->compound && !lsrc->compound) {
      LValue *swap = lsrc;
      lsrc = ldst;
      ldst = swap;
   }

   ldst->compound = lsrc->compound;
   ldst->compMask = lsrc->compMask;
}

// Mark the merge destination (or split source) @rep and all of its component
// values as compound, narrowing each component's compMask to the register
// units it occupies within @rep.
void
GCRA::makeCompound(Instruction *insn, bool split)
{
   LValue *rep = (split ? insn->getSrc(0) : insn->getDef(0))->asLValue();

   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
      INFO("makeCompound(split = %i): ", split);
      insn->print();
   }

   const unsigned int size = getNode(rep)->colors;
   unsigned int base = 0;

   if (!rep->compound)
      rep->compMask = 0xff;
   rep->compound = 1;

   for (int c = 0; split ? insn->defExists(c) : insn->srcExists(c); ++c) {
      LValue *val = (split ?
insn->getDef(c) : insn->getSrc(c))->asLValue(); + + val->compound = 1; + if (!val->compMask) + val->compMask = 0xff; + val->compMask &= makeCompMask(size, base, getNode(val)->colors); + assert(val->compMask); + + INFO_DBG(prog->dbgFlags, REG_ALLOC, "compound: %%%i:%02x <- %%%i:%02x\n", + rep->id, rep->compMask, val->id, val->compMask); + + base += getNode(val)->colors; + } + assert(base == size); +} + +bool +GCRA::doCoalesce(ArrayList& insns, unsigned int mask) +{ + int c, n; + + for (n = 0; n < insns.getSize(); ++n) { + Instruction *i; + Instruction *insn = reinterpret_cast<Instruction *>(insns.get(n)); + + switch (insn->op) { + case OP_PHI: + if (!(mask & JOIN_MASK_PHI)) + break; + for (c = 0; insn->srcExists(c); ++c) + if (!coalesceValues(insn->getDef(0), insn->getSrc(c), false)) { + // this is bad + ERROR("failed to coalesce phi operands\n"); + return false; + } + break; + case OP_UNION: + case OP_MERGE: + if (!(mask & JOIN_MASK_UNION)) + break; + for (c = 0; insn->srcExists(c); ++c) + coalesceValues(insn->getDef(0), insn->getSrc(c), true); + if (insn->op == OP_MERGE) { + merges.push_back(insn); + if (insn->srcExists(1)) + makeCompound(insn, false); + } + break; + case OP_SPLIT: + if (!(mask & JOIN_MASK_UNION)) + break; + splits.push_back(insn); + for (c = 0; insn->defExists(c); ++c) + coalesceValues(insn->getSrc(0), insn->getDef(c), true); + makeCompound(insn, true); + break; + case OP_MOV: + if (!(mask & JOIN_MASK_MOV)) + break; + i = NULL; + if (!insn->getDef(0)->uses.empty()) + i = insn->getDef(0)->uses.front()->getInsn(); + // if this is a contraint-move there will only be a single use + if (i && i->op == OP_MERGE) // do we really still need this ? 
+ break; + i = insn->getSrc(0)->getUniqueInsn(); + if (i && !i->constrainedDefs()) { + if (coalesceValues(insn->getDef(0), insn->getSrc(0), false)) + copyCompound(insn->getSrc(0), insn->getDef(0)); + } + break; + case OP_TEX: + case OP_TXB: + case OP_TXL: + case OP_TXF: + case OP_TXQ: + case OP_TXD: + case OP_TXG: + case OP_TEXCSAA: + if (!(mask & JOIN_MASK_TEX)) + break; + for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c) + coalesceValues(insn->getDef(c), insn->getSrc(c), true); + break; + default: + break; + } + } + return true; +} + +void +GCRA::RIG_Node::addInterference(RIG_Node *node) +{ + this->degree += relDegree[node->colors][colors]; + node->degree += relDegree[colors][node->colors]; + + this->attach(node, Graph::Edge::CROSS); +} + +void +GCRA::RIG_Node::addRegPreference(RIG_Node *node) +{ + prefRegs.push_back(node); +} + +GCRA::GCRA(Function *fn, SpillCodeInserter& spill) : + func(fn), + regs(fn->getProgram()->getTarget()), + spill(spill) +{ + prog = func->getProgram(); + + // initialize relative degrees array - i takes away from j + for (int i = 1; i <= 16; ++i) + for (int j = 1; j <= 16; ++j) + relDegree[i][j] = j * ((i + j - 1) / j); +} + +GCRA::~GCRA() +{ + if (nodes) + delete[] nodes; +} + +void +GCRA::checkList(std::list<RIG_Node *>& lst) +{ + GCRA::RIG_Node *prev = NULL; + + for (std::list<RIG_Node *>::iterator it = lst.begin(); + it != lst.end(); + ++it) { + assert((*it)->getValue()->join == (*it)->getValue()); + if (prev) + assert(prev->livei.begin() <= (*it)->livei.begin()); + prev = *it; + } +} + +void +GCRA::insertOrderedTail(std::list<RIG_Node *>& list, RIG_Node *node) +{ + if (node->livei.isEmpty()) + return; + // only the intervals of joined values don't necessarily arrive in order + std::list<RIG_Node *>::iterator prev, it; + for (it = list.end(); it != list.begin(); it = prev) { + prev = it; + --prev; + if ((*prev)->livei.begin() <= node->livei.begin()) + break; + } + list.insert(it, node); +} + +void +GCRA::buildRIG(ArrayList& 
insns) +{ + std::list<RIG_Node *> values, active; + + for (std::deque<ValueDef>::iterator it = func->ins.begin(); + it != func->ins.end(); ++it) + insertOrderedTail(values, getNode(it->get()->asLValue())); + + for (int i = 0; i < insns.getSize(); ++i) { + Instruction *insn = reinterpret_cast<Instruction *>(insns.get(i)); + for (int d = 0; insn->defExists(d); ++d) + if (insn->getDef(d)->rep() == insn->getDef(d)) + insertOrderedTail(values, getNode(insn->getDef(d)->asLValue())); + } + checkList(values); + + while (!values.empty()) { + RIG_Node *cur = values.front(); + + for (std::list<RIG_Node *>::iterator it = active.begin(); + it != active.end();) { + RIG_Node *node = *it; + + if (node->livei.end() <= cur->livei.begin()) { + it = active.erase(it); + } else { + if (node->f == cur->f && node->livei.overlaps(cur->livei)) + cur->addInterference(node); + ++it; + } + } + values.pop_front(); + active.push_back(cur); + } +} + +void +GCRA::calculateSpillWeights() +{ + for (unsigned int i = 0; i < nodeCount; ++i) { + RIG_Node *const n = &nodes[i]; + if (!nodes[i].colors || nodes[i].livei.isEmpty()) + continue; + if (nodes[i].reg >= 0) { + // update max reg + regs.occupy(n->f, n->reg, n->colors); + continue; + } + LValue *val = nodes[i].getValue(); + + if (!val->noSpill) { + int rc = 0; + for (Value::DefIterator it = val->defs.begin(); + it != val->defs.end(); + ++it) + rc += (*it)->get()->refCount(); + + nodes[i].weight = + (float)rc * (float)rc / (float)nodes[i].livei.extent(); + } + + if (nodes[i].degree < nodes[i].degreeLimit) { + int l = 0; + if (val->reg.size > 4) + l = 1; + DLLIST_ADDHEAD(&lo[l], &nodes[i]); + } else { + DLLIST_ADDHEAD(&hi, &nodes[i]); + } + } + if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) + printNodeInfo(); +} + +void +GCRA::simplifyEdge(RIG_Node *a, RIG_Node *b) +{ + bool move = b->degree >= b->degreeLimit; + + INFO_DBG(prog->dbgFlags, REG_ALLOC, + "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n", + a->getValue()->id, a->degree, a->degreeLimit, + 
            b->getValue()->id, b->degree, b->degreeLimit);

   // @a is being removed from the RIG; lower b's effective degree.
   b->degree -= relDegree[a->colors][b->colors];

   // If b was not trivially colourable before (move was set from the old
   // degree) but is now, migrate it from the hi (spill-candidate) list to
   // the lo list matching its size class.
   move = move && b->degree < b->degreeLimit;
   if (move && !DLLIST_EMPTY(b)) {
      int l = (b->getValue()->reg.size > 4) ? 1 : 0;
      DLLIST_DEL(b);
      DLLIST_ADDTAIL(&lo[l], b);
   }
}

// Remove @node from the RIG, updating all neighbours' degrees, and push its
// value id on the select stack for colouring in selectRegisters().
void
GCRA::simplifyNode(RIG_Node *node)
{
   for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
      simplifyEdge(node, RIG_Node::get(ei));

   for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
      simplifyEdge(node, RIG_Node::get(ei));

   DLLIST_DEL(node);
   stack.push(node->getValue()->id);

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "SIMPLIFY: pushed %%%i%s\n",
            node->getValue()->id,
            (node->degree < node->degreeLimit) ? "" : "(spill)");
}

// Simplify phase of graph-colouring allocation: repeatedly remove trivially
// colourable nodes (small values in lo[0] drained first, then larger ones in
// lo[1] one at a time so freed neighbours can re-enter lo[0]); when only
// non-trivial nodes remain, optimistically remove the one with the lowest
// spill weight per degree as a potential spill candidate.
void
GCRA::simplify()
{
   for (;;) {
      if (!DLLIST_EMPTY(&lo[0])) {
         do {
            simplifyNode(lo[0].next);
         } while (!DLLIST_EMPTY(&lo[0]));
      } else
      if (!DLLIST_EMPTY(&lo[1])) {
         simplifyNode(lo[1].next);
      } else
      if (!DLLIST_EMPTY(&hi)) {
         RIG_Node *best = hi.next;
         float bestScore = best->weight / (float)best->degree;
         // spill candidate
         for (RIG_Node *it = best->next; it != &hi; it = it->next) {
            float score = it->weight / (float)it->degree;
            if (score < bestScore) {
               best = it;
               bestScore = score;
            }
         }
         // weight is left at +inf for values marked noSpill; an infinite
         // best score means nothing remaining may be spilled
         if (isinf(bestScore)) {
            ERROR("no viable spill candidates left\n");
            break;
         }
         simplifyNode(best);
      } else {
         break;
      }
   }
}

// Occupy the register units used by the already-coloured neighbour behind
// @ei so selectRegisters() cannot hand them to @node. For compound values
// only units of components whose live ranges actually overlap are blocked.
void
GCRA::checkInterference(const RIG_Node *node, Graph::EdgeIterator& ei)
{
   const RIG_Node *intf = RIG_Node::get(ei);

   if (intf->reg < 0)
      return;
   const LValue *vA = node->getValue();
   const LValue *vB = intf->getValue();

   const uint8_t intfMask = ((1 << intf->colors) - 1) << (intf->reg & 7);

   if (vA->compound | vB->compound) {
      // NOTE: this only works for >aligned< register tuples !
+ for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) { + for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) { + const LValue *vD = (*D)->get()->asLValue(); + const LValue *vd = (*d)->get()->asLValue(); + + if (!vD->livei.overlaps(vd->livei)) { + INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no overlap\n", + vD->id, vd->id); + continue; + } + + uint8_t mask = vD->compound ? vD->compMask : ~0; + if (vd->compound) { + assert(vB->compound); + mask &= vd->compMask & vB->compMask; + } else { + mask &= intfMask; + } + + INFO_DBG(prog->dbgFlags, REG_ALLOC, + "(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n", + vD->id, + vD->compound ? vD->compMask : 0xff, + vd->id, + vd->compound ? vd->compMask : intfMask, + vB->compMask, intf->reg & ~7, mask); + if (mask) + regs.occupyMask(node->f, intf->reg & ~7, mask); + } + } + } else { + INFO_DBG(prog->dbgFlags, REG_ALLOC, + "(%%%i) X (%%%i): $r%i + %u\n", + vA->id, vB->id, intf->reg, intf->colors); + regs.occupy(node->f, intf->reg, intf->colors); + } +} + +bool +GCRA::selectRegisters() +{ + INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nSELECT phase\n"); + + while (!stack.empty()) { + RIG_Node *node = &nodes[stack.top()]; + stack.pop(); + + regs.reset(node->f); + + INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nNODE[%%%i, %u colors]\n", + node->getValue()->id, node->colors); + + for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) + checkInterference(node, ei); + for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next()) + checkInterference(node, ei); + + if (!node->prefRegs.empty()) { + for (std::list<RIG_Node *>::const_iterator it = node->prefRegs.begin(); + it != node->prefRegs.end(); + ++it) { + if ((*it)->reg >= 0 && + regs.testOccupy(node->f, (*it)->reg, node->colors)) { + node->reg = (*it)->reg; + break; + } + } + } + if (node->reg >= 0) + continue; + LValue *lval = node->getValue(); + if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) + regs.print(); + bool ret = 
regs.assign(node->reg, node->f, node->colors); + if (ret) { + INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg); + lval->compMask = node->getCompMask(); + } else { + INFO_DBG(prog->dbgFlags, REG_ALLOC, "must spill: %%%i (size %u)\n", + lval->id, lval->reg.size); + Symbol *slot = NULL; + if (lval->reg.file == FILE_GPR) + slot = spill.assignSlot(node->livei, lval->reg.size); + mustSpill.push_back(ValuePair(lval, slot)); + } + } + if (!mustSpill.empty()) + return false; + for (unsigned int i = 0; i < nodeCount; ++i) { + LValue *lval = nodes[i].getValue(); + if (nodes[i].reg >= 0 && nodes[i].colors > 0) + lval->reg.data.id = + regs.unitsToId(nodes[i].f, nodes[i].reg, lval->reg.size); + } + return true; +} + +bool +GCRA::allocateRegisters(ArrayList& insns) +{ + bool ret; + + INFO_DBG(prog->dbgFlags, REG_ALLOC, + "allocateRegisters to %u instructions\n", insns.getSize()); + + nodeCount = func->allLValues.getSize(); + nodes = new RIG_Node[nodeCount]; + if (!nodes) + return false; + for (unsigned int i = 0; i < nodeCount; ++i) { + LValue *lval = reinterpret_cast<LValue *>(func->allLValues.get(i)); + if (lval) { + nodes[i].init(regs, lval); + RIG.insert(&nodes[i]); + } + } + + // coalesce first, we use only 1 RIG node for a group of joined values + ret = coalesce(insns); + if (!ret) + goto out; + + if (func->getProgram()->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) + func->printLiveIntervals(); + + buildRIG(insns); + calculateSpillWeights(); + simplify(); + + ret = selectRegisters(); + if (!ret) { + INFO_DBG(prog->dbgFlags, REG_ALLOC, + "selectRegisters failed, inserting spill code ...\n"); + regs.reset(FILE_GPR, true); + spill.run(mustSpill); + if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) + func->print(); + } else { + prog->maxGPR = std::max(prog->maxGPR, regs.getMaxAssigned(FILE_GPR)); + } + +out: + cleanup(ret); + return ret; +} + +void +GCRA::cleanup(const bool success) +{ + mustSpill.clear(); + + for (ArrayList::Iterator it = func->allLValues.iterator(); + 
!it.end(); it.next()) { + LValue *lval = reinterpret_cast<LValue *>(it.get()); + + lval->livei.clear(); + + lval->compound = 0; + lval->compMask = 0; + + if (lval->join == lval) + continue; + + if (success) { + lval->reg.data.id = lval->join->reg.data.id; + } else { + for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end(); + ++d) + lval->join->defs.remove(*d); + lval->join = lval; + } + } + + if (success) + resolveSplitsAndMerges(); + splits.clear(); // avoid duplicate entries on next coalesce pass + merges.clear(); + + delete[] nodes; + nodes = NULL; +} + +Symbol * +SpillCodeInserter::assignSlot(const Interval &livei, const unsigned int size) +{ + SpillSlot slot; + int32_t offsetBase = stackSize; + int32_t offset; + std::list<SpillSlot>::iterator pos = slots.end(), it = slots.begin(); + + if (offsetBase % size) + offsetBase += size - (offsetBase % size); + + slot.sym = NULL; + + for (offset = offsetBase; offset < stackSize; offset += size) { + const int32_t entryEnd = offset + size; + while (it != slots.end() && it->offset < offset) + ++it; + if (it == slots.end()) // no slots left + break; + std::list<SpillSlot>::iterator bgn = it; + + while (it != slots.end() && it->offset < entryEnd) { + it->occup.print(); + if (it->occup.overlaps(livei)) + break; + ++it; + } + if (it == slots.end() || it->offset >= entryEnd) { + // fits + for (; bgn != slots.end() && bgn->offset < entryEnd; ++bgn) { + bgn->occup.insert(livei); + if (bgn->size() == size) + slot.sym = bgn->sym; + } + break; + } + } + if (!slot.sym) { + stackSize = offset + size; + slot.offset = offset; + slot.sym = new_Symbol(func->getProgram(), FILE_MEMORY_LOCAL); + if (!func->stackPtr) + offset += func->tlsBase; + slot.sym->setAddress(NULL, offset); + slot.sym->reg.size = size; + slots.insert(pos, slot)->occup.insert(livei); + } + return slot.sym; +} + +void +SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval) +{ + const DataType ty = typeOfSize(slot->reg.size); + + 
Instruction *st; + if (slot->reg.file == FILE_MEMORY_LOCAL) { + st = new_Instruction(func, OP_STORE, ty); + st->setSrc(0, slot); + st->setSrc(1, lval); + lval->noSpill = 1; + } else { + st = new_Instruction(func, OP_CVT, ty); + st->setDef(0, slot); + st->setSrc(0, lval); + } + defi->bb->insertAfter(defi, st); +} + +LValue * +SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot) +{ + const DataType ty = typeOfSize(slot->reg.size); + + lval = cloneShallow(func, lval); + + Instruction *ld; + if (slot->reg.file == FILE_MEMORY_LOCAL) { + lval->noSpill = 1; + ld = new_Instruction(func, OP_LOAD, ty); + } else { + ld = new_Instruction(func, OP_CVT, ty); + } + ld->setDef(0, lval); + ld->setSrc(0, slot); + + usei->bb->insertBefore(usei, ld); + return lval; +} + +bool +SpillCodeInserter::run(const std::list<ValuePair>& lst) +{ + for (std::list<ValuePair>::const_iterator it = lst.begin(); it != lst.end(); + ++it) { + LValue *lval = it->first->asLValue(); + Symbol *mem = it->second ? it->second->asSym() : NULL; + + for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end(); + ++d) { + Value *slot = mem ? + static_cast<Value *>(mem) : new_LValue(func, FILE_GPR); + Value *tmp = NULL; + Instruction *last = NULL; + + LValue *dval = (*d)->get()->asLValue(); + Instruction *defi = (*d)->getInsn(); + + // handle uses first or they'll contain the spill stores + while (!dval->uses.empty()) { + ValueRef *u = dval->uses.front(); + Instruction *usei = u->getInsn(); + assert(usei); + if (usei->op == OP_PHI) { + tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? 
NULL : slot; + last = NULL; + } else + if (!last || usei != last->next) { // TODO: sort uses + tmp = unspill(usei, dval, slot); + last = usei; + } + u->set(tmp); + } + + assert(defi); + if (defi->op == OP_PHI) { + d = lval->defs.erase(d); + --d; + if (slot->reg.file == FILE_MEMORY_LOCAL) + delete_Instruction(func->getProgram(), defi); + else + defi->setDef(0, slot); + } else { + spill(defi, slot, dval); + } + } + + } + + // TODO: We're not trying to reuse old slots in a potential next iteration. + // We have to update the slots' livei intervals to be able to do that. + stackBase = stackSize; + slots.clear(); + return true; +} + +bool +RegAlloc::exec() +{ + for (IteratorRef it = prog->calls.iteratorDFS(false); + !it->end(); it->next()) { + func = Function::get(reinterpret_cast<Graph::Node *>(it->get())); + + func->tlsBase = prog->tlsSize; + if (!execFunc()) + return false; + prog->tlsSize += func->tlsSize; + } + return true; +} + +bool +RegAlloc::execFunc() +{ + InsertConstraintsPass insertConstr; + PhiMovesPass insertPhiMoves; + ArgumentMovesPass insertArgMoves; + BuildIntervalsPass buildIntervals; + SpillCodeInserter insertSpills(func); + + GCRA gcra(func, insertSpills); + + unsigned int i, retries; + bool ret; + + if (!func->ins.empty()) { + // Insert a nop at the entry so inputs only used by the first instruction + // don't count as having an empty live range. 
+ Instruction *nop = new_Instruction(func, OP_NOP, TYPE_NONE); + BasicBlock::get(func->cfg.getRoot())->insertHead(nop); + } + + ret = insertConstr.exec(func); + if (!ret) + goto out; + + ret = insertPhiMoves.run(func); + if (!ret) + goto out; + + ret = insertArgMoves.run(func); + if (!ret) + goto out; + + // TODO: need to fix up spill slot usage ranges to support > 1 retry + for (retries = 0; retries < 3; ++retries) { + if (retries && (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)) + INFO("Retry: %i\n", retries); + if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) + func->print(); + + // spilling to registers may add live ranges, need to rebuild everything + ret = true; + for (sequence = func->cfg.nextSequence(), i = 0; + ret && i <= func->loopNestingBound; + sequence = func->cfg.nextSequence(), ++i) + ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot())); + if (!ret) + break; + func->orderInstructions(this->insns); + + ret = buildIntervals.run(func); + if (!ret) + break; + ret = gcra.allocateRegisters(insns); + if (ret) + break; // success + } + INFO_DBG(prog->dbgFlags, REG_ALLOC, "RegAlloc done: %i\n", ret); + + func->tlsSize = insertSpills.getStackSize(); +out: + return ret; +} + +// TODO: check if modifying Instruction::join here breaks anything +void +GCRA::resolveSplitsAndMerges() +{ + for (std::list<Instruction *>::iterator it = splits.begin(); + it != splits.end(); + ++it) { + Instruction *split = *it; + unsigned int reg = regs.idToBytes(split->getSrc(0)); + for (int d = 0; split->defExists(d); ++d) { + Value *v = split->getDef(d); + v->reg.data.id = regs.bytesToId(v, reg); + v->join = v; + reg += v->reg.size; + } + } + splits.clear(); + + for (std::list<Instruction *>::iterator it = merges.begin(); + it != merges.end(); + ++it) { + Instruction *merge = *it; + unsigned int reg = regs.idToBytes(merge->getDef(0)); + for (int s = 0; merge->srcExists(s); ++s) { + Value *v = merge->getSrc(s); + v->reg.data.id = regs.bytesToId(v, reg); + v->join = v; + reg += 
v->reg.size; + } + } + merges.clear(); +} + +bool Program::registerAllocation() +{ + RegAlloc ra(this); + return ra.exec(); +} + +bool +RegAlloc::InsertConstraintsPass::exec(Function *ir) +{ + constrList.clear(); + + bool ret = run(ir, true, true); + if (ret) + ret = insertConstraintMoves(); + return ret; +} + +// TODO: make part of texture insn +void +RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex) +{ + Value *def[4]; + int c, k, d; + uint8_t mask = 0; + + for (d = 0, k = 0, c = 0; c < 4; ++c) { + if (!(tex->tex.mask & (1 << c))) + continue; + if (tex->getDef(k)->refCount()) { + mask |= 1 << c; + def[d++] = tex->getDef(k); + } + ++k; + } + tex->tex.mask = mask; + + for (c = 0; c < d; ++c) + tex->setDef(c, def[c]); + for (; c < 4; ++c) + tex->setDef(c, NULL); +} + +bool +RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s) +{ + Value *v = cst->getSrc(s); + + // current register allocation can't handle it if a value participates in + // multiple constraints + for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) { + if (cst != (*it)->getInsn()) + return true; + } + + // can start at s + 1 because detectConflict is called on all sources + for (int c = s + 1; cst->srcExists(c); ++c) + if (v == cst->getSrc(c)) + return true; + + Instruction *defi = v->getInsn(); + + return (!defi || defi->constrainedDefs()); +} + +void +RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n) +{ + Instruction *cst; + int d; + + // first, look for an existing identical constraint op + for (std::list<Instruction *>::iterator it = constrList.begin(); + it != constrList.end(); + ++it) { + cst = (*it); + if (!i->bb->dominatedBy(cst->bb)) + break; + for (d = 0; d < n; ++d) + if (cst->getSrc(d) != i->getSrc(d + s)) + break; + if (d >= n) { + for (d = 0; d < n; ++d, ++s) + i->setSrc(s, cst->getDef(d)); + return; + } + } + cst = new_Instruction(func, OP_CONSTRAINT, i->dType); + + for (d = 0; d < n; ++s, ++d) { + 
cst->setDef(d, new_LValue(func, FILE_GPR)); + cst->setSrc(d, i->getSrc(s)); + i->setSrc(s, cst->getDef(d)); + } + i->bb->insertBefore(i, cst); + + constrList.push_back(cst); +} + +// Add a dummy use of the pointer source of >= 8 byte loads after the load +// to prevent it from being assigned a register which overlapping the load's +// destination, which would produce random corruptions. +void +RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src) +{ + Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE); + hzd->setSrc(0, src->get()); + i->bb->insertAfter(i, hzd); + +} + +// b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q +void +RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn) +{ + uint8_t size = 0; + int n; + for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n) + size += insn->getDef(n)->reg.size; + if (n < 2) + return; + LValue *lval = new_LValue(func, FILE_GPR); + lval->reg.size = size; + + Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size)); + split->setSrc(0, lval); + for (int d = 0; d < n; ++d) { + split->setDef(d, insn->getDef(d)); + insn->setDef(d, NULL); + } + insn->setDef(0, lval); + + for (int k = 1, d = n; insn->defExists(d); ++d, ++k) { + insn->setDef(k, insn->getDef(d)); + insn->setDef(d, NULL); + } + // carry over predicate if any (mainly for OP_UNION uses) + split->setPredicate(insn->cc, insn->getPredicate()); + + insn->bb->insertAfter(insn, split); + constrList.push_back(split); +} +void +RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn, + const int a, const int b) +{ + uint8_t size = 0; + if (a >= b) + return; + for (int s = a; s <= b; ++s) + size += insn->getSrc(s)->reg.size; + if (!size) + return; + LValue *lval = new_LValue(func, FILE_GPR); + lval->reg.size = size; + + Value *save[3]; + insn->takeExtraSources(0, save); + + Instruction *merge = new_Instruction(func, OP_MERGE, typeOfSize(size)); + merge->setDef(0, lval); + for (int s = a, i = 0; s <= b; 
++s, ++i) { + merge->setSrc(i, insn->getSrc(s)); + insn->setSrc(s, NULL); + } + insn->setSrc(a, lval); + + for (int k = a + 1, s = b + 1; insn->srcExists(s); ++s, ++k) { + insn->setSrc(k, insn->getSrc(s)); + insn->setSrc(s, NULL); + } + insn->bb->insertBefore(insn, merge); + + insn->putExtraSources(0, save); + + constrList.push_back(merge); +} + +void +RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) +{ + if (isTextureOp(tex->op)) + textureMask(tex); + condenseDefs(tex); + + if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) { + condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1); + } else + if (isTextureOp(tex->op)) { + int n = tex->srcCount(0xff, true); + if (n > 4) { + condenseSrcs(tex, 0, 3); + if (n > 5) // NOTE: first call modified positions already + condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1)); + } else + if (n > 1) { + condenseSrcs(tex, 0, n - 1); + } + } +} + +void +RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex) +{ + int n, s; + + textureMask(tex); + + if (tex->op == OP_TXQ) { + s = tex->srcCount(0xff); + n = 0; + } else { + s = tex->tex.target.getArgCount(); + if (!tex->tex.target.isArray() && + (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0)) + ++s; + if (tex->op == OP_TXD && tex->tex.useOffsets) + ++s; + n = tex->srcCount(0xff) - s; + assert(n <= 4); + } + + if (s > 1) + condenseSrcs(tex, 0, s - 1); + if (n > 1) // NOTE: first call modified positions already + condenseSrcs(tex, 1, n); + + condenseDefs(tex); +} + +void +RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex) +{ + Value *pred = tex->getPredicate(); + if (pred) + tex->setPredicate(tex->cc, NULL); + + textureMask(tex); + + assert(tex->defExists(0) && tex->srcExists(0)); + // make src and def count match + int c; + for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) { + if (!tex->srcExists(c)) + tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue())); + if (!tex->defExists(c)) + tex->setDef(c, 
new_LValue(func, tex->getDef(0)->asLValue())); + } + if (pred) + tex->setPredicate(tex->cc, pred); + condenseDefs(tex); + condenseSrcs(tex, 0, c - 1); +} + +// Insert constraint markers for instructions whose multiple sources must be +// located in consecutive registers. +bool +RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) +{ + TexInstruction *tex; + Instruction *next; + int s, size; + + targ = bb->getProgram()->getTarget(); + + for (Instruction *i = bb->getEntry(); i; i = next) { + next = i->next; + + if ((tex = i->asTex())) { + switch (targ->getChipset() & ~0xf) { + case 0x50: + case 0x80: + case 0x90: + case 0xa0: + texConstraintNV50(tex); + break; + case 0xc0: + case 0xd0: + texConstraintNVC0(tex); + break; + case 0xe0: + case NVISA_GK110_CHIPSET: + texConstraintNVE0(tex); + break; + default: + break; + } + } else + if (i->op == OP_EXPORT || i->op == OP_STORE) { + for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) { + assert(i->srcExists(s)); + size -= i->getSrc(s)->reg.size; + } + condenseSrcs(i, 1, s - 1); + } else + if (i->op == OP_LOAD || i->op == OP_VFETCH) { + condenseDefs(i); + if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8) + addHazard(i, i->src(0).getIndirect(0)); + } else + if (i->op == OP_UNION || + i->op == OP_MERGE || + i->op == OP_SPLIT) { + constrList.push_back(i); + } + } + return true; +} + +// Insert extra moves so that, if multiple register constraints on a value are +// in conflict, these conflicts can be resolved. 
+bool +RegAlloc::InsertConstraintsPass::insertConstraintMoves() +{ + for (std::list<Instruction *>::iterator it = constrList.begin(); + it != constrList.end(); + ++it) { + Instruction *cst = *it; + Instruction *mov; + + if (cst->op == OP_SPLIT && 0) { + // spilling splits is annoying, just make sure they're separate + for (int d = 0; cst->defExists(d); ++d) { + if (!cst->getDef(d)->refCount()) + continue; + LValue *lval = new_LValue(func, cst->def(d).getFile()); + const uint8_t size = cst->def(d).getSize(); + lval->reg.size = size; + + mov = new_Instruction(func, OP_MOV, typeOfSize(size)); + mov->setSrc(0, lval); + mov->setDef(0, cst->getDef(d)); + cst->setDef(d, mov->getSrc(0)); + cst->bb->insertAfter(cst, mov); + + cst->getSrc(0)->asLValue()->noSpill = 1; + mov->getSrc(0)->asLValue()->noSpill = 1; + } + } else + if (cst->op == OP_MERGE || cst->op == OP_UNION) { + for (int s = 0; cst->srcExists(s); ++s) { + const uint8_t size = cst->src(s).getSize(); + + if (!cst->getSrc(s)->defs.size()) { + mov = new_Instruction(func, OP_NOP, typeOfSize(size)); + mov->setDef(0, cst->getSrc(s)); + cst->bb->insertBefore(cst, mov); + continue; + } + assert(cst->getSrc(s)->defs.size() == 1); // still SSA + + Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); + // catch some cases where don't really need MOVs + if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) + continue; + + LValue *lval = new_LValue(func, cst->src(s).getFile()); + lval->reg.size = size; + + mov = new_Instruction(func, OP_MOV, typeOfSize(size)); + mov->setDef(0, lval); + mov->setSrc(0, cst->getSrc(s)); + cst->setSrc(s, mov->getDef(0)); + cst->bb->insertBefore(cst, mov); + + cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help + + if (cst->op == OP_UNION) + mov->setPredicate(defi->cc, defi->getPredicate()); + } + } + } + + return true; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp new file 
mode 100644 index 00000000000..2e432349f24 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp @@ -0,0 +1,552 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_target.h" + +namespace nv50_ir { + +// Converts nv50 IR generated from TGSI to SSA form. + +// DominatorTree implements an algorithm for finding immediate dominators, +// as described by T. Lengauer & R. Tarjan. 
+class DominatorTree : public Graph +{ +public: + DominatorTree(Graph *cfg); + ~DominatorTree() { } + + bool dominates(BasicBlock *, BasicBlock *); + + void findDominanceFrontiers(); + +private: + void build(); + void buildDFS(Node *); + + void squash(int); + inline void link(int, int); + inline int eval(int); + + void debugPrint(); + + Graph *cfg; + + Node **vert; + int *data; + const int count; + + #define SEMI(i) (data[(i) + 0 * count]) + #define ANCESTOR(i) (data[(i) + 1 * count]) + #define PARENT(i) (data[(i) + 2 * count]) + #define LABEL(i) (data[(i) + 3 * count]) + #define DOM(i) (data[(i) + 4 * count]) +}; + +void DominatorTree::debugPrint() +{ + for (int i = 0; i < count; ++i) { + INFO("SEMI(%i) = %i\n", i, SEMI(i)); + INFO("ANCESTOR(%i) = %i\n", i, ANCESTOR(i)); + INFO("PARENT(%i) = %i\n", i, PARENT(i)); + INFO("LABEL(%i) = %i\n", i, LABEL(i)); + INFO("DOM(%i) = %i\n", i, DOM(i)); + } +} + +DominatorTree::DominatorTree(Graph *cfgraph) : cfg(cfgraph), + count(cfg->getSize()) +{ + int i = 0; + + vert = new Node * [count]; + data = new int[5 * count]; + + for (IteratorRef it = cfg->iteratorDFS(true); !it->end(); it->next(), ++i) { + vert[i] = reinterpret_cast<Node *>(it->get()); + vert[i]->tag = i; + LABEL(i) = i; + SEMI(i) = ANCESTOR(i) = -1; + } + + build(); + + delete[] vert; + delete[] data; +} + +void DominatorTree::buildDFS(Graph::Node *node) +{ + SEMI(node->tag) = node->tag; + + for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) { + if (SEMI(ei.getNode()->tag) < 0) { + buildDFS(ei.getNode()); + PARENT(ei.getNode()->tag) = node->tag; + } + } +} + +void DominatorTree::squash(int v) +{ + if (ANCESTOR(ANCESTOR(v)) >= 0) { + squash(ANCESTOR(v)); + + if (SEMI(LABEL(ANCESTOR(v))) < SEMI(LABEL(v))) + LABEL(v) = LABEL(ANCESTOR(v)); + ANCESTOR(v) = ANCESTOR(ANCESTOR(v)); + } +} + +int DominatorTree::eval(int v) +{ + if (ANCESTOR(v) < 0) + return v; + squash(v); + return LABEL(v); +} + +void DominatorTree::link(int v, int w) +{ + ANCESTOR(w) = 
v; +} + +void DominatorTree::build() +{ + DLList *bucket = new DLList[count]; + Node *nv, *nw; + int p, u, v, w; + + buildDFS(cfg->getRoot()); + + for (w = count - 1; w >= 1; --w) { + nw = vert[w]; + assert(nw->tag == w); + for (Graph::EdgeIterator ei = nw->incident(); !ei.end(); ei.next()) { + nv = ei.getNode(); + v = nv->tag; + u = eval(v); + if (SEMI(u) < SEMI(w)) + SEMI(w) = SEMI(u); + } + p = PARENT(w); + bucket[SEMI(w)].insert(nw); + link(p, w); + + for (DLList::Iterator it = bucket[p].iterator(); !it.end(); it.erase()) { + v = reinterpret_cast<Node *>(it.get())->tag; + u = eval(v); + DOM(v) = (SEMI(u) < SEMI(v)) ? u : p; + } + } + for (w = 1; w < count; ++w) { + if (DOM(w) != SEMI(w)) + DOM(w) = DOM(DOM(w)); + } + DOM(0) = 0; + + insert(&BasicBlock::get(cfg->getRoot())->dom); + do { + p = 0; + for (v = 1; v < count; ++v) { + nw = &BasicBlock::get(vert[DOM(v)])->dom;; + nv = &BasicBlock::get(vert[v])->dom; + if (nw->getGraph() && !nv->getGraph()) { + ++p; + nw->attach(nv, Graph::Edge::TREE); + } + } + } while (p); + + delete[] bucket; +} + +#undef SEMI +#undef ANCESTOR +#undef PARENT +#undef LABEL +#undef DOM + +void DominatorTree::findDominanceFrontiers() +{ + BasicBlock *bb; + + for (IteratorRef dtIt = iteratorDFS(false); !dtIt->end(); dtIt->next()) { + EdgeIterator succIt, chldIt; + + bb = BasicBlock::get(reinterpret_cast<Node *>(dtIt->get())); + bb->getDF().clear(); + + for (succIt = bb->cfg.outgoing(); !succIt.end(); succIt.next()) { + BasicBlock *dfLocal = BasicBlock::get(succIt.getNode()); + if (dfLocal->idom() != bb) + bb->getDF().insert(dfLocal); + } + + for (chldIt = bb->dom.outgoing(); !chldIt.end(); chldIt.next()) { + BasicBlock *cb = BasicBlock::get(chldIt.getNode()); + + DLList::Iterator dfIt = cb->getDF().iterator(); + for (; !dfIt.end(); dfIt.next()) { + BasicBlock *dfUp = BasicBlock::get(dfIt); + if (dfUp->idom() != bb) + bb->getDF().insert(dfUp); + } + } + } +} + +// liveIn(bb) = usedBeforeAssigned(bb) U (liveOut(bb) - assigned(bb)) +void 
+Function::buildLiveSetsPreSSA(BasicBlock *bb, const int seq) +{ + Function *f = bb->getFunction(); + BitSet usedBeforeAssigned(allLValues.getSize(), true); + BitSet assigned(allLValues.getSize(), true); + + bb->liveSet.allocate(allLValues.getSize(), false); + + int n = 0; + for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { + BasicBlock *out = BasicBlock::get(ei.getNode()); + if (out == bb) + continue; + if (out->cfg.visit(seq)) + buildLiveSetsPreSSA(out, seq); + if (!n++) + bb->liveSet = out->liveSet; + else + bb->liveSet |= out->liveSet; + } + if (!n && !bb->liveSet.marker) + bb->liveSet.fill(0); + bb->liveSet.marker = true; + + for (Instruction *i = bb->getEntry(); i; i = i->next) { + for (int s = 0; i->srcExists(s); ++s) + if (i->getSrc(s)->asLValue() && !assigned.test(i->getSrc(s)->id)) + usedBeforeAssigned.set(i->getSrc(s)->id); + for (int d = 0; i->defExists(d); ++d) + assigned.set(i->getDef(d)->id); + } + + if (bb == BasicBlock::get(f->cfgExit)) { + for (std::deque<ValueRef>::iterator it = f->outs.begin(); + it != f->outs.end(); ++it) { + if (!assigned.test(it->get()->id)) + usedBeforeAssigned.set(it->get()->id); + } + } + + bb->liveSet.andNot(assigned); + bb->liveSet |= usedBeforeAssigned; +} + +void +Function::buildDefSetsPreSSA(BasicBlock *bb, const int seq) +{ + bb->defSet.allocate(allLValues.getSize(), !bb->liveSet.marker); + bb->liveSet.marker = true; + + for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { + BasicBlock *in = BasicBlock::get(ei.getNode()); + + if (in->cfg.visit(seq)) + buildDefSetsPreSSA(in, seq); + + bb->defSet |= in->defSet; + } + + for (Instruction *i = bb->getEntry(); i; i = i->next) { + for (int d = 0; i->defExists(d); ++d) + bb->defSet.set(i->getDef(d)->id); + } +} + +class RenamePass +{ +public: + RenamePass(Function *); + ~RenamePass(); + + bool run(); + void search(BasicBlock *); + + inline LValue *getStackTop(Value *); + + LValue *mkUndefined(Value *); + +private: + Stack *stack; 
+ Function *func; + Program *prog; +}; + +bool +Program::convertToSSA() +{ + for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) { + Function *fn = reinterpret_cast<Function *>(fi.get()); + if (!fn->convertToSSA()) + return false; + } + return true; +} + +// XXX: add edge from entry to exit ? + +// Efficiently Computing Static Single Assignment Form and +// the Control Dependence Graph, +// R. Cytron, J. Ferrante, B. K. Rosen, M. N. Wegman, F. K. Zadeck +bool +Function::convertToSSA() +{ + // 0. calculate live in variables (for pruned SSA) + buildLiveSets(); + + // 1. create the dominator tree + domTree = new DominatorTree(&cfg); + reinterpret_cast<DominatorTree *>(domTree)->findDominanceFrontiers(); + + // 2. insert PHI functions + DLList workList; + LValue *lval; + BasicBlock *bb; + int var; + int iterCount = 0; + int *hasAlready = new int[allBBlocks.getSize() * 2]; + int *work = &hasAlready[allBBlocks.getSize()]; + + memset(hasAlready, 0, allBBlocks.getSize() * 2 * sizeof(int)); + + // for each variable + for (var = 0; var < allLValues.getSize(); ++var) { + if (!allLValues.get(var)) + continue; + lval = reinterpret_cast<Value *>(allLValues.get(var))->asLValue(); + if (!lval || lval->defs.empty()) + continue; + ++iterCount; + + // TODO: don't add phi functions for values that aren't used outside + // the BB they're defined in + + // gather blocks with assignments to lval in workList + for (Value::DefIterator d = lval->defs.begin(); + d != lval->defs.end(); ++d) { + bb = ((*d)->getInsn() ? 
(*d)->getInsn()->bb : NULL); + if (!bb) + continue; // instruction likely been removed but not XXX deleted + + if (work[bb->getId()] == iterCount) + continue; + work[bb->getId()] = iterCount; + workList.insert(bb); + } + + // for each block in workList, insert a phi for lval in the block's + // dominance frontier (if we haven't already done so) + for (DLList::Iterator wI = workList.iterator(); !wI.end(); wI.erase()) { + bb = BasicBlock::get(wI); + + DLList::Iterator dfIter = bb->getDF().iterator(); + for (; !dfIter.end(); dfIter.next()) { + Instruction *phi; + BasicBlock *dfBB = BasicBlock::get(dfIter); + + if (hasAlready[dfBB->getId()] >= iterCount) + continue; + hasAlready[dfBB->getId()] = iterCount; + + // pruned SSA: don't need a phi if the value is not live-in + if (!dfBB->liveSet.test(lval->id)) + continue; + + phi = new_Instruction(this, OP_PHI, typeOfSize(lval->reg.size)); + dfBB->insertTail(phi); + + phi->setDef(0, lval); + for (int s = 0; s < dfBB->cfg.incidentCount(); ++s) + phi->setSrc(s, lval); + + if (work[dfBB->getId()] < iterCount) { + work[dfBB->getId()] = iterCount; + wI.insert(dfBB); + } + } + } + } + delete[] hasAlready; + + RenamePass rename(this); + return rename.run(); +} + +RenamePass::RenamePass(Function *fn) : func(fn), prog(fn->getProgram()) +{ + stack = new Stack[func->allLValues.getSize()]; +} + +RenamePass::~RenamePass() +{ + if (stack) + delete[] stack; +} + +LValue * +RenamePass::getStackTop(Value *val) +{ + if (!stack[val->id].getSize()) + return 0; + return reinterpret_cast<LValue *>(stack[val->id].peek().u.p); +} + +LValue * +RenamePass::mkUndefined(Value *val) +{ + LValue *lval = val->asLValue(); + assert(lval); + LValue *ud = new_LValue(func, lval); + Instruction *nop = new_Instruction(func, OP_NOP, typeOfSize(lval->reg.size)); + nop->setDef(0, ud); + BasicBlock::get(func->cfg.getRoot())->insertHead(nop); + return ud; +} + +bool RenamePass::run() +{ + if (!stack) + return false; + 
search(BasicBlock::get(func->domTree->getRoot())); + + return true; +} + +// Go through BBs in dominance order, create new values for each definition, +// and replace all sources with their current new values. +// +// NOTE: The values generated for function inputs/outputs have no connection +// to their corresponding outputs/inputs in other functions. Only allocation +// of physical registers will establish this connection. +// +void RenamePass::search(BasicBlock *bb) +{ + LValue *lval, *ssa; + int d, s; + const Target *targ = prog->getTarget(); + + // Put current definitions for function inputs values on the stack. + // They can be used before any redefinitions are pushed. + if (bb == BasicBlock::get(func->cfg.getRoot())) { + for (std::deque<ValueDef>::iterator it = func->ins.begin(); + it != func->ins.end(); ++it) { + lval = it->get()->asLValue(); + assert(lval); + + ssa = new_LValue(func, targ->nativeFile(lval->reg.file)); + ssa->reg.size = lval->reg.size; + ssa->reg.data.id = lval->reg.data.id; + + it->setSSA(ssa); + stack[lval->id].push(ssa); + } + } + + for (Instruction *stmt = bb->getFirst(); stmt; stmt = stmt->next) { + // PHI sources get definitions from the passes through the incident BBs, + // so skip them here. + if (stmt->op != OP_PHI) { + for (s = 0; stmt->srcExists(s); ++s) { + lval = stmt->getSrc(s)->asLValue(); + if (!lval) + continue; + // Values on the stack created in previously visited blocks, and + // function inputs, will be valid because they dominate this one. 
+ lval = getStackTop(lval); + if (!lval) + lval = mkUndefined(stmt->getSrc(s)); + stmt->setSrc(s, lval); + } + } + for (d = 0; stmt->defExists(d); ++d) { + lval = stmt->def(d).get()->asLValue(); + assert(lval); + stmt->def(d).setSSA( + new_LValue(func, targ->nativeFile(lval->reg.file))); + stmt->def(d).get()->reg.size = lval->reg.size; + stmt->def(d).get()->reg.data.id = lval->reg.data.id; + stack[lval->id].push(stmt->def(d).get()); + } + } + + // Update sources of PHI ops corresponding to this BB in outgoing BBs. + for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { + Instruction *phi; + int p = 0; + BasicBlock *sb = BasicBlock::get(ei.getNode()); + + // which predecessor of sb is bb ? + for (Graph::EdgeIterator ei = sb->cfg.incident(); !ei.end(); ei.next()) { + if (ei.getNode() == &bb->cfg) + break; + ++p; + } + assert(p < sb->cfg.incidentCount()); + + for (phi = sb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) { + lval = getStackTop(phi->getSrc(p)); + if (!lval) + lval = mkUndefined(phi->getSrc(p)); + phi->setSrc(p, lval); + } + } + + // Visit the BBs we dominate. + for (Graph::EdgeIterator ei = bb->dom.outgoing(); !ei.end(); ei.next()) + search(BasicBlock::get(ei.getNode())); + + // Update function outputs to the last definitions of their pre-SSA values. + // I hope they're unique, i.e. that we get PHIs for all of them ... + if (bb == BasicBlock::get(func->cfgExit)) { + for (std::deque<ValueRef>::iterator it = func->outs.begin(); + it != func->outs.end(); ++it) { + lval = it->get()->asLValue(); + if (!lval) + continue; + lval = getStackTop(lval); + if (!lval) + lval = mkUndefined(it->get()); + it->set(lval); + } + } + + // Pop the values we created in this block from the stack because we will + // return to blocks that we do not dominate. 
+ for (Instruction *stmt = bb->getFirst(); stmt; stmt = stmt->next) { + if (stmt->op == OP_NOP) + continue; + for (d = 0; stmt->defExists(d); ++d) + stack[stmt->def(d).preSSA()->id].pop(); + } +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp new file mode 100644 index 00000000000..443acfcd812 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -0,0 +1,469 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_target.h" + +namespace nv50_ir { + +const uint8_t Target::operationSrcNr[OP_LAST + 1] = +{ + 0, 0, // NOP, PHI + 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT + 1, 1, 2, // MOV, LOAD, STORE + 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD + 1, 1, 1, // ABS, NEG, NOT + 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR + 2, 2, 1, // MAX, MIN, SAT + 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT + 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT + 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2 + 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW + 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK, + 0, 0, 0, // PRERET,CONT,BREAK + 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR + 1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP + 1, 1, // EMIT, RESTART + 1, 1, 1, // TEX, TXB, TXL, + 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TEXCSAA, TEXPREP + 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA + 3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP + 0, // TEXBAR + 1, 1, // DFDX, DFDY + 1, 2, 2, 0, 0, // RDSV, WRSV, QUADOP, QUADON, QUADPOP + 2, 3, 2, 3, // POPCNT, INSBF, EXTBF, PERMT + 2, 2, // ATOM, BAR + 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, + 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL + 0 +}; + +const OpClass Target::operationClass[OP_LAST + 1] = +{ + // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT + OPCLASS_OTHER, + OPCLASS_PSEUDO, + OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, + // MOV; LOAD; STORE + OPCLASS_MOVE, + OPCLASS_LOAD, + OPCLASS_STORE, + // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD + OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, + OPCLASS_ARITH, OPCLASS_ARITH, + OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, + // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR + OPCLASS_CONVERT, OPCLASS_CONVERT, + OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, + OPCLASS_SHIFT, OPCLASS_SHIFT, + // MAX, MIN + OPCLASS_COMPARE, 
OPCLASS_COMPARE, + // SAT, CEIL, FLOOR, TRUNC; CVT + OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, + OPCLASS_CONVERT, + // SET(AND,OR,XOR); SELP, SLCT + OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, + OPCLASS_COMPARE, OPCLASS_COMPARE, + // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW + OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, + OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, + OPCLASS_SFU, OPCLASS_SFU, + // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN + OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, + OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, + OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, + // DISCARD, EXIT + OPCLASS_FLOW, OPCLASS_FLOW, + // MEMBAR + OPCLASS_CONTROL, + // VFETCH, PFETCH, EXPORT + OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE, + // LINTERP, PINTERP + OPCLASS_SFU, OPCLASS_SFU, + // EMIT, RESTART + OPCLASS_CONTROL, OPCLASS_CONTROL, + // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA; TEXPREP + OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, + OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, + OPCLASS_TEXTURE, + // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA + OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE, + OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE, + // SUBFM, SUCLAMP, SUEAU, MADSP + OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH, + // TEXBAR + OPCLASS_OTHER, + // DFDX, DFDY, RDSV, WRSV; QUADOP, QUADON, QUADPOP + OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, + OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, + // POPCNT, INSBF, EXTBF, PERMT + OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, + // ATOM, BAR + OPCLASS_ATOMIC, OPCLASS_CONTROL, + // VADD, VAVG, VMIN, VMAX + OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, + // VSAD, VSET, VSHR, VSHL + OPCLASS_VECTOR, 
OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, + // VSEL, CCTL + OPCLASS_VECTOR, OPCLASS_CONTROL, + OPCLASS_PSEUDO // LAST +}; + + +extern Target *getTargetNVC0(unsigned int chipset); +extern Target *getTargetNV50(unsigned int chipset); + +Target *Target::create(unsigned int chipset) +{ + switch (chipset & 0xf0) { + case 0xc0: + case 0xd0: + case 0xe0: + case NVISA_GK110_CHIPSET: + return getTargetNVC0(chipset); + case 0x50: + case 0x80: + case 0x90: + case 0xa0: + return getTargetNV50(chipset); + default: + ERROR("unsupported target: NV%x\n", chipset); + return 0; + } +} + +void Target::destroy(Target *targ) +{ + delete targ; +} + +CodeEmitter::CodeEmitter(const Target *target) : targ(target) +{ +} + +void +CodeEmitter::setCodeLocation(void *ptr, uint32_t size) +{ + code = reinterpret_cast<uint32_t *>(ptr); + codeSize = 0; + codeSizeLimit = size; +} + +void +CodeEmitter::printBinary() const +{ + uint32_t *bin = code - codeSize / 4; + INFO("program binary (%u bytes)", codeSize); + for (unsigned int pos = 0; pos < codeSize / 4; ++pos) { + if ((pos % 8) == 0) + INFO("\n"); + INFO("%08x ", bin[pos]); + } + INFO("\n"); +} + +static inline uint32_t sizeToBundlesNVE4(uint32_t size) +{ + return (size + 55) / 56; +} + +void +CodeEmitter::prepareEmission(Program *prog) +{ + for (ArrayList::Iterator fi = prog->allFuncs.iterator(); + !fi.end(); fi.next()) { + Function *func = reinterpret_cast<Function *>(fi.get()); + func->binPos = prog->binSize; + prepareEmission(func); + + // adjust sizes & positions for scheduling info: + if (prog->getTarget()->hasSWSched) { + uint32_t adjPos = func->binPos; + BasicBlock *bb = NULL; + for (int i = 0; i < func->bbCount; ++i) { + bb = func->bbArray[i]; + int32_t adjSize = bb->binSize; + if (adjPos % 64) { + adjSize -= 64 - adjPos % 64; + if (adjSize < 0) + adjSize = 0; + } + adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8; + bb->binPos = adjPos; + bb->binSize = adjSize; + adjPos += adjSize; + } + if (bb) + func->binSize = adjPos - 
func->binPos; + } + + prog->binSize += func->binSize; + } +} + +void +CodeEmitter::prepareEmission(Function *func) +{ + func->bbCount = 0; + func->bbArray = new BasicBlock * [func->cfg.getSize()]; + + BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos; + + for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next()) + prepareEmission(BasicBlock::get(*it)); +} + +void +CodeEmitter::prepareEmission(BasicBlock *bb) +{ + Instruction *i, *next; + Function *func = bb->getFunction(); + int j; + unsigned int nShort; + + for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j); + + for (; j >= 0; --j) { + BasicBlock *in = func->bbArray[j]; + Instruction *exit = in->getExit(); + + if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) { + in->binSize -= 8; + func->binSize -= 8; + + for (++j; j < func->bbCount; ++j) + func->bbArray[j]->binPos -= 8; + + in->remove(exit); + } + bb->binPos = in->binPos + in->binSize; + if (in->binSize) // no more no-op branches to bb + break; + } + func->bbArray[func->bbCount++] = bb; + + if (!bb->getExit()) + return; + + // determine encoding size, try to group short instructions + nShort = 0; + for (i = bb->getEntry(); i; i = next) { + next = i->next; + + if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) { + bb->remove(i); + continue; + } + + i->encSize = getMinEncodingSize(i); + if (next && i->encSize < 8) + ++nShort; + else + if ((nShort & 1) && next && getMinEncodingSize(next) == 4) { + if (i->isCommutationLegal(i->next)) { + bb->permuteAdjacent(i, next); + next->encSize = 4; + next = i; + i = i->prev; + ++nShort; + } else + if (i->isCommutationLegal(i->prev) && next->next) { + bb->permuteAdjacent(i->prev, i); + next->encSize = 4; + next = next->next; + bb->binSize += 4; + ++nShort; + } else { + i->encSize = 8; + i->prev->encSize = 8; + bb->binSize += 4; + nShort = 0; + } + } else { + i->encSize = 8; + if (nShort & 1) { + i->prev->encSize = 8; + bb->binSize += 4; + } + nShort 
= 0; + } + bb->binSize += i->encSize; + } + + if (bb->getExit()->encSize == 4) { + assert(nShort); + bb->getExit()->encSize = 8; + bb->binSize += 4; + + if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) { + bb->binSize += 8; + bb->getExit()->prev->encSize = 8; + } + } + assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8)); + + func->binSize += bb->binSize; +} + +void +Program::emitSymbolTable(struct nv50_ir_prog_info *info) +{ + unsigned int n = 0, nMax = allFuncs.getSize(); + + info->bin.syms = + (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms)); + + for (ArrayList::Iterator fi = allFuncs.iterator(); + !fi.end(); + fi.next(), ++n) { + Function *f = (Function *)fi.get(); + assert(n < nMax); + + info->bin.syms[n].label = f->getLabel(); + info->bin.syms[n].offset = f->binPos; + } + + info->bin.numSyms = n; +} + +bool +Program::emitBinary(struct nv50_ir_prog_info *info) +{ + CodeEmitter *emit = target->getCodeEmitter(progType); + + emit->prepareEmission(this); + + if (dbgFlags & NV50_IR_DEBUG_BASIC) + this->print(); + + if (!binSize) { + code = NULL; + return false; + } + code = reinterpret_cast<uint32_t *>(MALLOC(binSize)); + if (!code) + return false; + emit->setCodeLocation(code, binSize); + + for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) { + Function *fn = reinterpret_cast<Function *>(fi.get()); + + assert(emit->getCodeSize() == fn->binPos); + + for (int b = 0; b < fn->bbCount; ++b) + for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) + emit->emitInstruction(i); + } + info->bin.relocData = emit->getRelocInfo(); + + emitSymbolTable(info); + + // the nvc0 driver will print the binary itself together with the header + if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0) + emit->printBinary(); + + delete emit; + return true; +} + +#define RELOC_ALLOC_INCREMENT 8 + +bool +CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m, + int s) +{ + 
unsigned int n = relocInfo ? relocInfo->count : 0; + + if (!(n % RELOC_ALLOC_INCREMENT)) { + size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry); + relocInfo = reinterpret_cast<RelocInfo *>( + REALLOC(relocInfo, n ? size : 0, + size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry))); + if (!relocInfo) + return false; + if (n == 0) + memset(relocInfo, 0, sizeof(RelocInfo)); + } + ++relocInfo->count; + + relocInfo->entry[n].data = data; + relocInfo->entry[n].mask = m; + relocInfo->entry[n].offset = codeSize + w * 4; + relocInfo->entry[n].bitPos = s; + relocInfo->entry[n].type = ty; + + return true; +} + +void +RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const +{ + uint32_t value = 0; + + switch (type) { + case TYPE_CODE: value = info->codePos; break; + case TYPE_BUILTIN: value = info->libPos; break; + case TYPE_DATA: value = info->dataPos; break; + default: + assert(0); + break; + } + value += data; + value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos); + + binary[offset / 4] &= ~mask; + binary[offset / 4] |= value & mask; +} + +} // namespace nv50_ir + + +#include "codegen/nv50_ir_driver.h" + +extern "C" { + +void +nv50_ir_relocate_code(void *relocData, uint32_t *code, + uint32_t codePos, + uint32_t libPos, + uint32_t dataPos) +{ + nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData); + + info->codePos = codePos; + info->libPos = libPos; + info->dataPos = dataPos; + + for (unsigned int i = 0; i < info->count; ++i) + info->entry[i].apply(code, info); +} + +void +nv50_ir_get_target_library(uint32_t chipset, + const uint32_t **code, uint32_t *size) +{ + nv50_ir::Target *targ = nv50_ir::Target::create(chipset); + targ->getBuiltinCode(code, size); + nv50_ir::Target::destroy(targ); +} + +} diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h new file mode 100644 index 00000000000..9913ca13ddd --- /dev/null +++ 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h @@ -0,0 +1,235 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __NV50_IR_TARGET_H__ +#define __NV50_IR_TARGET_H__ + +#include "codegen/nv50_ir.h" + +namespace nv50_ir { + +struct RelocInfo; + +struct RelocEntry +{ + enum Type + { + TYPE_CODE, + TYPE_BUILTIN, + TYPE_DATA + }; + + uint32_t data; + uint32_t mask; + uint32_t offset; + int8_t bitPos; + Type type; + + inline void apply(uint32_t *binary, const RelocInfo *info) const; +}; + +struct RelocInfo +{ + uint32_t codePos; + uint32_t libPos; + uint32_t dataPos; + + uint32_t count; + + RelocEntry entry[0]; +}; + +class CodeEmitter +{ +public: + CodeEmitter(const Target *); + virtual ~CodeEmitter() { } + + // returns whether the instruction was encodable and written + virtual bool emitInstruction(Instruction *) = 0; + + virtual uint32_t getMinEncodingSize(const Instruction *) const = 0; + + void setCodeLocation(void *, uint32_t size); + inline void *getCodeLocation() const { return code; } + inline uint32_t getCodeSize() const { return codeSize; } + + bool addReloc(RelocEntry::Type, int w, uint32_t data, uint32_t m, + int s); + + inline void *getRelocInfo() const { return relocInfo; } + + void prepareEmission(Program *); + virtual void prepareEmission(Function *); + virtual void prepareEmission(BasicBlock *); + + void printBinary() const; + +protected: + const Target *targ; + + uint32_t *code; + uint32_t codeSize; + uint32_t codeSizeLimit; + + RelocInfo *relocInfo; +}; + + +enum OpClass +{ + OPCLASS_MOVE = 0, + OPCLASS_LOAD = 1, + OPCLASS_STORE = 2, + OPCLASS_ARITH = 3, + OPCLASS_SHIFT = 4, + OPCLASS_SFU = 5, + OPCLASS_LOGIC = 6, + OPCLASS_COMPARE = 7, + OPCLASS_CONVERT = 8, + OPCLASS_ATOMIC = 9, + OPCLASS_TEXTURE = 10, + OPCLASS_SURFACE = 11, + OPCLASS_FLOW = 12, + OPCLASS_PSEUDO = 14, + OPCLASS_VECTOR = 15, + OPCLASS_BITFIELD = 16, + OPCLASS_CONTROL = 17, + OPCLASS_OTHER = 18 +}; + +class Target +{ +public: + Target(bool j, bool s) : joinAnterior(j), hasSWSched(s) { } + virtual ~Target() { } + + static Target *create(uint32_t chipset); + static void 
destroy(Target *); + + // 0x50 and 0x84 to 0xaf for nv50 + // 0xc0 to 0xdf for nvc0 + inline uint32_t getChipset() const { return chipset; } + + virtual CodeEmitter *getCodeEmitter(Program::Type) = 0; + + // Drivers should upload this so we can use it from all programs. + // The address chosen is supplied to the relocation routine. + virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const = 0; + + virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) { } + + virtual bool runLegalizePass(Program *, CGStage stage) const = 0; + +public: + struct OpInfo + { + OpInfo *variants; + operation op; + uint16_t srcTypes; + uint16_t dstTypes; + uint32_t immdBits; + uint8_t srcNr; + uint8_t srcMods[3]; + uint8_t dstMods; + uint8_t srcFiles[3]; + uint8_t dstFiles; + unsigned int minEncSize : 4; + unsigned int vector : 1; + unsigned int predicate : 1; + unsigned int commutative : 1; + unsigned int pseudo : 1; + unsigned int flow : 1; + unsigned int hasDest : 1; + unsigned int terminator : 1; + }; + + inline const OpInfo& getOpInfo(const Instruction *) const; + inline const OpInfo& getOpInfo(const operation) const; + + inline DataFile nativeFile(DataFile f) const; + + virtual bool insnCanLoad(const Instruction *insn, int s, + const Instruction *ld) const = 0; + virtual bool isOpSupported(operation, DataType) const = 0; + virtual bool isAccessSupported(DataFile, DataType) const = 0; + virtual bool isModSupported(const Instruction *, + int s, Modifier) const = 0; + virtual bool isSatSupported(const Instruction *) const = 0; + virtual bool isPostMultiplySupported(operation op, float f, + int& e) const { return false; } + virtual bool mayPredicate(const Instruction *, + const Value *) const = 0; + + // whether @insn can be issued together with @next (order matters) + virtual bool canDualIssue(const Instruction *insn, + const Instruction *next) const { return false; } + virtual int getLatency(const Instruction *) const { return 1; } + virtual int 
getThroughput(const Instruction *) const { return 1; } + + virtual unsigned int getFileSize(DataFile) const = 0; + virtual unsigned int getFileUnit(DataFile) const = 0; + + virtual uint32_t getSVAddress(DataFile, const Symbol *) const = 0; + +public: + const bool joinAnterior; // true if join is executed before the op + const bool hasSWSched; // true if code should provide scheduling data + + static const uint8_t operationSrcNr[OP_LAST + 1]; + static const OpClass operationClass[OP_LAST + 1]; + + static inline uint8_t getOpSrcNr(operation op) + { + return operationSrcNr[op]; + } + static inline OpClass getOpClass(operation op) + { + return operationClass[op]; + } + +protected: + uint32_t chipset; + + DataFile nativeFileMap[DATA_FILE_COUNT]; + + OpInfo opInfo[OP_LAST + 1]; +}; + +const Target::OpInfo& Target::getOpInfo(const Instruction *insn) const +{ + return opInfo[MIN2(insn->op, OP_LAST)]; +} + +const Target::OpInfo& Target::getOpInfo(const operation op) const +{ + return opInfo[op]; +} + +inline DataFile Target::nativeFile(DataFile f) const +{ + return nativeFileMap[f]; +} + +} // namespace nv50_ir + +#endif // __NV50_IR_TARGET_H__ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp new file mode 100644 index 00000000000..ade9be0300c --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -0,0 +1,552 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission 
notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "codegen/nv50_ir_target_nv50.h" + +namespace nv50_ir { + +Target *getTargetNV50(unsigned int chipset) +{ + return new TargetNV50(chipset); +} + +TargetNV50::TargetNV50(unsigned int card) : Target(true, false) +{ + chipset = card; + + wposMask = 0; + for (unsigned int i = 0; i <= SV_LAST; ++i) + sysvalLocation[i] = ~0; + + initOpInfo(); +} + +#if 0 +// BULTINS / LIBRARY FUNCTIONS: + +// TODO +static const uint32_t nvc0_builtin_code[] = +{ +}; + +static const uint16_t nvc0_builtin_offsets[NV50_BUILTIN_COUNT] = +{ +}; +#endif + +void +TargetNV50::getBuiltinCode(const uint32_t **code, uint32_t *size) const +{ + *code = NULL; + *size = 0; +} + +uint32_t +TargetNV50::getBuiltinOffset(int builtin) const +{ + return 0; +} + +struct opProperties +{ + operation op; + unsigned int mNeg : 4; + unsigned int mAbs : 4; + unsigned int mNot : 4; + unsigned int mSat : 4; + unsigned int fConst : 3; + unsigned int fShared : 3; + unsigned int fAttrib : 3; + unsigned int fImm : 3; +}; + +static const struct opProperties _initProps[] = +{ + // neg abs not sat c[] s[], a[], imm + { OP_ADD, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 }, + { OP_SUB, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 }, + { OP_MUL, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 }, + { OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, + { OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, + { OP_MAD, 0x7, 0x0, 0x0, 0x0, 0x6, 0x1, 0x1, 0x0 }, // special 
constraint + { OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0 }, + { OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0 }, + { OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x0, 0x1, 0x1, 0x0 }, + { OP_AND, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 }, + { OP_OR, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 }, + { OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 }, + { OP_SHL, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2 }, + { OP_SHR, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2 }, + { OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, + { OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, + { OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, + { OP_LG2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, + { OP_RCP, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, + { OP_RSQ, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, + { OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, + { OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, +}; + +void TargetNV50::initOpInfo() +{ + unsigned int i, j; + + static const uint32_t commutative[(OP_LAST + 31) / 32] = + { + // ADD,MAD,MUL,AND,OR,XOR,MAX,MIN + 0x0670ca00, 0x0000003f, 0x00000000, 0x00000000 + }; + static const uint32_t shortForm[(OP_LAST + 31) / 32] = + { + // MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF + 0x00010e40, 0x00000040, 0x00000498, 0x00000000 + }; + static const operation noDestList[] = + { + OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT, + OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET, + OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART, + OP_QUADON, OP_QUADPOP, OP_TEXBAR, OP_SUSTB, OP_SUSTP, OP_SUREDP, + OP_SUREDB, OP_BAR + }; + static const operation noPredList[] = + { + OP_CALL, OP_PREBREAK, OP_PRERET, OP_QUADON, OP_QUADPOP, OP_JOINAT + }; + + for (i = 0; i < DATA_FILE_COUNT; ++i) + nativeFileMap[i] = (DataFile)i; + nativeFileMap[FILE_PREDICATE] = FILE_FLAGS; + + for (i = 0; i < OP_LAST; ++i) { + opInfo[i].variants = NULL; + opInfo[i].op = (operation)i; + opInfo[i].srcTypes = 1 << (int)TYPE_F32; + opInfo[i].dstTypes = 1 
<< (int)TYPE_F32; + opInfo[i].immdBits = 0xffffffff; + opInfo[i].srcNr = operationSrcNr[i]; + + for (j = 0; j < opInfo[i].srcNr; ++j) { + opInfo[i].srcMods[j] = 0; + opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR; + } + opInfo[i].dstMods = 0; + opInfo[i].dstFiles = 1 << (int)FILE_GPR; + + opInfo[i].hasDest = 1; + opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA); + opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1; + opInfo[i].pseudo = (i < OP_MOV); + opInfo[i].predicate = !opInfo[i].pseudo; + opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN); + opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8; + } + for (i = 0; i < sizeof(noDestList) / sizeof(noDestList[0]); ++i) + opInfo[noDestList[i]].hasDest = 0; + for (i = 0; i < sizeof(noPredList) / sizeof(noPredList[0]); ++i) + opInfo[noPredList[i]].predicate = 0; + + for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) { + const struct opProperties *prop = &_initProps[i]; + + for (int s = 0; s < 3; ++s) { + if (prop->mNeg & (1 << s)) + opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG; + if (prop->mAbs & (1 << s)) + opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS; + if (prop->mNot & (1 << s)) + opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT; + if (prop->fConst & (1 << s)) + opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST; + if (prop->fShared & (1 << s)) + opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_SHARED; + if (prop->fAttrib & (1 << s)) + opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_SHADER_INPUT; + if (prop->fImm & (1 << s)) + opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE; + } + if (prop->mSat & 8) + opInfo[prop->op].dstMods = NV50_IR_MOD_SAT; + } +} + +unsigned int +TargetNV50::getFileSize(DataFile file) const +{ + switch (file) { + case FILE_NULL: return 0; + case FILE_GPR: return 256; // in 16-bit units ** + case FILE_PREDICATE: return 0; + case FILE_FLAGS: return 4; + case FILE_ADDRESS: return 4; + case FILE_IMMEDIATE: return 0; + case 
FILE_MEMORY_CONST: return 65536; + case FILE_SHADER_INPUT: return 0x200; + case FILE_SHADER_OUTPUT: return 0x200; + case FILE_MEMORY_GLOBAL: return 0xffffffff; + case FILE_MEMORY_SHARED: return 16 << 10; + case FILE_MEMORY_LOCAL: return 48 << 10; + case FILE_SYSTEM_VALUE: return 16; + default: + assert(!"invalid file"); + return 0; + } + // ** only first 128 units encodable for 16-bit regs +} + +unsigned int +TargetNV50::getFileUnit(DataFile file) const +{ + if (file == FILE_GPR || file == FILE_ADDRESS) + return 1; + if (file == FILE_SYSTEM_VALUE) + return 2; + return 0; +} + +uint32_t +TargetNV50::getSVAddress(DataFile shaderFile, const Symbol *sym) const +{ + switch (sym->reg.data.sv.sv) { + case SV_FACE: + return 0x3fc; + case SV_POSITION: + { + uint32_t addr = sysvalLocation[sym->reg.data.sv.sv]; + for (int c = 0; c < sym->reg.data.sv.index; ++c) + if (wposMask & (1 << c)) + addr += 4; + return addr; + } + case SV_NCTAID: + return 0x8 + 2 * sym->reg.data.sv.index; + case SV_CTAID: + return 0xc + 2 * sym->reg.data.sv.index; + case SV_NTID: + return 0x2 + 2 * sym->reg.data.sv.index; + case SV_TID: + return 0; + default: + return sysvalLocation[sym->reg.data.sv.sv]; + } +} + +// long: rrr, arr, rcr, acr, rrc, arc, gcr, grr +// short: rr, ar, rc, gr +// immd: ri, gi +bool +TargetNV50::insnCanLoad(const Instruction *i, int s, + const Instruction *ld) const +{ + DataFile sf = ld->src(0).getFile(); + + if (sf == FILE_IMMEDIATE && (i->predSrc >= 0 || i->flagsDef >= 0)) + return false; + if (s >= opInfo[i->op].srcNr) + return false; + if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf))) + return false; + if (s == 2 && i->src(1).getFile() != FILE_GPR) + return false; + + // NOTE: don't rely on flagsDef + for (int d = 0; i->defExists(d); ++d) + if (i->def(d).getFile() == FILE_FLAGS) + return false; + + unsigned mode = 0; + + for (int z = 0; z < Target::operationSrcNr[i->op]; ++z) { + DataFile zf = (z == s) ? 
sf : i->src(z).getFile(); + switch (zf) { + case FILE_GPR: + break; + case FILE_MEMORY_SHARED: + case FILE_SHADER_INPUT: + mode |= 1 << (z * 2); + break; + case FILE_MEMORY_CONST: + mode |= 2 << (z * 2); + break; + case FILE_IMMEDIATE: + mode |= 3 << (z * 2); + default: + break; + } + } + + switch (mode) { + case 0x00: + case 0x01: + case 0x03: + case 0x08: + case 0x09: + case 0x0c: + case 0x20: + case 0x21: + break; + case 0x0d: + if (ld->bb->getProgram()->getType() != Program::TYPE_GEOMETRY) + return false; + default: + return false; + } + + uint8_t ldSize; + + if ((i->op == OP_MUL || i->op == OP_MAD) && !isFloatType(i->dType)) { + // 32-bit MUL will be split into 16-bit MULs + if (ld->src(0).isIndirect(0)) + return false; + if (sf == FILE_IMMEDIATE) + return false; + ldSize = 2; + } else { + ldSize = typeSizeof(ld->dType); + } + + if (sf == FILE_IMMEDIATE) + return true; + + + // Check if memory access is encodable: + + if (ldSize < 4 && sf == FILE_SHADER_INPUT) // no < 4-byte aligned a[] access + return false; + if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * ldSize)) + return false; + + if (ld->src(0).isIndirect(0)) { + for (int z = 0; i->srcExists(z); ++z) + if (i->src(z).isIndirect(0)) + return false; + + // s[] access only possible in CP, $aX always applies + if (sf == FILE_MEMORY_SHARED) + return true; + if (!ld->bb) // can't check type ... 
+ return false; + Program::Type pt = ld->bb->getProgram()->getType(); + + // $aX applies to c[] only in VP, FP, GP if p[] is not accessed + if (pt == Program::TYPE_COMPUTE) + return false; + if (pt == Program::TYPE_GEOMETRY) { + if (sf == FILE_MEMORY_CONST) + return i->src(s).getFile() != FILE_SHADER_INPUT; + return sf == FILE_SHADER_INPUT; + } + return sf == FILE_MEMORY_CONST; + } + return true; +} + +bool +TargetNV50::isAccessSupported(DataFile file, DataType ty) const +{ + if (ty == TYPE_B96 || ty == TYPE_NONE) + return false; + if (typeSizeof(ty) > 4) + return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL); + return true; +} + +bool +TargetNV50::isOpSupported(operation op, DataType ty) const +{ + if (ty == TYPE_F64 && chipset < 0xa0) + return false; + + switch (op) { + case OP_PRERET: + return chipset >= 0xa0; + case OP_TXG: + return chipset >= 0xa3; + case OP_POW: + case OP_SQRT: + case OP_DIV: + case OP_MOD: + case OP_SET_AND: + case OP_SET_OR: + case OP_SET_XOR: + case OP_SLCT: + case OP_SELP: + case OP_POPCNT: + case OP_INSBF: + case OP_EXTBF: + case OP_EXIT: // want exit modifier instead (on NOP if required) + case OP_MEMBAR: + return false; + case OP_SAD: + return ty == TYPE_S32; + default: + return true; + } +} + +bool +TargetNV50::isModSupported(const Instruction *insn, int s, Modifier mod) const +{ + if (!isFloatType(insn->dType)) { + switch (insn->op) { + case OP_ABS: + case OP_NEG: + case OP_CVT: + case OP_CEIL: + case OP_FLOOR: + case OP_TRUNC: + case OP_AND: + case OP_OR: + case OP_XOR: + break; + case OP_ADD: + if (insn->src(s ? 0 : 1).mod.neg()) + return false; + break; + case OP_SUB: + if (s == 0) + return insn->src(1).mod.neg() ? 
false : true; + break; + case OP_SET: + if (insn->sType != TYPE_F32) + return false; + break; + default: + return false; + } + } + if (s > 3) + return false; + return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod; +} + +bool +TargetNV50::mayPredicate(const Instruction *insn, const Value *pred) const +{ + if (insn->getPredicate() || insn->flagsSrc >= 0) + return false; + for (int s = 0; insn->srcExists(s); ++s) + if (insn->src(s).getFile() == FILE_IMMEDIATE) + return false; + return opInfo[insn->op].predicate; +} + +bool +TargetNV50::isSatSupported(const Instruction *insn) const +{ + if (insn->op == OP_CVT) + return true; + if (insn->dType != TYPE_F32) + return false; + return opInfo[insn->op].dstMods & NV50_IR_MOD_SAT; +} + +int TargetNV50::getLatency(const Instruction *i) const +{ + // TODO: tune these values + if (i->op == OP_LOAD) { + switch (i->src(0).getFile()) { + case FILE_MEMORY_LOCAL: + case FILE_MEMORY_GLOBAL: + return 100; // really 400 to 800 + default: + return 22; + } + } + return 22; +} + +// These are "inverse" throughput values, i.e. the number of cycles required +// to issue a specific instruction for a full warp (32 threads). +// +// Assuming we have more than 1 warp in flight, a higher issue latency results +// in a lower result latency since the MP will have spent more time with other +// warps. +// This also helps to determine the number of cycles between instructions in +// a single warp. 
+// +int TargetNV50::getThroughput(const Instruction *i) const +{ + // TODO: tune these values + if (i->dType == TYPE_F32) { + switch (i->op) { + case OP_RCP: + case OP_RSQ: + case OP_LG2: + case OP_SIN: + case OP_COS: + case OP_PRESIN: + case OP_PREEX2: + return 16; + default: + return 4; + } + } else + if (i->dType == TYPE_U32 || i->dType == TYPE_S32) { + return 4; + } else + if (i->dType == TYPE_F64) { + return 32; + } else { + return 1; + } +} + +static void +recordLocation(uint16_t *locs, uint8_t *masks, + const struct nv50_ir_varying *var) +{ + uint16_t addr = var->slot[0] * 4; + + switch (var->sn) { + case TGSI_SEMANTIC_POSITION: locs[SV_POSITION] = addr; break; + case TGSI_SEMANTIC_INSTANCEID: locs[SV_INSTANCE_ID] = addr; break; + case TGSI_SEMANTIC_VERTEXID: locs[SV_VERTEX_ID] = addr; break; + case TGSI_SEMANTIC_PRIMID: locs[SV_PRIMITIVE_ID] = addr; break; + case NV50_SEMANTIC_LAYER: locs[SV_LAYER] = addr; break; + case NV50_SEMANTIC_VIEWPORTINDEX: locs[SV_VIEWPORT_INDEX] = addr; break; + default: + break; + } + if (var->sn == TGSI_SEMANTIC_POSITION && masks) + masks[0] = var->mask; +} + +void +TargetNV50::parseDriverInfo(const struct nv50_ir_prog_info *info) +{ + unsigned int i; + for (i = 0; i < info->numOutputs; ++i) + recordLocation(sysvalLocation, NULL, &info->out[i]); + for (i = 0; i < info->numInputs; ++i) + recordLocation(sysvalLocation, &wposMask, &info->in[i]); + for (i = 0; i < info->numSysVals; ++i) + recordLocation(sysvalLocation, NULL, &info->sv[i]); + + if (sysvalLocation[SV_POSITION] >= 0x200) { + // not assigned by driver, but we need it internally + wposMask = 0x8; + sysvalLocation[SV_POSITION] = 0; + } +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.h new file mode 100644 index 00000000000..0cbf180d048 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.h @@ -0,0 +1,72 @@ +/* + * Copyright 2011 Christoph 
Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "codegen/nv50_ir_target.h" + +namespace nv50_ir { + +#define NVC0_BUILTIN_DIV_U32 0 +#define NVC0_BUILTIN_DIV_S32 1 +#define NVC0_BUILTIN_RCP_F64 2 +#define NVC0_BUILTIN_RSQ_F64 3 + +#define NVC0_BUILTIN_COUNT 4 + +class TargetNV50 : public Target +{ +public: + TargetNV50(unsigned int chipset); + + virtual CodeEmitter *getCodeEmitter(Program::Type); + + virtual bool runLegalizePass(Program *, CGStage stage) const; + + virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const; + + virtual void parseDriverInfo(const struct nv50_ir_prog_info *); + + virtual bool insnCanLoad(const Instruction *insn, int s, + const Instruction *ld) const; + virtual bool isOpSupported(operation, DataType) const; + virtual bool isAccessSupported(DataFile, DataType) const; + virtual bool isModSupported(const Instruction *, int s, Modifier) const; + virtual bool isSatSupported(const Instruction *) const; + virtual bool mayPredicate(const Instruction *, const Value *) const; + + virtual int getLatency(const Instruction *) const; + virtual int getThroughput(const Instruction *) const; + + virtual unsigned int getFileSize(DataFile) const; + virtual unsigned int getFileUnit(DataFile) const; + + virtual uint32_t getSVAddress(DataFile shaderFile, const Symbol *sv) const; + + uint32_t getBuiltinOffset(int builtin) const; + +private: + void initOpInfo(); + + uint16_t sysvalLocation[SV_LAST + 1]; + uint8_t wposMask; +}; + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp new file mode 100644 index 00000000000..47e9c558d35 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -0,0 +1,604 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "codegen/nv50_ir_target_nvc0.h" + +namespace nv50_ir { + +Target *getTargetNVC0(unsigned int chipset) +{ + return new TargetNVC0(chipset); +} + +TargetNVC0::TargetNVC0(unsigned int card) : Target(false, card >= 0xe4) +{ + chipset = card; + initOpInfo(); +} + +// BULTINS / LIBRARY FUNCTIONS: + +// lazyness -> will just hardcode everything for the time being + +#include "target_lib_nvc0.asm.h" +#include "target_lib_nve4.asm.h" +#include "target_lib_nvf0.asm.h" + +void +TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const +{ + switch (chipset & 0xf0) { + case 0xe0: + *code = (const uint32_t *)&nve4_builtin_code[0]; + *size = sizeof(nve4_builtin_code); + break; + case 0xf0: + *code = (const uint32_t *)&nvf0_builtin_code[0]; + *size = sizeof(nvf0_builtin_code); + break; + default: + *code = (const uint32_t *)&nvc0_builtin_code[0]; + *size = sizeof(nvc0_builtin_code); + break; + } +} + +uint32_t +TargetNVC0::getBuiltinOffset(int builtin) const +{ + assert(builtin < NVC0_BUILTIN_COUNT); + + switch (chipset & 0xf0) { + case 0xe0: return 
nve4_builtin_offsets[builtin]; + case 0xf0: return nvf0_builtin_offsets[builtin]; + default: + return nvc0_builtin_offsets[builtin]; + } +} + +struct opProperties +{ + operation op; + unsigned int mNeg : 4; + unsigned int mAbs : 4; + unsigned int mNot : 4; + unsigned int mSat : 4; + unsigned int fConst : 3; + unsigned int fImmd : 4; // last bit indicates if full immediate is suppoted +}; + +static const struct opProperties _initProps[] = +{ + // neg abs not sat c[] imm + { OP_ADD, 0x3, 0x3, 0x0, 0x8, 0x2, 0x2 | 0x8 }, + { OP_SUB, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 | 0x8 }, + { OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x2 | 0x8 }, + { OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, + { OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, + { OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint + { OP_MADSP, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, + { OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 }, + { OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0 }, + { OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, + { OP_CEIL, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, + { OP_FLOOR, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, + { OP_TRUNC, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, + { OP_AND, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 }, + { OP_OR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 }, + { OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 }, + { OP_SHL, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 }, + { OP_SHR, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 }, + { OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, + { OP_SLCT, 0x4, 0x0, 0x0, 0x0, 0x6, 0x2 }, // special c[] constraint + { OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 }, + { OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 }, + { OP_COS, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 }, + { OP_SIN, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 }, + { OP_EX2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 }, + { OP_LG2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 }, + { OP_RCP, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 }, + { OP_RSQ, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 }, + { OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 }, + { OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 }, + { OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 }, + { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 }, + { 
OP_PERMT, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, + { OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, + { OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, + { OP_SET_XOR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, + // saturate only: + { OP_LINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 }, + { OP_PINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 }, + // nve4 ops: + { OP_SULDB, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0 }, + { OP_SUSTB, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0 }, + { OP_SUSTP, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0 }, + { OP_SUCLAMP, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 }, + { OP_SUBFM, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, + { OP_SUEAU, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 } +}; + +void TargetNVC0::initOpInfo() +{ + unsigned int i, j; + + static const uint32_t commutative[(OP_LAST + 31) / 32] = + { + // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN + 0x0670ca00, 0x0000003f, 0x00000000, 0x00000000 + }; + + static const uint32_t shortForm[(OP_LAST + 31) / 32] = + { + // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV + 0x0670ca00, 0x00000000, 0x00000000, 0x00000000 + }; + + static const operation noDest[] = + { + OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT, + OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET, + OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART, + OP_QUADON, OP_QUADPOP, OP_TEXBAR, OP_SUSTB, OP_SUSTP, OP_SUREDP, + OP_SUREDB, OP_BAR + }; + + static const operation noPred[] = + { + OP_CALL, OP_PRERET, OP_QUADON, OP_QUADPOP, + OP_JOINAT, OP_PREBREAK, OP_PRECONT, OP_BRKPT + }; + + for (i = 0; i < DATA_FILE_COUNT; ++i) + nativeFileMap[i] = (DataFile)i; + nativeFileMap[FILE_ADDRESS] = FILE_GPR; + + for (i = 0; i < OP_LAST; ++i) { + opInfo[i].variants = NULL; + opInfo[i].op = (operation)i; + opInfo[i].srcTypes = 1 << (int)TYPE_F32; + opInfo[i].dstTypes = 1 << (int)TYPE_F32; + opInfo[i].immdBits = 0; + opInfo[i].srcNr = operationSrcNr[i]; + + for (j = 0; j < opInfo[i].srcNr; ++j) { + opInfo[i].srcMods[j] = 0; + opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR; + } + opInfo[i].dstMods = 0; + 
opInfo[i].dstFiles = 1 << (int)FILE_GPR; + + opInfo[i].hasDest = 1; + opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA); + opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1; + opInfo[i].pseudo = (i < OP_MOV); + opInfo[i].predicate = !opInfo[i].pseudo; + opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN); + opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8; + } + for (i = 0; i < sizeof(noDest) / sizeof(noDest[0]); ++i) + opInfo[noDest[i]].hasDest = 0; + for (i = 0; i < sizeof(noPred) / sizeof(noPred[0]); ++i) + opInfo[noPred[i]].predicate = 0; + + for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) { + const struct opProperties *prop = &_initProps[i]; + + for (int s = 0; s < 3; ++s) { + if (prop->mNeg & (1 << s)) + opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG; + if (prop->mAbs & (1 << s)) + opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS; + if (prop->mNot & (1 << s)) + opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT; + if (prop->fConst & (1 << s)) + opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST; + if (prop->fImmd & (1 << s)) + opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE; + if (prop->fImmd & 8) + opInfo[prop->op].immdBits = 0xffffffff; + } + if (prop->mSat & 8) + opInfo[prop->op].dstMods = NV50_IR_MOD_SAT; + } +} + +unsigned int +TargetNVC0::getFileSize(DataFile file) const +{ + switch (file) { + case FILE_NULL: return 0; + case FILE_GPR: return (chipset >= NVISA_GK110_CHIPSET) ? 
255 : 63; + case FILE_PREDICATE: return 7; + case FILE_FLAGS: return 1; + case FILE_ADDRESS: return 0; + case FILE_IMMEDIATE: return 0; + case FILE_MEMORY_CONST: return 65536; + case FILE_SHADER_INPUT: return 0x400; + case FILE_SHADER_OUTPUT: return 0x400; + case FILE_MEMORY_GLOBAL: return 0xffffffff; + case FILE_MEMORY_SHARED: return 16 << 10; + case FILE_MEMORY_LOCAL: return 48 << 10; + case FILE_SYSTEM_VALUE: return 32; + default: + assert(!"invalid file"); + return 0; + } +} + +unsigned int +TargetNVC0::getFileUnit(DataFile file) const +{ + if (file == FILE_GPR || file == FILE_ADDRESS || file == FILE_SYSTEM_VALUE) + return 2; + return 0; +} + +uint32_t +TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const +{ + const int idx = sym->reg.data.sv.index; + const SVSemantic sv = sym->reg.data.sv.sv; + + const bool isInput = shaderFile == FILE_SHADER_INPUT; + const bool kepler = getChipset() >= NVISA_GK104_CHIPSET; + + switch (sv) { + case SV_POSITION: return 0x070 + idx * 4; + case SV_INSTANCE_ID: return 0x2f8; + case SV_VERTEX_ID: return 0x2fc; + case SV_PRIMITIVE_ID: return isInput ? 0x060 : 0x040; + case SV_LAYER: return 0x064; + case SV_VIEWPORT_INDEX: return 0x068; + case SV_POINT_SIZE: return 0x06c; + case SV_CLIP_DISTANCE: return 0x2c0 + idx * 4; + case SV_POINT_COORD: return 0x2e0 + idx * 4; + case SV_FACE: return 0x3fc; + case SV_TESS_FACTOR: return 0x000 + idx * 4; + case SV_TESS_COORD: return 0x2f0 + idx * 4; + case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0; + case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0; + case SV_GRIDID: return kepler ? 
0x18 : ~0; + default: + return 0xffffffff; + } +} + +bool +TargetNVC0::insnCanLoad(const Instruction *i, int s, + const Instruction *ld) const +{ + DataFile sf = ld->src(0).getFile(); + + // immediate 0 can be represented by GPR $r63/$r255 + if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0) + return (!i->isPseudo() && + !i->asTex() && + i->op != OP_EXPORT && i->op != OP_STORE); + + if (s >= opInfo[i->op].srcNr) + return false; + if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf))) + return false; + + // indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0 + if (ld->src(0).isIndirect(0)) + return false; + + for (int k = 0; i->srcExists(k); ++k) { + if (i->src(k).getFile() == FILE_IMMEDIATE) { + if (k == 2 && i->op == OP_SUCLAMP) // special case + continue; + if (i->getSrc(k)->reg.data.u64 != 0) + return false; + } else + if (i->src(k).getFile() != FILE_GPR && + i->src(k).getFile() != FILE_PREDICATE) { + return false; + } + } + + // not all instructions support full 32 bit immediates + if (sf == FILE_IMMEDIATE) { + Storage ® = ld->getSrc(0)->asImm()->reg; + + if (opInfo[i->op].immdBits != 0xffffffff) { + if (i->sType == TYPE_F32) { + if (reg.data.u32 & 0xfff) + return false; + } else + if (i->sType == TYPE_S32 || i->sType == TYPE_U32) { + // with u32, 0xfffff counts as 0xffffffff as well + if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000) + return false; + } + } else + if (i->op == OP_MAD || i->op == OP_FMA) { + // requires src == dst, cannot decide before RA + // (except if we implement more constraints) + if (ld->getSrc(0)->asImm()->reg.data.u32 & 0xfff) + return false; + } else + if (i->op == OP_ADD && i->sType == TYPE_F32) { + // add f32 LIMM cannot saturate + if (i->saturate && (reg.data.u32 & 0xfff)) + return false; + } + } + + return true; +} + +bool +TargetNVC0::isAccessSupported(DataFile file, DataType ty) const +{ + if (ty == TYPE_NONE) + return false; + if (file == FILE_MEMORY_CONST && getChipset() >= 0xe0) // wrong encoding ? 
+ return typeSizeof(ty) <= 8; + if (ty == TYPE_B96) + return false; + if (getChipset() >= 0xf0) { + // XXX: find wide vfetch/export + if (ty == TYPE_B128) + return false; + if (ty == TYPE_U64) + return false; + } + return true; +} + +bool +TargetNVC0::isOpSupported(operation op, DataType ty) const +{ + if ((op == OP_MAD || op == OP_FMA) && (ty != TYPE_F32)) + return false; + if (op == OP_SAD && ty != TYPE_S32 && ty != TYPE_U32) + return false; + if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD) + return false; + return true; +} + +bool +TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const +{ + if (!isFloatType(insn->dType)) { + switch (insn->op) { + case OP_ABS: + case OP_NEG: + case OP_CVT: + case OP_CEIL: + case OP_FLOOR: + case OP_TRUNC: + case OP_AND: + case OP_OR: + case OP_XOR: + break; + case OP_SET: + if (insn->sType != TYPE_F32) + return false; + break; + case OP_ADD: + if (mod.abs()) + return false; + if (insn->src(s ? 0 : 1).mod.neg()) + return false; + break; + case OP_SUB: + if (s == 0) + return insn->src(1).mod.neg() ? 
false : true; + break; + default: + return false; + } + } + if (s > 3) + return false; + return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod; +} + +bool +TargetNVC0::mayPredicate(const Instruction *insn, const Value *pred) const +{ + if (insn->getPredicate()) + return false; + return opInfo[insn->op].predicate; +} + +bool +TargetNVC0::isSatSupported(const Instruction *insn) const +{ + if (insn->op == OP_CVT) + return true; + if (!(opInfo[insn->op].dstMods & NV50_IR_MOD_SAT)) + return false; + + if (insn->dType == TYPE_U32) + return (insn->op == OP_ADD) || (insn->op == OP_MAD); + + // add f32 LIMM cannot saturate + if (insn->op == OP_ADD && insn->sType == TYPE_F32) { + if (insn->getSrc(1)->asImm() && + insn->getSrc(1)->reg.data.u32 & 0xfff) + return false; + } + + return insn->dType == TYPE_F32; +} + +bool +TargetNVC0::isPostMultiplySupported(operation op, float f, int& e) const +{ + if (op != OP_MUL) + return false; + f = fabsf(f); + e = static_cast<int>(log2f(f)); + if (e < -3 || e > 3) + return false; + return f == exp2f(static_cast<float>(e)); +} + +// TODO: better values +// this could be more precise, e.g. depending on the issue-to-read/write delay +// of the depending instruction, but it's good enough +int TargetNVC0::getLatency(const Instruction *i) const +{ + if (chipset >= 0xe4) { + if (i->dType == TYPE_F64 || i->sType == TYPE_F64) + return 20; + switch (i->op) { + case OP_LINTERP: + case OP_PINTERP: + return 15; + case OP_LOAD: + if (i->src(0).getFile() == FILE_MEMORY_CONST) + return 9; + // fall through + case OP_VFETCH: + return 24; + default: + if (Target::getOpClass(i->op) == OPCLASS_TEXTURE) + return 17; + if (i->op == OP_MUL && i->dType != TYPE_F32) + return 15; + return 9; + } + } else { + if (i->op == OP_LOAD) { + if (i->cache == CACHE_CV) + return 700; + return 48; + } + return 24; + } + return 32; +} + +// These are "inverse" throughput values, i.e. 
// the number of cycles required
// to issue a specific instruction for a full warp (32 threads).
//
// Assuming we have more than 1 warp in flight, a higher issue latency results
// in a lower result latency since the MP will have spent more time with other
// warps.
// This also helps to determine the number of cycles between instructions in
// a single warp.
//
int TargetNVC0::getThroughput(const Instruction *i) const
{
   // TODO: better values
   // NOTE(review): buckets below are rough per-type/per-op issue costs;
   // unlisted F32 ops fall into the transcendental "8" default.
   if (i->dType == TYPE_F32) {
      switch (i->op) {
      case OP_ADD:
      case OP_MUL:
      case OP_MAD:
      case OP_FMA:
         return 1;
      case OP_CVT:
      case OP_CEIL:
      case OP_FLOOR:
      case OP_TRUNC:
      case OP_SET:
      case OP_SLCT:
      case OP_MIN:
      case OP_MAX:
         return 2;
      case OP_RCP:
      case OP_RSQ:
      case OP_LG2:
      case OP_SIN:
      case OP_COS:
      case OP_PRESIN:
      case OP_PREEX2:
      default:
         return 8;
      }
   } else
   if (i->dType == TYPE_U32 || i->dType == TYPE_S32) {
      switch (i->op) {
      case OP_ADD:
      case OP_AND:
      case OP_OR:
      case OP_XOR:
      case OP_NOT:
         return 1;
      case OP_MUL:
      case OP_MAD:
      case OP_CVT:
      case OP_SET:
      case OP_SLCT:
      case OP_SHL:
      case OP_SHR:
      case OP_NEG:
      case OP_ABS:
      case OP_MIN:
      case OP_MAX:
      default:
         return 2;
      }
   } else
   if (i->dType == TYPE_F64) {
      return 2;
   } else {
      return 1;
   }
}

// Can instructions @a and @b be issued in the same cycle (dual issue) ?
// Only meaningful for Kepler (>= 0xe4); Fermi returns false.
bool TargetNVC0::canDualIssue(const Instruction *a, const Instruction *b) const
{
   const OpClass clA = operationClass[a->op];
   const OpClass clB = operationClass[b->op];

   if (getChipset() >= 0xe4) {
      // not texturing
      // not if the 2nd instruction isn't necessarily executed
      if (clA == OPCLASS_TEXTURE || clA == OPCLASS_FLOW)
         return false;
      // anything with MOV
      if (a->op == OP_MOV || b->op == OP_MOV)
         return true;
      if (clA == clB) {
         // only F32 arith or integer additions
         if (clA != OPCLASS_ARITH)
            return false;
         return (a->dType == TYPE_F32 || a->op == OP_ADD ||
                 b->dType == TYPE_F32 || b->op == OP_ADD);
      }
      // nothing with TEXBAR
      if (a->op == OP_TEXBAR || b->op == OP_TEXBAR)
         return false;
      // no loads and stores accessing the same space
      if ((clA == OPCLASS_LOAD && clB == OPCLASS_STORE) ||
          (clB == OPCLASS_LOAD && clA == OPCLASS_STORE))
         if (a->src(0).getFile() == b->src(0).getFile())
            return false;
      // no > 32-bit ops
      if (typeSizeof(a->dType) > 4 || typeSizeof(b->dType) > 4 ||
          typeSizeof(a->sType) > 4 || typeSizeof(b->sType) > 4)
         return false;
      return true;
   } else {
      return false; // info not needed (yet)
   }
}

} // namespace nv50_ir
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.h
new file mode 100644
index 00000000000..7831af5069b
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.h
@@ -0,0 +1,74 @@
/*
 * Copyright 2011 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "codegen/nv50_ir_target.h"

namespace nv50_ir {

// Indices of the built-in library routines in the target's built-in code
// section (see getBuiltinOffset / getBuiltinCode).
#define NVC0_BUILTIN_DIV_U32 0
#define NVC0_BUILTIN_DIV_S32 1
#define NVC0_BUILTIN_RCP_F64 2
#define NVC0_BUILTIN_RSQ_F64 3

#define NVC0_BUILTIN_COUNT 4

// Target description for the nvc0 (Fermi and later) ISA.
// NOTE(review): header has no include guard — presumably only included from
// a single translation unit; confirm before adding one.
class TargetNVC0 : public Target
{
public:
   TargetNVC0(unsigned int chipset);

   virtual CodeEmitter *getCodeEmitter(Program::Type);

   // Chipset-specific emitters (GK110 uses a different encoding).
   CodeEmitter *createCodeEmitterNVC0(Program::Type);
   CodeEmitter *createCodeEmitterGK110(Program::Type);

   virtual bool runLegalizePass(Program *, CGStage stage) const;

   virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const;

   virtual bool insnCanLoad(const Instruction *insn, int s,
                            const Instruction *ld) const;
   virtual bool isOpSupported(operation, DataType) const;
   virtual bool isAccessSupported(DataFile, DataType) const;
   virtual bool isModSupported(const Instruction *, int s, Modifier) const;
   virtual bool isSatSupported(const Instruction *) const;
   virtual bool isPostMultiplySupported(operation, float, int& e) const;
   virtual bool mayPredicate(const Instruction *, const Value *) const;

   virtual bool canDualIssue(const Instruction *, const Instruction *) const;
   virtual int getLatency(const Instruction *) const;
   virtual int getThroughput(const Instruction *) const;

   virtual unsigned int getFileSize(DataFile) const;
   virtual unsigned int getFileUnit(DataFile) const;

   virtual uint32_t getSVAddress(DataFile shaderFile, const Symbol *sv) const;

   uint32_t getBuiltinOffset(int builtin) const;

private:
   void initOpInfo();
};

// Implemented in the scheduling pass; computes issue/delay info per Function.
bool calculateSchedDataNVC0(const Target *, Function *);

} // namespace nv50_ir
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp
new file mode 100644
index 00000000000..895977710ca
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp
@@ -0,0 +1,390 @@
/*
 * Copyright 2011 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
+ */ + +#include "codegen/nv50_ir_util.h" + +namespace nv50_ir { + +void DLList::clear() +{ + for (Item *next, *item = head.next; item != &head; item = next) { + next = item->next; + delete item; + } + head.next = head.prev = &head; +} + +void +DLList::Iterator::erase() +{ + Item *rem = pos; + + if (rem == term) + return; + pos = pos->next; + + DLLIST_DEL(rem); + delete rem; +} + +void DLList::Iterator::moveToList(DLList& dest) +{ + Item *item = pos; + + assert(term != &dest.head); + assert(pos != term); + + pos = pos->next; + + DLLIST_DEL(item); + DLLIST_ADDHEAD(&dest.head, item); +} + +bool +DLList::Iterator::insert(void *data) +{ + Item *ins = new Item(data); + + ins->next = pos->next; + ins->prev = pos; + pos->next->prev = ins; + pos->next = ins; + + if (pos == term) + term = ins; + + return true; +} + +void +Stack::moveTo(Stack& that) +{ + unsigned int newSize = this->size + that.size; + + while (newSize > that.limit) + that.resize(); + memcpy(&that.array[that.size], &array[0], this->size * sizeof(Item)); + + that.size = newSize; + this->size = 0; +} + +Interval::Interval(const Interval& that) : head(NULL), tail(NULL) +{ + this->insert(that); +} + +Interval::~Interval() +{ + clear(); +} + +void +Interval::clear() +{ + for (Range *next, *r = head; r; r = next) { + next = r->next; + delete r; + } + head = tail = NULL; +} + +bool +Interval::extend(int a, int b) +{ + Range *r, **nextp = &head; + + // NOTE: we need empty intervals for fixed registers + // if (a == b) + // return false; + assert(a <= b); + + for (r = head; r; r = r->next) { + if (b < r->bgn) + break; // insert before + if (a > r->end) { + // insert after + nextp = &r->next; + continue; + } + + // overlap + if (a < r->bgn) { + r->bgn = a; + if (b > r->end) + r->end = b; + r->coalesce(&tail); + return true; + } + if (b > r->end) { + r->end = b; + r->coalesce(&tail); + return true; + } + assert(a >= r->bgn); + assert(b <= r->end); + return true; + } + + (*nextp) = new Range(a, b); + (*nextp)->next = r; 
+ + for (r = (*nextp); r->next; r = r->next); + tail = r; + return true; +} + +bool Interval::contains(int pos) const +{ + for (Range *r = head; r && r->bgn <= pos; r = r->next) + if (r->end > pos) + return true; + return false; +} + +bool Interval::overlaps(const Interval &that) const +{ +#if 1 + Range *a = this->head; + Range *b = that.head; + + while (a && b) { + if (b->bgn < a->end && + b->end > a->bgn) + return true; + if (a->end <= b->bgn) + a = a->next; + else + b = b->next; + } +#else + for (Range *rA = this->head; rA; rA = rA->next) + for (Range *rB = iv.head; rB; rB = rB->next) + if (rB->bgn < rA->end && + rB->end > rA->bgn) + return true; +#endif + return false; +} + +void Interval::insert(const Interval &that) +{ + for (Range *r = that.head; r; r = r->next) + this->extend(r->bgn, r->end); +} + +void Interval::unify(Interval &that) +{ + assert(this != &that); + for (Range *next, *r = that.head; r; r = next) { + next = r->next; + this->extend(r->bgn, r->end); + delete r; + } + that.head = NULL; +} + +int Interval::length() const +{ + int len = 0; + for (Range *r = head; r; r = r->next) + len += r->bgn - r->end; + return len; +} + +void Interval::print() const +{ + if (!head) + return; + INFO("[%i %i)", head->bgn, head->end); + for (const Range *r = head->next; r; r = r->next) + INFO(" [%i %i)", r->bgn, r->end); + INFO("\n"); +} + +void +BitSet::andNot(const BitSet &set) +{ + assert(data && set.data); + assert(size >= set.size); + for (unsigned int i = 0; i < (set.size + 31) / 32; ++i) + data[i] &= ~set.data[i]; +} + +BitSet& BitSet::operator|=(const BitSet &set) +{ + assert(data && set.data); + assert(size >= set.size); + for (unsigned int i = 0; i < (set.size + 31) / 32; ++i) + data[i] |= set.data[i]; + return *this; +} + +bool BitSet::resize(unsigned int nBits) +{ + if (!data || !nBits) + return allocate(nBits, true); + const unsigned int p = (size + 31) / 32; + const unsigned int n = (nBits + 31) / 32; + if (n == p) + return true; + + data = (uint32_t 
*)REALLOC(data, 4 * p, 4 * n); + if (!data) { + size = 0; + return false; + } + if (n > p) + memset(&data[4 * p + 4], 0, (n - p) * 4); + + size = nBits; + return true; +} + +bool BitSet::allocate(unsigned int nBits, bool zero) +{ + if (data && size < nBits) { + FREE(data); + data = NULL; + } + size = nBits; + + if (!data) + data = reinterpret_cast<uint32_t *>(CALLOC((size + 31) / 32, 4)); + + if (zero) + memset(data, 0, (size + 7) / 8); + else + if (nBits) + data[(size + 31) / 32 - 1] = 0; // clear unused bits (e.g. for popCount) + + return data; +} + +unsigned int BitSet::popCount() const +{ + unsigned int count = 0; + + for (unsigned int i = 0; i < (size + 31) / 32; ++i) + if (data[i]) + count += util_bitcount(data[i]); + return count; +} + +void BitSet::fill(uint32_t val) +{ + unsigned int i; + for (i = 0; i < (size + 31) / 32; ++i) + data[i] = val; + if (val) + data[i] &= ~(0xffffffff << (size % 32)); // BE ? +} + +void BitSet::setOr(BitSet *pA, BitSet *pB) +{ + if (!pB) { + *this = *pA; + } else { + for (unsigned int i = 0; i < (size + 31) / 32; ++i) + data[i] = pA->data[i] | pB->data[i]; + } +} + +int BitSet::findFreeRange(unsigned int count) const +{ + const uint32_t m = (1 << count) - 1; + int pos = size; + unsigned int i; + const unsigned int end = (size + 31) / 32; + + if (count == 1) { + for (i = 0; i < end; ++i) { + pos = ffs(~data[i]) - 1; + if (pos >= 0) + break; + } + } else + if (count == 2) { + for (i = 0; i < end; ++i) { + if (data[i] != 0xffffffff) { + uint32_t b = data[i] | (data[i] >> 1) | 0xaaaaaaaa; + pos = ffs(~b) - 1; + if (pos >= 0) + break; + } + } + } else + if (count == 4 || count == 3) { + for (i = 0; i < end; ++i) { + if (data[i] != 0xffffffff) { + uint32_t b = + (data[i] >> 0) | (data[i] >> 1) | + (data[i] >> 2) | (data[i] >> 3) | 0xeeeeeeee; + pos = ffs(~b) - 1; + if (pos >= 0) + break; + } + } + } else { + if (count <= 8) + count = 8; + else + if (count <= 16) + count = 16; + else + count = 32; + + for (i = 0; i < end; ++i) { + if 
(data[i] != 0xffffffff) { + for (pos = 0; pos < 32; pos += count) + if (!(data[i] & (m << pos))) + break; + if (pos < 32) + break; + } + } + } + pos += i * 32; + + return ((pos + count) <= size) ? pos : -1; +} + +void BitSet::print() const +{ + unsigned int n = 0; + INFO("BitSet of size %u:\n", size); + for (unsigned int i = 0; i < (size + 31) / 32; ++i) { + uint32_t bits = data[i]; + while (bits) { + int pos = ffs(bits) - 1; + bits &= ~(1 << pos); + INFO(" %i", i * 32 + pos); + ++n; + if ((n % 16) == 0) + INFO("\n"); + } + } + if (n % 16) + INFO("\n"); +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h new file mode 100644 index 00000000000..a4ea9d981e0 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h @@ -0,0 +1,788 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __NV50_IR_UTIL_H__ +#define __NV50_IR_UTIL_H__ + +#include <new> +#include <assert.h> +#include <stdio.h> +#include <memory> +#include <map> + +#ifndef NDEBUG +# include <typeinfo> +#endif + +#include "util/u_inlines.h" +#include "util/u_memory.h" + +#define ERROR(args...) debug_printf("ERROR: " args) +#define WARN(args...) debug_printf("WARNING: " args) +#define INFO(args...) debug_printf(args) + +#define INFO_DBG(m, f, args...) \ + do { \ + if (m & NV50_IR_DEBUG_##f) \ + debug_printf(args); \ + } while(0) + +#define FATAL(args...) \ + do { \ + fprintf(stderr, args); \ + abort(); \ + } while(0) + + +#define NV50_IR_FUNC_ALLOC_OBJ_DEF(obj, f, args...) \ + new ((f)->getProgram()->mem_##obj.allocate()) obj(f, args) + +#define new_Instruction(f, args...) \ + NV50_IR_FUNC_ALLOC_OBJ_DEF(Instruction, f, args) +#define new_CmpInstruction(f, args...) \ + NV50_IR_FUNC_ALLOC_OBJ_DEF(CmpInstruction, f, args) +#define new_TexInstruction(f, args...) \ + NV50_IR_FUNC_ALLOC_OBJ_DEF(TexInstruction, f, args) +#define new_FlowInstruction(f, args...) \ + NV50_IR_FUNC_ALLOC_OBJ_DEF(FlowInstruction, f, args) + +#define new_LValue(f, args...) \ + NV50_IR_FUNC_ALLOC_OBJ_DEF(LValue, f, args) + + +#define NV50_IR_PROG_ALLOC_OBJ_DEF(obj, p, args...) \ + new ((p)->mem_##obj.allocate()) obj(p, args) + +#define new_Symbol(p, args...) \ + NV50_IR_PROG_ALLOC_OBJ_DEF(Symbol, p, args) +#define new_ImmediateValue(p, args...) 
\ + NV50_IR_PROG_ALLOC_OBJ_DEF(ImmediateValue, p, args) + + +#define delete_Instruction(p, insn) (p)->releaseInstruction(insn) +#define delete_Value(p, val) (p)->releaseValue(val) + + +namespace nv50_ir { + +class Iterator +{ +public: + virtual ~Iterator() { }; + virtual void next() = 0; + virtual void *get() const = 0; + virtual bool end() const = 0; // if true, get will return 0 + virtual void reset() { assert(0); } // only for graph iterators +}; + +typedef std::auto_ptr<Iterator> IteratorRef; + +class ManipIterator : public Iterator +{ +public: + virtual bool insert(void *) = 0; // insert after current position + virtual void erase() = 0; +}; + +// WARNING: do not use a->prev/next for __item or __list + +#define DLLIST_DEL(__item) \ + do { \ + (__item)->prev->next = (__item)->next; \ + (__item)->next->prev = (__item)->prev; \ + (__item)->next = (__item); \ + (__item)->prev = (__item); \ + } while(0) + +#define DLLIST_ADDTAIL(__list, __item) \ + do { \ + (__item)->next = (__list); \ + (__item)->prev = (__list)->prev; \ + (__list)->prev->next = (__item); \ + (__list)->prev = (__item); \ + } while(0) + +#define DLLIST_ADDHEAD(__list, __item) \ + do { \ + (__item)->prev = (__list); \ + (__item)->next = (__list)->next; \ + (__list)->next->prev = (__item); \ + (__list)->next = (__item); \ + } while(0) + +#define DLLIST_MERGE(__listA, __listB, ty) \ + do { \ + ty prevB = (__listB)->prev; \ + (__listA)->prev->next = (__listB); \ + (__listB)->prev->next = (__listA); \ + (__listB)->prev = (__listA)->prev; \ + (__listA)->prev = prevB; \ + } while(0) + +#define DLLIST_EMPTY(__list) ((__list)->next == (__list)) + +#define DLLIST_FOR_EACH(list, it) \ + for (DLList::Iterator (it) = (list)->iterator(); !(it).end(); (it).next()) + +class DLList +{ +public: + class Item + { + public: + Item(void *priv) : next(this), prev(this), data(priv) { } + + public: + Item *next; + Item *prev; + void *data; + }; + + DLList() : head(0) { } + ~DLList() { clear(); } + + inline void 
insertHead(void *data) + { + Item *item = new Item(data); + + assert(data); + + item->prev = &head; + item->next = head.next; + head.next->prev = item; + head.next = item; + } + + inline void insertTail(void *data) + { + Item *item = new Item(data); + + assert(data); + + DLLIST_ADDTAIL(&head, item); + } + + inline void insert(void *data) { insertTail(data); } + + void clear(); + + class Iterator : public ManipIterator + { + public: + Iterator(Item *head, bool r) : rev(r), pos(r ? head->prev : head->next), + term(head) { } + + virtual void next() { if (!end()) pos = rev ? pos->prev : pos->next; } + virtual void *get() const { return pos->data; } + virtual bool end() const { return pos == term; } + + // caution: if you're at end-2 and erase it, then do next, you're at end + virtual void erase(); + virtual bool insert(void *data); + + // move item to a another list, no consistency with its iterators though + void moveToList(DLList&); + + private: + const bool rev; + Item *pos; + Item *term; + + friend class DLList; + }; + + inline void erase(Iterator& pos) + { + pos.erase(); + } + + Iterator iterator() + { + return Iterator(&head, false); + } + + Iterator revIterator() + { + return Iterator(&head, true); + } + +private: + Item head; +}; + +class Stack +{ +public: + class Item { + public: + union { + void *p; + int i; + unsigned int u; + float f; + double d; + } u; + + Item() { memset(&u, 0, sizeof(u)); } + }; + + Stack() : size(0), limit(0), array(0) { } + ~Stack() { if (array) FREE(array); } + + inline void push(int i) { Item data; data.u.i = i; push(data); } + inline void push(unsigned int u) { Item data; data.u.u = u; push(data); } + inline void push(void *p) { Item data; data.u.p = p; push(data); } + inline void push(float f) { Item data; data.u.f = f; push(data); } + + inline void push(Item data) + { + if (size == limit) + resize(); + array[size++] = data; + } + + inline Item pop() + { + if (!size) { + Item data; + assert(0); + return data; + } + return 
array[--size]; + } + + inline unsigned int getSize() { return size; } + + inline Item& peek() { assert(size); return array[size - 1]; } + + void clear(bool releaseStorage = false) + { + if (releaseStorage && array) + FREE(array); + size = limit = 0; + } + + void moveTo(Stack&); // move all items to target (not like push(pop())) + +private: + void resize() + { + unsigned int sizeOld, sizeNew; + + sizeOld = limit * sizeof(Item); + limit = MAX2(4, limit + limit); + sizeNew = limit * sizeof(Item); + + array = (Item *)REALLOC(array, sizeOld, sizeNew); + } + + unsigned int size; + unsigned int limit; + Item *array; +}; + +class DynArray +{ +public: + class Item + { + public: + union { + uint32_t u32; + void *p; + }; + }; + + DynArray() : data(NULL), size(0) { } + + ~DynArray() { if (data) FREE(data); } + + inline Item& operator[](unsigned int i) + { + if (i >= size) + resize(i); + return data[i]; + } + + inline const Item operator[](unsigned int i) const + { + return data[i]; + } + + void resize(unsigned int index) + { + const unsigned int oldSize = size * sizeof(Item); + + if (!size) + size = 8; + while (size <= index) + size <<= 1; + + data = (Item *)REALLOC(data, oldSize, size * sizeof(Item)); + } + + void clear() + { + FREE(data); + data = NULL; + size = 0; + } + +private: + Item *data; + unsigned int size; +}; + +class ArrayList +{ +public: + ArrayList() : size(0) { } + + void insert(void *item, int& id) + { + id = ids.getSize() ? 
ids.pop().u.i : size++; + data[id].p = item; + } + + void remove(int& id) + { + const unsigned int uid = id; + assert(uid < size && data[id].p); + ids.push(uid); + data[uid].p = NULL; + id = -1; + } + + inline int getSize() const { return size; } + + inline void *get(unsigned int id) { assert(id < size); return data[id].p; } + + class Iterator : public nv50_ir::Iterator + { + public: + Iterator(const ArrayList *array) : pos(0), data(array->data) + { + size = array->getSize(); + if (size) + nextValid(); + } + + void nextValid() { while ((pos < size) && !data[pos].p) ++pos; } + + void next() { if (pos < size) { ++pos; nextValid(); } } + void *get() const { assert(pos < size); return data[pos].p; } + bool end() const { return pos >= size; } + + private: + unsigned int pos; + unsigned int size; + const DynArray& data; + + friend class ArrayList; + }; + + Iterator iterator() const { return Iterator(this); } + + void clear() + { + data.clear(); + ids.clear(true); + size = 0; + } + +private: + DynArray data; + Stack ids; + unsigned int size; +}; + +class Interval +{ +public: + Interval() : head(0), tail(0) { } + Interval(const Interval&); + ~Interval(); + + bool extend(int, int); + void insert(const Interval&); + void unify(Interval&); // clears source interval + void clear(); + + inline int begin() const { return head ? head->bgn : -1; } + inline int end() const { checkTail(); return tail ? 
tail->end : -1; } + inline bool isEmpty() const { return !head; } + bool overlaps(const Interval&) const; + bool contains(int pos) const; + + inline int extent() const { return end() - begin(); } + int length() const; + + void print() const; + + inline void checkTail() const; + +private: + class Range + { + public: + Range(int a, int b) : next(0), bgn(a), end(b) { } + + Range *next; + int bgn; + int end; + + void coalesce(Range **ptail) + { + Range *rnn; + + while (next && end >= next->bgn) { + assert(bgn <= next->bgn); + rnn = next->next; + end = MAX2(end, next->end); + delete next; + next = rnn; + } + if (!next) + *ptail = this; + } + }; + + Range *head; + Range *tail; +}; + +class BitSet +{ +public: + BitSet() : marker(false), data(0), size(0) { } + BitSet(unsigned int nBits, bool zero) : marker(false), data(0), size(0) + { + allocate(nBits, zero); + } + ~BitSet() + { + if (data) + FREE(data); + } + + bool allocate(unsigned int nBits, bool zero); + bool resize(unsigned int nBits); // keep old data, zero additional bits + + inline unsigned int getSize() const { return size; } + + void fill(uint32_t val); + + void setOr(BitSet *, BitSet *); // second BitSet may be NULL + + inline void set(unsigned int i) + { + assert(i < size); + data[i / 32] |= 1 << (i % 32); + } + // NOTE: range may not cross 32 bit boundary (implies n <= 32) + inline void setRange(unsigned int i, unsigned int n) + { + assert((i + n) <= size && (((i % 32) + n) <= 32)); + data[i / 32] |= ((1 << n) - 1) << (i % 32); + } + inline void setMask(unsigned int i, uint32_t m) + { + assert(i < size); + data[i / 32] |= m; + } + + inline void clr(unsigned int i) + { + assert(i < size); + data[i / 32] &= ~(1 << (i % 32)); + } + // NOTE: range may not cross 32 bit boundary (implies n <= 32) + inline void clrRange(unsigned int i, unsigned int n) + { + assert((i + n) <= size && (((i % 32) + n) <= 32)); + data[i / 32] &= ~(((1 << n) - 1) << (i % 32)); + } + + inline bool test(unsigned int i) const + { + assert(i 
< size); + return data[i / 32] & (1 << (i % 32)); + } + // NOTE: range may not cross 32 bit boundary (implies n <= 32) + inline bool testRange(unsigned int i, unsigned int n) const + { + assert((i + n) <= size && (((i % 32) + n) <= 32)); + return data[i / 32] & (((1 << n) - 1) << (i % 32)); + } + + // Find a range of size (<= 32) clear bits aligned to roundup_pow2(size). + int findFreeRange(unsigned int size) const; + + BitSet& operator|=(const BitSet&); + + BitSet& operator=(const BitSet& set) + { + assert(data && set.data); + assert(size == set.size); + memcpy(data, set.data, (set.size + 7) / 8); + return *this; + } + + void andNot(const BitSet&); + + // bits = (bits | setMask) & ~clrMask + inline void periodicMask32(uint32_t setMask, uint32_t clrMask) + { + for (unsigned int i = 0; i < (size + 31) / 32; ++i) + data[i] = (data[i] | setMask) & ~clrMask; + } + + unsigned int popCount() const; + + void print() const; + +public: + bool marker; // for user + +private: + uint32_t *data; + unsigned int size; +}; + +void Interval::checkTail() const +{ +#if NV50_DEBUG & NV50_DEBUG_PROG_RA + Range *r = head; + while (r->next) + r = r->next; + assert(tail == r); +#endif +} + +class MemoryPool +{ +private: + inline bool enlargeAllocationsArray(const unsigned int id, unsigned int nr) + { + const unsigned int size = sizeof(uint8_t *) * id; + const unsigned int incr = sizeof(uint8_t *) * nr; + + uint8_t **alloc = (uint8_t **)REALLOC(allocArray, size, size + incr); + if (!alloc) + return false; + allocArray = alloc; + return true; + } + + inline bool enlargeCapacity() + { + const unsigned int id = count >> objStepLog2; + + uint8_t *const mem = (uint8_t *)MALLOC(objSize << objStepLog2); + if (!mem) + return false; + + if (!(id % 32)) { + if (!enlargeAllocationsArray(id, 32)) { + FREE(mem); + return false; + } + } + allocArray[id] = mem; + return true; + } + +public: + MemoryPool(unsigned int size, unsigned int incr) : objSize(size), + objStepLog2(incr) + { + allocArray = NULL; + 
released = NULL; + count = 0; + } + + ~MemoryPool() + { + unsigned int allocCount = (count + (1 << objStepLog2) - 1) >> objStepLog2; + for (unsigned int i = 0; i < allocCount && allocArray[i]; ++i) + FREE(allocArray[i]); + if (allocArray) + FREE(allocArray); + } + + void *allocate() + { + void *ret; + const unsigned int mask = (1 << objStepLog2) - 1; + + if (released) { + ret = released; + released = *(void **)released; + return ret; + } + + if (!(count & mask)) + if (!enlargeCapacity()) + return NULL; + + ret = allocArray[count >> objStepLog2] + (count & mask) * objSize; + ++count; + return ret; + } + + void release(void *ptr) + { + *(void **)ptr = released; + released = ptr; + } + +private: + uint8_t **allocArray; // array (list) of MALLOC allocations + + void *released; // list of released objects + + unsigned int count; // highest allocated object + + const unsigned int objSize; + const unsigned int objStepLog2; +}; + +/** + * Composite object cloning policy. + * + * Encapsulates how sub-objects are to be handled (if at all) when a + * composite object is being cloned. + */ +template<typename C> +class ClonePolicy +{ +protected: + C *c; + +public: + ClonePolicy(C *c) : c(c) {} + + C *context() { return c; } + + template<typename T> T *get(T *obj) + { + void *clone = lookup(obj); + if (!clone) + clone = obj->clone(*this); + return reinterpret_cast<T *>(clone); + } + + template<typename T> void set(const T *obj, T *clone) + { + insert(obj, clone); + } + +protected: + virtual void *lookup(void *obj) = 0; + virtual void insert(const void *obj, void *clone) = 0; +}; + +/** + * Shallow non-recursive cloning policy. + * + * Objects cloned with the "shallow" policy don't clone their + * children recursively, instead, the new copy shares its children + * with the original object. 
+ */ +template<typename C> +class ShallowClonePolicy : public ClonePolicy<C> +{ +public: + ShallowClonePolicy(C *c) : ClonePolicy<C>(c) {} + +protected: + virtual void *lookup(void *obj) + { + return obj; + } + + virtual void insert(const void *obj, void *clone) + { + } +}; + +template<typename C, typename T> +inline T *cloneShallow(C *c, T *obj) +{ + ShallowClonePolicy<C> pol(c); + return obj->clone(pol); +} + +/** + * Recursive cloning policy. + * + * Objects cloned with the "deep" policy clone their children + * recursively, keeping track of what has already been cloned to + * avoid making several new copies of the same object. + */ +template<typename C> +class DeepClonePolicy : public ClonePolicy<C> +{ +public: + DeepClonePolicy(C *c) : ClonePolicy<C>(c) {} + +private: + std::map<const void *, void *> map; + +protected: + virtual void *lookup(void *obj) + { + return map[obj]; + } + + virtual void insert(const void *obj, void *clone) + { + map[obj] = clone; + } +}; + +template<typename S, typename T> +struct bimap +{ + std::map<S, T> forth; + std::map<T, S> back; + +public: + bimap() : l(back), r(forth) { } + bimap(const bimap<S, T> &m) + : forth(m.forth), back(m.back), l(back), r(forth) { } + + void insert(const S &s, const T &t) + { + forth.insert(std::make_pair(s, t)); + back.insert(std::make_pair(t, s)); + } + + typedef typename std::map<T, S>::const_iterator l_iterator; + const std::map<T, S> &l; + typedef typename std::map<S, T>::const_iterator r_iterator; + const std::map<S, T> &r; +}; + +} // namespace nv50_ir + +#endif // __NV50_IR_UTIL_H__ diff --git a/src/gallium/drivers/nouveau/codegen/target_lib_nvc0.asm b/src/gallium/drivers/nouveau/codegen/target_lib_nvc0.asm new file mode 100644 index 00000000000..f40becc0b88 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/target_lib_nvc0.asm @@ -0,0 +1,96 @@ +// +// DIV U32 +// +// UNR recurrence (q = a / b): +// look for z such that 2^32 - b <= b * z < 2^32 +// then q - 1 <= (a * z) / 2^32 <= q +// +// 
INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p1 +// SIZE: 22 / 14 * 8 bytes +// +bfind u32 $r2 $r1 +xor b32 $r2 $r2 0x1f +mov b32 $r3 0x1 +shl b32 $r2 $r3 clamp $r2 +cvt u32 $r1 neg u32 $r1 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mov b32 $r3 $r0 +mul high $r0 u32 $r0 u32 $r2 +cvt u32 $r2 neg u32 $r1 +add $r1 (mul u32 $r1 u32 $r0) $r3 +set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +$p0 add b32 $r0 $r0 0x1 +$p0 set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +$p0 add b32 $r0 $r0 0x1 +ret +// +// DIV S32, like DIV U32 after taking ABS(inputs) +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p3 +// +set $p2 0x1 lt s32 $r0 0x0 +set $p3 0x1 lt s32 $r1 0x0 xor $p2 +cvt s32 $r0 abs s32 $r0 +cvt s32 $r1 abs s32 $r1 +bfind u32 $r2 $r1 +xor b32 $r2 $r2 0x1f +mov b32 $r3 0x1 +shl b32 $r2 $r3 clamp $r2 +cvt u32 $r1 neg u32 $r1 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mov b32 $r3 $r0 +mul high $r0 u32 $r0 u32 $r2 +cvt u32 $r2 neg u32 $r1 +add $r1 (mul u32 $r1 u32 $r0) $r3 +set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +$p0 add b32 $r0 $r0 0x1 +$p0 set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +$p0 add b32 $r0 $r0 0x1 +$p3 cvt s32 $r0 neg s32 $r0 +$p2 cvt s32 $r1 neg s32 $r1 +ret +// +// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i) +// +// INPUT: $r0d 
(x) +// OUTPUT: $r0d (rcp(x)) +// CLOBBER: $r2 - $r7 +// SIZE: 9 * 8 bytes +// +nop +ret +// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i) +// +// INPUT: $r0d (x) +// OUTPUT: $r0d (rsqrt(x)) +// CLOBBER: $r2 - $r7 +// SIZE: 14 * 8 bytes +// +nop +ret diff --git a/src/gallium/drivers/nouveau/codegen/target_lib_nvc0.asm.h b/src/gallium/drivers/nouveau/codegen/target_lib_nvc0.asm.h new file mode 100644 index 00000000000..37905049f50 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/target_lib_nvc0.asm.h @@ -0,0 +1,112 @@ + +static const uint32_t nvc0_builtin_code[] = +{ + 0x04009c03, + 0x78000000, + 0x7c209cdd, + 0x0010dd18, + 0x08309c03, + 0x60000000, + 0x05605c18, + 0x0810dc2a, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0000dde4, + 0x28000000, + 0x08001c43, + 0x50000000, + 0x05609c18, + 0x0010430d, + 0x0811dc03, + 0x1b0e0000, + 0x08104103, + 0x48000000, + 0x04000002, + 0x08000000, + 0x0811c003, + 0x1b0e0000, + 0x08104103, + 0x48000000, + 0x040000ac, + 0x90001dff, + 0xfc05dc23, + 0x188e0000, + 0xfc17dc23, + 0x18c40000, + 0x03301e18, + 0x07305e18, + 0x04009c03, + 0x78000000, + 0x7c209cdd, + 0x0010dd18, + 0x08309c03, + 0x60000000, + 0x05605c18, + 0x0810dc2a, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0810dc03, + 0x50000000, + 0x0c209c43, + 0x20040000, + 0x0000dde4, + 0x28000000, + 0x08001c43, + 0x50000000, + 0x05609c18, + 0x0010430d, + 0x0811dc03, + 0x1b0e0000, + 0x08104103, + 0x48000000, + 0x04000002, + 0x08000000, + 0x0811c003, + 0x1b0e0000, + 0x08104103, + 0x48000000, + 0x040000ac, + 0x01700e18, + 0x05704a18, + 0x90001dff, + 0x00001c08, + 0x90001dff, + 0x00001c08, + 
0x90001dff, +}; + +static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] = +{ + 0x0000, + 0x00b0, + 0x0180, + 0x0188 +}; diff --git a/src/gallium/drivers/nouveau/codegen/target_lib_nve4.asm b/src/gallium/drivers/nouveau/codegen/target_lib_nve4.asm new file mode 100644 index 00000000000..5adc9ff38a5 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/target_lib_nve4.asm @@ -0,0 +1,698 @@ +// +// DIV U32 +// +// UNR recurrence (q = a / b): +// look for z such that 2^32 - b <= b * z < 2^32 +// then q - 1 <= (a * z) / 2^32 <= q +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p1 +// SIZE: 22 / 14 * 8 bytes +// +sched 0x28 0x4 0x28 0x4 0x28 0x28 0x28 +bfind u32 $r2 $r1 +long xor b32 $r2 $r2 0x1f +long mov b32 $r3 0x1 +shl b32 $r2 $r3 clamp $r2 +long cvt u32 $r1 neg u32 $r1 +long mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +sched 0x4 0x28 0x4 0x28 0x28 0x2c 0x4 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mov b32 $r3 $r0 +mul high $r0 u32 $r0 u32 $r2 +long cvt u32 $r2 neg u32 $r1 +long add $r1 (mul u32 $r1 u32 $r0) $r3 +set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +sched 0x28 0x2c 0x4 0x20 0x2e 0x28 0x20 +$p0 add b32 $r0 $r0 0x1 +$p0 set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +$p0 add b32 $r0 $r0 0x1 +long ret +// +// DIV S32, like DIV U32 after taking ABS(inputs) +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p3 +// +set $p2 0x1 lt s32 $r0 0x0 +set $p3 0x1 lt s32 $r1 0x0 xor $p2 +sched 0x20 0x28 0x28 0x4 0x28 0x04 0x28 +long cvt s32 $r0 abs s32 $r0 +long cvt s32 $r1 abs s32 $r1 +bfind u32 $r2 $r1 +long xor b32 $r2 $r2 0x1f +long mov b32 $r3 
0x1 +shl b32 $r2 $r3 clamp $r2 +cvt u32 $r1 neg u32 $r1 +sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +sched 0x28 0x28 0x4 0x28 0x04 0x28 0x28 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mul $r3 u32 $r1 u32 $r2 +add $r2 (mul high u32 $r2 u32 $r3) $r2 +mov b32 $r3 $r0 +mul high $r0 u32 $r0 u32 $r2 +long cvt u32 $r2 neg u32 $r1 +long add $r1 (mul u32 $r1 u32 $r0) $r3 +sched 0x2c 0x04 0x28 0x2c 0x04 0x28 0x20 +set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +$p0 add b32 $r0 $r0 0x1 +$p0 set $p0 0x1 ge u32 $r1 $r2 +$p0 sub b32 $r1 $r1 $r2 +long $p0 add b32 $r0 $r0 0x1 +long $p3 cvt s32 $r0 neg s32 $r0 +sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c +$p2 cvt s32 $r1 neg s32 $r1 +long ret +// +// SULDP [for each format] +// $r4d: address +// $r2: surface info (format) +// $p0: access predicate +// $p1, $p2: caching predicate (00: cv, 01: ca, 10: cg) +// +// RGBA32 +$p1 suldgb b128 $r0q ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b128 $r0q cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b128 $r0q cv zero u8 g[$r4d] $r2 $p0 +long ret +// RGBA16_UNORM +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b128 $r0q ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b128 $r0q cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b128 $r0q cv zero u8 g[$r4d] $r2 $p0 +cvt rn f32 $r3 u16 1 $r1 +cvt rn f32 $r2 u16 0 $r1 +mul f32 $r3 $r3 0x37800074 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt rn f32 $r1 u16 1 $r0 +mul f32 $r2 $r2 0x37800074 +cvt rn f32 $r0 u16 0 $r0 +mul f32 $r1 $r1 0x37800074 +mul f32 $r0 $r0 0x37800074 +long ret +// RGBA16_SNORM +$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 
$p0 +cvt rn f32 $r3 s16 1 $r1 +cvt rn f32 $r2 s16 0 $r1 +mul f32 $r3 $r3 0x38000187 +cvt rn f32 $r1 s16 1 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r2 $r2 0x38000187 +cvt rn f32 $r0 s16 0 $r0 +mul f32 $r1 $r1 0x38000187 +mul f32 $r0 $r0 0x38000187 +long ret +// RGBA16_SINT +$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 +cvt s32 $r3 s16 1 $r1 +cvt s32 $r2 s16 0 $r1 +cvt s32 $r1 s16 1 $r0 +cvt s32 $r0 s16 0 $r0 +long ret +// RGBA16_UINT +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 +cvt u32 $r3 u16 1 $r1 +cvt u32 $r2 u16 0 $r1 +cvt u32 $r1 u16 1 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt u32 $r0 u16 0 $r0 +long ret +// RGBA16_FLOAT +$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 +cvt f32 $r3 f16 $r1 1 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt f32 $r2 f16 $r1 0 +cvt f32 $r1 f16 $r0 1 +cvt f32 $r0 f16 $r0 0 +long ret +// RG32_FLOAT +$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r2 0x00000000 +long mov b32 $r3 0x3f800000 +long ret +// RG32_xINT +$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r2 0x00000000 +long mov b32 $r3 0x00000001 +long ret +// RGB10A2_UNORM +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not 
$p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +ext u32 $r1 $r0 0x0a0a +long mov b32 $r3 0x3f800000 +ext u32 $r2 $r0 0x0a14 +long and b32 $r0 $r0 0x3ff +cvt rn f32 $r2 u16 0 $r2 +cvt rn f32 $r1 u16 0 $r1 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r2 $r2 0x3a802007 +cvt rn f32 $r0 u16 0 $r0 +mul f32 $r1 $r1 0x3a802007 +mul f32 $r0 $r0 0x3a802007 +long ret +// RGB10A2_UINT +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +ext u32 $r1 $r0 0x0a0a +long mov b32 $r3 0x00000001 +ext u32 $r2 $r0 0x0a14 +long and b32 $r0 $r0 0x3ff +long ret +// RGBA8_UNORM +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +cvt rn f32 $r3 u8 3 $r0 +cvt rn f32 $r2 u8 2 $r0 +mul f32 $r3 $r3 0x3b808081 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt rn f32 $r1 u8 1 $r0 +mul f32 $r2 $r2 0x3b808081 +cvt rn f32 $r0 u8 0 $r0 +mul f32 $r1 $r1 0x3b808081 +mul f32 $r0 $r0 0x3b808081 +long ret +// RGBA8_SNORM +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +cvt rn f32 $r3 s8 3 $r0 +cvt rn f32 $r2 s8 2 $r0 +mul f32 $r3 $r3 0x3c010204 +cvt rn f32 $r1 s8 1 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r2 $r2 0x3c010204 +cvt rn f32 $r0 s8 0 $r0 +mul f32 $r1 $r1 0x3c010204 +mul f32 $r0 $r0 0x3c010204 +long ret +// RGBA8_SINT +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 
g[$r4d] $r2 $p0 +cvt s32 $r3 s8 3 $r0 +cvt s32 $r2 s8 2 $r0 +cvt s32 $r1 s8 1 $r0 +cvt s32 $r0 s8 0 $r0 +long ret +// RGBA8_UINT +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +cvt u32 $r3 u8 3 $r0 +cvt u32 $r2 u8 2 $r0 +cvt u32 $r1 u8 1 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt u32 $r0 u8 0 $r0 +long ret +// R5G6B5_UNORM +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +ext u32 $r1 $r0 0x0605 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long mov b32 $r3 0x3f800000 +ext u32 $r2 $r0 0x050b +long and b32 $r0 $r0 0x1f +cvt rn f32 $r2 u8 0 $r2 +cvt rn f32 $r1 u8 0 $r1 +mul f32 $r2 $r2 0x3d042108 +cvt rn f32 $r0 u8 0 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r1 $r1 0x3c820821 +mul f32 $r0 $r0 0x3d042108 +long ret +// R5G5B5X1_UNORM +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +ext u32 $r1 $r0 0x0505 +ext u32 $r2 $r0 0x050a +long and b32 $r0 $r0 0x1f +long mov b32 $r3 0x3f800000 +cvt rn f32 $r2 u8 0 $r2 +cvt rn f32 $r1 u8 0 $r1 +cvt rn f32 $r0 u8 0 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r2 $r2 0x3d042108 +mul f32 $r1 $r1 0x3d042108 +mul f32 $r0 $r0 0x3d042108 +long ret +// RG16_UNORM +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +cvt rn f32 $r1 u16 1 $r0 +cvt rn f32 $r0 u16 0 $r0 +mul f32 $r1 $r1 0x37800074 +mul f32 $r0 $r0 0x37800074 +long mov b32 $r2 0x00000000 +long mov b32 $r3 0x3f800000 +sched 0x00 0x00 0x00 0x00 0x00 
0x00 0x00 +long ret +// RG16_SNORM +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x3f800000 +cvt rn f32 $r1 s16 1 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mov b32 $r2 0x00000000 +cvt rn f32 $r0 s16 0 $r0 +mul f32 $r1 $r1 0x38000187 +mul f32 $r0 $r0 0x38000187 +long ret +// RG16_SINT +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x00000001 +cvt s32 $r1 s16 1 $r0 +mov b32 $r2 0x00000000 +cvt s32 $r0 s16 0 $r0 +long ret +// RG16_UINT +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x00000001 +cvt u32 $r1 u16 1 $r0 +mov b32 $r2 0x00000000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt u32 $r0 u16 0 $r0 +long ret +// RG16_FLOAT +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x3f800000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt f32 $r1 f16 $r0 1 +mov b32 $r2 0x00000000 +cvt f32 $r0 f16 $r0 0 +long ret +// R32_FLOAT +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x3f800000 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +long ret +// R32_xINT +$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 +long 
mov b32 $r3 0x00000001 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +long ret +// RG8_UNORM +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x3f800000 +cvt rn f32 $r1 u8 1 $r0 +mov b32 $r2 0x00000000 +cvt rn f32 $r0 u8 0 $r0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r1 $r1 0x3b808081 +mul f32 $r0 $r0 0x3b808081 +long ret +// RG8_SNORM +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long mov b32 $r3 0x3f800000 +cvt rn f32 $r1 s8 1 $r0 +long mov b32 $r2 0x00000000 +cvt rn f32 $r0 s8 0 $r0 +mul f32 $r1 $r1 0x3c010204 +mul f32 $r0 $r0 0x3c010204 +long ret +// RG8_UINT +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x00000001 +cvt u32 $r1 u8 1 $r0 +long mov b32 $r2 0x00000000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt u32 $r0 u8 0 $r0 +long ret +// RG8_SINT +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x00000001 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +cvt s32 $r1 s8 1 $r0 +long mov b32 $r2 0x00000000 +cvt s32 $r0 s8 0 $r0 +long ret +// R16_UNORM +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x3f800000 +cvt rn f32 $r0 u16 0 $r0 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +mul f32 $r0 $r0 
0x37800074 +long ret +// R16_SNORM +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x3f800000 +cvt rn f32 $r0 s16 0 $r0 +long mov b32 $r2 0x00000000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long mov b32 $r1 0x00000000 +mul f32 $r0 $r0 0x38000187 +long ret +// R16_SINT +$p1 suldgb s16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb s16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb s16 $r0 cv zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long mov b32 $r3 0x00000001 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +long ret +// R16_UINT +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x00000001 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +long ret +// R16_FLOAT +$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x3f800000 +long mov b32 $r2 0x00000000 +cvt f32 $r0 f16 $r0 0 +mov b32 $r1 0x00000000 +long ret +// R8_UNORM +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0 +mov b32 $r3 0x3f800000 +cvt rn f32 $r0 u8 0 $r0 +mov b32 $r2 0x00000000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mul f32 $r0 $r0 0x3b808081 +mov b32 $r1 0x00000000 +long ret +// R8_SNORM +$p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0 
+sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +mov b32 $r3 0x3f800000 +cvt rn f32 $r0 s8 0 $r0 +mov b32 $r2 0x00000000 +mul f32 $r0 $r0 0x3c010204 +mov b32 $r1 0x00000000 +long ret +// R8_SINT +$p1 suldgb s8 $r0 ca zero u8 g[$r4d] $r2 $p0 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb s8 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb s8 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x00000001 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +long ret +// R8_UINT +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +$p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x00000001 +long mov b32 $r2 0x00000000 +long mov b32 $r1 0x00000000 +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long ret +// R11G11B10_FLOAT TODO +$p1 suldgb b32 $r3 ca zero u8 g[$r4d] $r2 $p0 +set $p1 0x1 $p1 xor not $p2 +$p2 suldgb b32 $r3 cg zero u8 g[$r4d] $r2 $p0 +$p1 suldgb b32 $r3 cv zero u8 g[$r4d] $r2 $p0 +long mov b32 $r3 0x3f800000 +long nop +long ret +// +// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i) +// +// INPUT: $r0d (x) +// OUTPUT: $r0d (rcp(x)) +// CLOBBER: $r2 - $r7 +// SIZE: 9 * 8 bytes +// +long nop +long ret +// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i) +// +// INPUT: $r0d (x) +// OUTPUT: $r0d (rsqrt(x)) +// CLOBBER: $r2 - $r7 +// SIZE: 14 * 8 bytes +// +long nop +long ret +// +// Trap handler. +// Requires at least 4 GPRs and 32 bytes of l[] memory to temporarily save GPRs. +// Low 32 bytes of l[] memory shouldn't be used if resumeability is required. 
+// +// Trap info: +// 0x000: mutex +// 0x004: PC +// 0x008: trapstat +// 0x00c: warperr +// 0x010: tidx +// 0x014: tidy +// 0x018: tidz +// 0x01c: ctaidx +// 0x020: ctaidy +// 0x024: ctaidz +// 0x030: $r0q +// 0x130: $flags +// 0x140: s[] +// +st b128 wb l[0x00] $r0q +// check state of the warp and continue if it didn't cause the trap +long mov b32 $r1 $trapstat +long mov b32 $r3 $warperr +mov $r2 $flags mask 0xffff +and b32 0 $c $r1 $r3 +e $c bra #end_cont +// spill control flow stack to l[] +long mov b32 $r3 16 +spill_cfstack: +preret #end_exit +sub b32 $r3 $c $r3 0x1 +lg $c bra #spill_cfstack +// retrieve pointer to trap info +mov b32 $r0 c0[0x1900] +mov b32 $r1 c0[0x1904] +// we only let a single faulting thread store its state +mov b32 $r3 0x1 +exch b32 $r3 g[$r0d] $r3 +joinat #end_exit +set $p0 0x1 eq u32 $r3 0x1 +join $p0 nop +// store $c and $p registers +st b32 wb g[$r0d+0x130] $r2 +// store $trapstat and $warperr +long mov b32 $r2 $trapstat +long mov b32 $r3 $warperr +st b64 wb g[$r0d+0x8] $r2d +// store registers +st b128 wb g[$r0d+0x40] $r4q +st b128 wb g[$r0d+0x50] $r8q +st b128 wb g[$r0d+0x60] $r12q +st b128 wb g[$r0d+0x70] $r16q +st b128 wb g[$r0d+0x80] $r20q +st b128 wb g[$r0d+0x90] $r24q +st b128 wb g[$r0d+0xa0] $r28q +st b128 wb g[$r0d+0xb0] $r32q +st b128 wb g[$r0d+0xc0] $r36q +st b128 wb g[$r0d+0xd0] $r40q +st b128 wb g[$r0d+0xe0] $r44q +st b128 wb g[$r0d+0xf0] $r48q +st b128 wb g[$r0d+0x100] $r52q +st b128 wb g[$r0d+0x110] $r56q +st b128 wb g[$r0d+0x120] $r60q +ld b64 $r2d cs l[0x0] +st b64 wb g[$r0d+0x30] $r2d +ld b64 $r2d cs l[0x8] +st b64 wb g[$r0d+0x38] $r2d +// store thread id +long mov b32 $r2 $tidx +long mov b32 $r3 $tidy +st b64 wb g[$r0d+0x10] $r2d +long mov b32 $r2 $tidz +long mov b32 $r3 $ctaidx +st b64 wb g[$r0d+0x18] $r2d +long mov b32 $r2 $ctaidy +long mov b32 $r3 $ctaidz +st b64 wb g[$r0d+0x20] $r2d +// store shared memory (in reverse order so $r0d is base again at the end) +long mov b32 $r3 $smemsz +sub b32 $r3 $c $r3 0x4 +s $c 
bra #shared_done +add b32 $r0 $c $r0 $r3 +add b32 $r1 $r1 0x0 $c +shared_loop: +long ld b32 $r2 s[$r3] +long st b32 wb g[$r0d+0x140] $r2 +sub b32 $r0 $c $r0 0x4 +sub b32 $r1 $r1 0x0 $c +sub b32 $r3 $c $r3 0x4 +lg $c bra #shared_loop +shared_done: +// search the stack for trap entry to retrieve PC +mov b32 $r0 c0[0x1908] +mov b32 $r1 c0[0x190c] +membar sys +// invalidate caches so we can read stack entries via g[] +cctl ivall 0 l[0] +cctl ivall 0 g[$r0d] +// get offsets +mov b32 $r2 $physid +ext u32 $r3 $r2 0x0814 // MP id +ext u32 $r2 $r2 0x0608 // warp id +mul $r2 u32 $r2 u32 c0[0x1914] // warp offset +mul $r3 u32 $r3 u32 c0[0x1910] // MP offset +add b32 $r2 $r2 $r3 // MP + warp offset +add b32 $r0 $c $r0 $r2 +add b32 $r1 $r1 0x0 $c +search_cstack: +mov b32 $r3 c0[0x1918] // cstack size +ld u8 $r2 cv g[$r0d+0x8] +set $p0 0x1 eq u32 $r2 0xa +$p0 bra #entry_found +add b32 $r0 $c $r0 0x10 +add b32 $r1 $r1 0x0 $c +sub b32 $r3 $c $r3 0x10 +lg $c bra #search_cstack +bra #end_exit +entry_found: +// load PC (may be unaligned and spread out) +ld b32 $r2 cv g[$r0d] +mov b32 $r0 c0[0x1900] +mov b32 $r1 c0[0x1904] +st b32 wb g[$r0d+0x4] $r2 +join nop +// invalidate caches and exit +end_exit: +cctl ivall 0 g[0] +bpt pause 0x0 +rtt terminate +end_cont: +bpt pause 0x0 +mov $flags $r2 mask 0xffff +ld b128 $r0q cs l[0x00] +rtt diff --git a/src/gallium/drivers/nouveau/codegen/target_lib_nve4.asm.h b/src/gallium/drivers/nouveau/codegen/target_lib_nve4.asm.h new file mode 100644 index 00000000000..53fa12c51fd --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/target_lib_nve4.asm.h @@ -0,0 +1,592 @@ + +// Assembled from target_lib_nve4.asm by envyas -m nvc0 -V nve4 -W. 
+ +static const uint64_t nve4_builtin_code[] = +{ + 0x2282828042804287ULL, + 0x7800000004009c03ULL, + 0x380000007c209c82ULL, + 0x180000000400dde2ULL, + 0x6000000008309c03ULL, + 0x1c00000005205d04ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x2282828282828287ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x2042c28280428047ULL, + 0x200400000c209c43ULL, + 0x280000000000dde4ULL, + 0x5000000008001c43ULL, + 0x1c00000005209d04ULL, + 0x2006000000105c03ULL, + 0x1b0e00000811dc03ULL, + 0x4800000008104103ULL, + 0x220282e20042c287ULL, + 0x0800000004000002ULL, + 0x1b0e00000811c003ULL, + 0x4800000008104103ULL, + 0x0800000004000002ULL, + 0x9000000000001de7ULL, + 0x188e0000fc05dc23ULL, + 0x18c40000fc17dc23ULL, + 0x2280428042828207ULL, + 0x1c00000001201ec4ULL, + 0x1c00000005205ec4ULL, + 0x7800000004009c03ULL, + 0x380000007c209c82ULL, + 0x180000000400dde2ULL, + 0x6000000008309c03ULL, + 0x1c00000005205d04ULL, + 0x2282828282828287ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x2282804280428287ULL, + 0x200400000c209c43ULL, + 0x500000000810dc03ULL, + 0x200400000c209c43ULL, + 0x280000000000dde4ULL, + 0x5000000008001c43ULL, + 0x1c00000005209d04ULL, + 0x2006000000105c03ULL, + 0x22028042c28042c7ULL, + 0x1b0e00000811dc03ULL, + 0x4800000008104103ULL, + 0x0800000004000002ULL, + 0x1b0e00000811c003ULL, + 0x4800000008104103ULL, + 0x0800000004000002ULL, + 0x1c00000001200f84ULL, + 0x22c200428042e047ULL, + 0x1c00000005204b84ULL, + 0x9000000000001de7ULL, + 0xd4004000084004c5ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009c5ULL, + 0xd4004000084007c5ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd4004000084004c5ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009c5ULL, + 0xd4004000084007c5ULL, + 0x1900000004a0dc04ULL, 
+ 0x1800000004a09c04ULL, + 0x30de0001d030dc02ULL, + 0x2000000000000007ULL, + 0x1900000000a05c04ULL, + 0x30de0001d0209c02ULL, + 0x1800000000a01c04ULL, + 0x30de0001d0105c02ULL, + 0x30de0001d0001c02ULL, + 0x9000000000001de7ULL, + 0xd4004000084004a5ULL, + 0x2000000000000007ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009a5ULL, + 0xd4004000084007a5ULL, + 0x1900000004a0de04ULL, + 0x1800000004a09e04ULL, + 0x30e000061c30dc02ULL, + 0x1900000000a05e04ULL, + 0x2000000000000007ULL, + 0x30e000061c209c02ULL, + 0x1800000000a01e04ULL, + 0x30e000061c105c02ULL, + 0x30e000061c001c02ULL, + 0x9000000000001de7ULL, + 0xd4004000084004a5ULL, + 0x0c5400000013dc04ULL, + 0x2000000000000007ULL, + 0xd4004000084009a5ULL, + 0xd4004000084007a5ULL, + 0x1d00000004a0de84ULL, + 0x1c00000004a09e84ULL, + 0x1d00000000a05e84ULL, + 0x1c00000000a01e84ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd4004000084004a5ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009a5ULL, + 0xd4004000084007a5ULL, + 0x1d00000004a0dc04ULL, + 0x1c00000004a09c04ULL, + 0x1d00000000a05c04ULL, + 0x2000000000000007ULL, + 0x1c00000000a01c04ULL, + 0x9000000000001de7ULL, + 0xd4004000084004a5ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009a5ULL, + 0xd4004000084007a5ULL, + 0x1100000004a0dc04ULL, + 0x2000000000000007ULL, + 0x1000000004a09c04ULL, + 0x1100000000a05c04ULL, + 0x1000000000a01c04ULL, + 0x9000000000001de7ULL, + 0xd4004000084004a5ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009a5ULL, + 0x2000000000000007ULL, + 0xd4004000084007a5ULL, + 0x1800000000009de2ULL, + 0x18fe00000000dde2ULL, + 0x9000000000001de7ULL, + 0xd4004000084004a5ULL, + 0x0c5400000013dc04ULL, + 0xd4004000084009a5ULL, + 0x2000000000000007ULL, + 0xd4004000084007a5ULL, + 0x1800000000009de2ULL, + 0x180000000400dde2ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0x2000000000000007ULL, + 0xd400400008400785ULL, + 0x7000c02828005c03ULL, + 0x18fe00000000dde2ULL, + 0x7000c02850009c03ULL, + 0x3800000ffc001c02ULL, 
+ 0x1800000008a09c04ULL, + 0x1800000004a05c04ULL, + 0x2000000000000007ULL, + 0x30ea00801c209c02ULL, + 0x1800000000a01c04ULL, + 0x30ea00801c105c02ULL, + 0x30ea00801c001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0x2000000000000007ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x7000c02828005c03ULL, + 0x180000000400dde2ULL, + 0x7000c02850009c03ULL, + 0x3800000ffc001c02ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x198000000020dc04ULL, + 0x1900000000209c04ULL, + 0x30ee02020430dc02ULL, + 0x2000000000000007ULL, + 0x1880000000205c04ULL, + 0x30ee020204209c02ULL, + 0x1800000000201c04ULL, + 0x30ee020204105c02ULL, + 0x30ee020204001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x2000000000000007ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x198000000020de04ULL, + 0x1900000000209e04ULL, + 0x30f004081030dc02ULL, + 0x1880000000205e04ULL, + 0x2000000000000007ULL, + 0x30f0040810209c02ULL, + 0x1800000000201e04ULL, + 0x30f0040810105c02ULL, + 0x30f0040810001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0x2000000000000007ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x1d8000000020de84ULL, + 0x1d00000000209e84ULL, + 0x1c80000000205e84ULL, + 0x1c00000000201e84ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x1d8000000020dc04ULL, + 0x1d00000000209c04ULL, + 0x1c80000000205c04ULL, + 0x2000000000000007ULL, + 0x1c00000000201c04ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x7000c01814005c03ULL, + 0x2000000000000007ULL, + 0x18fe00000000dde2ULL, + 0x7000c0142c009c03ULL, + 0x380000007c001c02ULL, + 0x1800000008209c04ULL, + 0x1800000004205c04ULL, 
+ 0x30f4108420209c02ULL, + 0x1800000000201c04ULL, + 0x2000000000000007ULL, + 0x30f2082084105c02ULL, + 0x30f4108420001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x2000000000000007ULL, + 0x7000c01414005c03ULL, + 0x7000c01428009c03ULL, + 0x380000007c001c02ULL, + 0x18fe00000000dde2ULL, + 0x1800000008209c04ULL, + 0x1800000004205c04ULL, + 0x1800000000201c04ULL, + 0x2000000000000007ULL, + 0x30f4108420209c02ULL, + 0x30f4108420105c02ULL, + 0x30f4108420001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0x2000000000000007ULL, + 0xd400400008400785ULL, + 0x1900000000a05c04ULL, + 0x1800000000a01c04ULL, + 0x30de0001d0105c02ULL, + 0x30de0001d0001c02ULL, + 0x1800000000009de2ULL, + 0x18fe00000000dde2ULL, + 0x2000000000000007ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x18fe00000000dde2ULL, + 0x1900000000a05e04ULL, + 0x2000000000000007ULL, + 0x1800000000009de2ULL, + 0x1800000000a01e04ULL, + 0x30e000061c105c02ULL, + 0x30e000061c001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0x2000000000000007ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x180000000400dde2ULL, + 0x1d00000000a05e84ULL, + 0x1800000000009de2ULL, + 0x1c00000000a01e84ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x180000000400dde2ULL, + 0x1d00000000a05c04ULL, + 0x1800000000009de2ULL, + 0x2000000000000007ULL, + 0x1c00000000a01c04ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x18fe00000000dde2ULL, + 0x2000000000000007ULL, + 0x1100000000a05c04ULL, + 0x1800000000009de2ULL, + 0x1000000000a01c04ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, 
+ 0x0c5400000013dc04ULL, + 0xd400400008400985ULL, + 0x2000000000000007ULL, + 0xd400400008400785ULL, + 0x18fe00000000dde2ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0xd400400008400485ULL, + 0x0c5400000013dc04ULL, + 0x2000000000000007ULL, + 0xd400400008400985ULL, + 0xd400400008400785ULL, + 0x180000000400dde2ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x2000000000000007ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x18fe00000000dde2ULL, + 0x1880000000205c04ULL, + 0x1800000000009de2ULL, + 0x1800000000201c04ULL, + 0x2000000000000007ULL, + 0x30ee020204105c02ULL, + 0x30ee020204001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x2000000000000007ULL, + 0x18fe00000000dde2ULL, + 0x1880000000205e04ULL, + 0x1800000000009de2ULL, + 0x1800000000201e04ULL, + 0x30f0040810105c02ULL, + 0x30f0040810001c02ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x180000000400dde2ULL, + 0x1c80000000205c04ULL, + 0x1800000000009de2ULL, + 0x2000000000000007ULL, + 0x1c00000000201c04ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x180000000400dde2ULL, + 0x2000000000000007ULL, + 0x1c80000000205e84ULL, + 0x1800000000009de2ULL, + 0x1c00000000201e84ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0x2000000000000007ULL, + 0xd400400008400745ULL, + 0x18fe00000000dde2ULL, + 0x1800000000a01c04ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x30de0001d0001c02ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x18fe00000000dde2ULL, 
+ 0x1800000000a01e04ULL, + 0x1800000000009de2ULL, + 0x2000000000000007ULL, + 0x1800000000005de2ULL, + 0x30e000061c001c02ULL, + 0x9000000000001de7ULL, + 0xd400400008400465ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400965ULL, + 0xd400400008400765ULL, + 0x2000000000000007ULL, + 0x180000000400dde2ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400945ULL, + 0x2000000000000007ULL, + 0xd400400008400745ULL, + 0x180000000400dde2ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0xd400400008400445ULL, + 0x0c5400000013dc04ULL, + 0x2000000000000007ULL, + 0xd400400008400945ULL, + 0xd400400008400745ULL, + 0x18fe00000000dde2ULL, + 0x1800000000009de2ULL, + 0x1000000000a01c04ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400405ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400905ULL, + 0xd400400008400705ULL, + 0x18fe00000000dde2ULL, + 0x1800000000201c04ULL, + 0x1800000000009de2ULL, + 0x2000000000000007ULL, + 0x30ee020204001c02ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0xd400400008400405ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400905ULL, + 0xd400400008400705ULL, + 0x2000000000000007ULL, + 0x18fe00000000dde2ULL, + 0x1800000000201e04ULL, + 0x1800000000009de2ULL, + 0x30f0040810001c02ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0xd400400008400425ULL, + 0x2000000000000007ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400925ULL, + 0xd400400008400725ULL, + 0x180000000400dde2ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x9000000000001de7ULL, + 0x2000000000000007ULL, + 0xd400400008400405ULL, + 0x0c5400000013dc04ULL, + 0xd400400008400905ULL, + 0xd400400008400705ULL, + 0x180000000400dde2ULL, + 0x1800000000009de2ULL, + 0x1800000000005de2ULL, + 0x2000000000000007ULL, + 0x9000000000001de7ULL, + 0xd40040000840c485ULL, + 0x0c5400000013dc04ULL, + 0xd40040000840c985ULL, + 0xd40040000840c785ULL, 
+ 0x18fe00000000dde2ULL, + 0x4000000000001de4ULL, + 0x9000000000001de7ULL, + 0x4000000000001de4ULL, + 0x9000000000001de7ULL, + 0x4000000000001de4ULL, + 0x9000000000001de7ULL, + 0xc800000003f01cc5ULL, + 0x2c00000100005c04ULL, + 0x2c0000010800dc04ULL, + 0x3000c3fffff09c04ULL, + 0x680100000c1fdc03ULL, + 0x4000000a60001c47ULL, + 0x180000004000dde2ULL, + 0x78000009c0000007ULL, + 0x0c0000000430dd02ULL, + 0x4003ffffa0001ca7ULL, + 0x2800406400001de4ULL, + 0x2800406410005de4ULL, + 0x180000000400dde2ULL, + 0x547e18000000dd05ULL, + 0x60000008e0000007ULL, + 0x190ec0000431dc03ULL, + 0x40000000000001f4ULL, + 0x94000004c0009c85ULL, + 0x2c00000100009c04ULL, + 0x2c0000010800dc04ULL, + 0x9400000020009ca5ULL, + 0x9400000100011cc5ULL, + 0x9400000140021cc5ULL, + 0x9400000180031cc5ULL, + 0x94000001c0041cc5ULL, + 0x9400000200051cc5ULL, + 0x9400000240061cc5ULL, + 0x9400000280071cc5ULL, + 0x94000002c0081cc5ULL, + 0x9400000300091cc5ULL, + 0x94000003400a1cc5ULL, + 0x94000003800b1cc5ULL, + 0x94000003c00c1cc5ULL, + 0x94000004000d1cc5ULL, + 0x94000004400e1cc5ULL, + 0x94000004800f1cc5ULL, + 0xc000000003f09ea5ULL, + 0x94000000c0009ca5ULL, + 0xc000000023f09ea5ULL, + 0x94000000e0009ca5ULL, + 0x2c00000084009c04ULL, + 0x2c0000008800dc04ULL, + 0x9400000040009ca5ULL, + 0x2c0000008c009c04ULL, + 0x2c0000009400dc04ULL, + 0x9400000060009ca5ULL, + 0x2c00000098009c04ULL, + 0x2c0000009c00dc04ULL, + 0x9400000080009ca5ULL, + 0x2c000000c800dc04ULL, + 0x0c0000001030dd02ULL, + 0x4000000100001ea7ULL, + 0x480100000c001c03ULL, + 0x0800000000105c42ULL, + 0xc100000000309c85ULL, + 0x9400000500009c85ULL, + 0x0c00000010001d02ULL, + 0x0800000000105d42ULL, + 0x0c0000001030dd02ULL, + 0x4003ffff40001ca7ULL, + 0x2800406420001de4ULL, + 0x2800406430005de4ULL, + 0xe000000000001c45ULL, + 0xd000000003ffdcc5ULL, + 0x9c000000000fdcc5ULL, + 0x2c0000000c009c04ULL, + 0x7000c0205020dc03ULL, + 0x7000c01820209c03ULL, + 0x5000406450209c03ULL, + 0x500040644030dc03ULL, + 0x480000000c209c03ULL, + 0x4801000008001c03ULL, + 0x0800000000105c42ULL, 
+ 0x280040646000dde4ULL, + 0x8400000020009f05ULL, + 0x190ec0002821dc03ULL, + 0x40000000800001e7ULL, + 0x0c00000040001c02ULL, + 0x0800000000105c42ULL, + 0x0c0000004030dd02ULL, + 0x00029dff0ffc5cbfULL, + 0x8400000000009f85ULL, + 0x2800406400001de4ULL, + 0x2800406410005de4ULL, + 0x9400000010009c85ULL, + 0x4000000000001df4ULL, + 0x9800000003ffdcc5ULL, + 0xd000000000008007ULL, + 0xa000000000004007ULL, + 0xd000000000008007ULL, + 0x3400c3fffc201c04ULL, + 0xc000000003f01ec5ULL, + 0xa000000000000007ULL +}; + +static const uint16_t nve4_builtin_offsets[NVC0_BUILTIN_COUNT] = +{ + 0x0000, + 0x00f0, + 0x0f08, + 0x0f18, +}; diff --git a/src/gallium/drivers/nouveau/codegen/target_lib_nvf0.asm.h b/src/gallium/drivers/nouveau/codegen/target_lib_nvf0.asm.h new file mode 100644 index 00000000000..d10b6b07693 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/target_lib_nvf0.asm.h @@ -0,0 +1,13 @@ + +static const uint64_t nvf0_builtin_code[] = +{ + 0x19000000001c003cULL, +}; + +static const uint16_t nvf0_builtin_offsets[NVC0_BUILTIN_COUNT] = +{ + 0, + 0, + 0, + 0 +}; diff --git a/src/gallium/drivers/nouveau/nouveau_mm.c b/src/gallium/drivers/nouveau/nouveau_mm.c index 439c0fb2255..87f2f460a9d 100644 --- a/src/gallium/drivers/nouveau/nouveau_mm.c +++ b/src/gallium/drivers/nouveau/nouveau_mm.c @@ -296,4 +296,3 @@ nouveau_mm_destroy(struct nouveau_mman *cache) FREE(cache); } - diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index d129a55b387..5b35ee47c12 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -85,7 +85,7 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen, struct nouveau_device *dev = nouveau_screen(pscreen)->device; struct nouveau_bo *bo = 0; int ret; - + ret = nouveau_bo_name_ref(dev, whandle->handle, &bo); if (ret) { debug_printf("%s: ref name 0x%08x failed with %d\n", @@ -106,7 +106,7 @@ nouveau_screen_bo_get_handle(struct pipe_screen 
*pscreen, { whandle->stride = stride; - if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { return nouveau_bo_name_get(bo, &whandle->handle) == 0; } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { whandle->handle = bo->handle; diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c index 8e08cab4b2d..7deea881a25 100644 --- a/src/gallium/drivers/nouveau/nouveau_video.c +++ b/src/gallium/drivers/nouveau/nouveau_video.c @@ -27,7 +27,7 @@ #include "nouveau_context.h" #include "nouveau_video.h" -#include "nouveau/nouveau_buffer.h" +#include "nouveau_buffer.h" #include "util/u_video.h" #include "util/u_format.h" #include "util/u_sampler.h" diff --git a/src/gallium/drivers/nouveau/nv30/nv01_2d.xml.h b/src/gallium/drivers/nouveau/nv30/nv01_2d.xml.h new file mode 100644 index 00000000000..afae00cd0a2 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv01_2d.xml.h @@ -0,0 +1,1416 @@ +#ifndef _HOME_SKEGGSB_GIT_ENVYTOOLS_RNNDB_NV01_2D_XML +#define _HOME_SKEGGSB_GIT_ENVYTOOLS_RNNDB_NV01_2D_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- /home/skeggsb/git/envytools/rnndb/nv_objects.xml ( 794 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6452 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv_m2mf.xml ( 2696 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv_object.xml ( 12672 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 3617 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv_defs.xml ( 4437 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv50_defs.xml ( 5468 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nvc0_m2mf.xml ( 2687 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv01_2d.xml ( 32584 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv04_dvd.xml ( 3000 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv03_3d.xml ( 5209 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv04_3d.xml ( 17759 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv_3ddefs.xml ( 16394 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv10_3d.xml ( 18437 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv20_3d.xml ( 21107 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv30-40_3d.xml ( 31987 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv50_2d.xml ( 11113 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nv50_3d.xml ( 65233 bytes, from 2011-11-30 05:49:35) +- /home/skeggsb/git/envytools/rnndb/nv50_compute.xml ( 14012 bytes, from 2011-10-22 08:01:09) +- 
/home/skeggsb/git/envytools/rnndb/nv84_crypt.xml ( 2071 bytes, from 2011-11-30 05:49:35) +- /home/skeggsb/git/envytools/rnndb/nv31_mpeg.xml ( 2269 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/nvc0_3d.xml ( 52547 bytes, from 2011-11-30 05:49:35) +- /home/skeggsb/git/envytools/rnndb/nvc0_compute.xml ( 10865 bytes, from 2011-10-22 08:01:09) +- /home/skeggsb/git/envytools/rnndb/blob_nvc0_pcopy.xml ( 4516 bytes, from 2011-10-22 08:01:09) + +Copyright (C) 2006-2011 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> 
(haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NV01_BETA_DMA_NOTIFY 0x00000180 + +#define NV01_BETA_BETA_1D31 0x00000300 + + +#define NV04_BETA4_DMA_NOTIFY 0x00000180 + +#define NV04_BETA4_BETA_FACTOR 0x00000300 + + +#define NV01_CHROMA_DMA_NOTIFY 0x00000180 + +#define NV01_CHROMA_COLOR_FORMAT 0x00000300 +#define NV01_CHROMA_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV01_CHROMA_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV01_CHROMA_COLOR_FORMAT_A8R8G8B8 0x00000003 + +#define NV01_CHROMA_COLOR 0x00000304 + + +#define NV01_PATTERN_DMA_NOTIFY 0x00000180 + +#define NV01_PATTERN_COLOR_FORMAT 0x00000300 +#define NV01_PATTERN_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV01_PATTERN_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV01_PATTERN_COLOR_FORMAT_A8R8G8B8 0x00000003 + +#define NV01_PATTERN_MONOCHROME_FORMAT 0x00000304 +#define NV01_PATTERN_MONOCHROME_FORMAT_CGA6 0x00000001 +#define NV01_PATTERN_MONOCHROME_FORMAT_LE 0x00000002 + +#define NV01_PATTERN_MONOCHROME_SHAPE 0x00000308 +#define NV01_PATTERN_MONOCHROME_SHAPE_8X8 0x00000000 +#define NV01_PATTERN_MONOCHROME_SHAPE_64X1 0x00000001 +#define NV01_PATTERN_MONOCHROME_SHAPE_1X64 0x00000002 + +#define NV04_PATTERN_PATTERN_SELECT 0x0000030c +#define NV04_PATTERN_PATTERN_SELECT_MONO 0x00000001 +#define NV04_PATTERN_PATTERN_SELECT_COLOR 0x00000002 + +#define NV01_PATTERN_MONOCHROME_COLOR(i0) (0x00000310 + 0x4*(i0)) +#define NV01_PATTERN_MONOCHROME_COLOR__ESIZE 0x00000004 +#define NV01_PATTERN_MONOCHROME_COLOR__LEN 0x00000002 + +#define NV01_PATTERN_MONOCHROME_PATTERN(i0) (0x00000318 + 0x4*(i0)) +#define NV01_PATTERN_MONOCHROME_PATTERN__ESIZE 0x00000004 +#define NV01_PATTERN_MONOCHROME_PATTERN__LEN 0x00000002 + +#define NV04_PATTERN_PATTERN_Y8(i0) (0x00000400 + 0x4*(i0)) +#define NV04_PATTERN_PATTERN_Y8__ESIZE 0x00000004 +#define NV04_PATTERN_PATTERN_Y8__LEN 0x00000010 +#define NV04_PATTERN_PATTERN_Y8_Y0__MASK 0x000000ff +#define NV04_PATTERN_PATTERN_Y8_Y0__SHIFT 0 +#define NV04_PATTERN_PATTERN_Y8_Y1__MASK 0x0000ff00 +#define 
NV04_PATTERN_PATTERN_Y8_Y1__SHIFT 8 +#define NV04_PATTERN_PATTERN_Y8_Y2__MASK 0x00ff0000 +#define NV04_PATTERN_PATTERN_Y8_Y2__SHIFT 16 +#define NV04_PATTERN_PATTERN_Y8_Y3__MASK 0xff000000 +#define NV04_PATTERN_PATTERN_Y8_Y3__SHIFT 24 + +#define NV04_PATTERN_PATTERN_R5G6B5(i0) (0x00000500 + 0x4*(i0)) +#define NV04_PATTERN_PATTERN_R5G6B5__ESIZE 0x00000004 +#define NV04_PATTERN_PATTERN_R5G6B5__LEN 0x00000020 +#define NV04_PATTERN_PATTERN_R5G6B5_B0__MASK 0x0000001f +#define NV04_PATTERN_PATTERN_R5G6B5_B0__SHIFT 0 +#define NV04_PATTERN_PATTERN_R5G6B5_G0__MASK 0x000007e0 +#define NV04_PATTERN_PATTERN_R5G6B5_G0__SHIFT 5 +#define NV04_PATTERN_PATTERN_R5G6B5_R0__MASK 0x0000f800 +#define NV04_PATTERN_PATTERN_R5G6B5_R0__SHIFT 11 +#define NV04_PATTERN_PATTERN_R5G6B5_B1__MASK 0x001f0000 +#define NV04_PATTERN_PATTERN_R5G6B5_B1__SHIFT 16 +#define NV04_PATTERN_PATTERN_R5G6B5_G1__MASK 0x07e00000 +#define NV04_PATTERN_PATTERN_R5G6B5_G1__SHIFT 21 +#define NV04_PATTERN_PATTERN_R5G6B5_R1__MASK 0xf8000000 +#define NV04_PATTERN_PATTERN_R5G6B5_R1__SHIFT 27 + +#define NV04_PATTERN_PATTERN_X1R5G5B5(i0) (0x00000600 + 0x4*(i0)) +#define NV04_PATTERN_PATTERN_X1R5G5B5__ESIZE 0x00000004 +#define NV04_PATTERN_PATTERN_X1R5G5B5__LEN 0x00000020 +#define NV04_PATTERN_PATTERN_X1R5G5B5_B0__MASK 0x0000001f +#define NV04_PATTERN_PATTERN_X1R5G5B5_B0__SHIFT 0 +#define NV04_PATTERN_PATTERN_X1R5G5B5_G0__MASK 0x000003e0 +#define NV04_PATTERN_PATTERN_X1R5G5B5_G0__SHIFT 5 +#define NV04_PATTERN_PATTERN_X1R5G5B5_R0__MASK 0x00007c00 +#define NV04_PATTERN_PATTERN_X1R5G5B5_R0__SHIFT 10 +#define NV04_PATTERN_PATTERN_X1R5G5B5_B1__MASK 0x001f0000 +#define NV04_PATTERN_PATTERN_X1R5G5B5_B1__SHIFT 16 +#define NV04_PATTERN_PATTERN_X1R5G5B5_G1__MASK 0x03e00000 +#define NV04_PATTERN_PATTERN_X1R5G5B5_G1__SHIFT 21 +#define NV04_PATTERN_PATTERN_X1R5G5B5_R1__MASK 0x7c000000 +#define NV04_PATTERN_PATTERN_X1R5G5B5_R1__SHIFT 26 + +#define NV04_PATTERN_PATTERN_X8R8G8B8(i0) (0x00000700 + 0x4*(i0)) +#define 
NV04_PATTERN_PATTERN_X8R8G8B8__ESIZE 0x00000004 +#define NV04_PATTERN_PATTERN_X8R8G8B8__LEN 0x00000040 +#define NV04_PATTERN_PATTERN_X8R8G8B8_B__MASK 0x000000ff +#define NV04_PATTERN_PATTERN_X8R8G8B8_B__SHIFT 0 +#define NV04_PATTERN_PATTERN_X8R8G8B8_G__MASK 0x0000ff00 +#define NV04_PATTERN_PATTERN_X8R8G8B8_G__SHIFT 8 +#define NV04_PATTERN_PATTERN_X8R8G8B8_R__MASK 0x00ff0000 +#define NV04_PATTERN_PATTERN_X8R8G8B8_R__SHIFT 16 + + +#define NV01_CLIP_DMA_NOTIFY 0x00000180 + +#define NV01_CLIP_POINT 0x00000300 +#define NV01_CLIP_POINT_X__MASK 0x0000ffff +#define NV01_CLIP_POINT_X__SHIFT 0 +#define NV01_CLIP_POINT_Y__MASK 0xffff0000 +#define NV01_CLIP_POINT_Y__SHIFT 16 + +#define NV01_CLIP_SIZE 0x00000304 +#define NV01_CLIP_SIZE_W__MASK 0x0000ffff +#define NV01_CLIP_SIZE_W__SHIFT 0 +#define NV01_CLIP_SIZE_H__MASK 0xffff0000 +#define NV01_CLIP_SIZE_H__SHIFT 16 + + +#define NV01_ROP_DMA_NOTIFY 0x00000180 + +#define NV01_ROP_ROP 0x00000300 + + +#define NV04_SURFACE_2D_DMA_NOTIFY 0x00000180 + +#define NV04_SURFACE_2D_DMA_IMAGE_SOURCE 0x00000184 + +#define NV04_SURFACE_2D_DMA_IMAGE_DESTIN 0x00000188 + + +#define NV50_SURFACE_2D_SRC_LINEAR 0x00000200 + +#define NV50_SURFACE_2D_SRC_TILE_MODE 0x00000204 + +#define NV50_SURFACE_2D_SRC_WIDTH 0x00000208 + +#define NV50_SURFACE_2D_SRC_HEIGHT 0x0000020c + +#define NV50_SURFACE_2D_UNK0210 0x00000210 + +#define NV50_SURFACE_2D_UNK0214 0x00000214 + +#define NV50_SURFACE_2D_DST_LINEAR 0x00000218 + +#define NV50_SURFACE_2D_DST_TILE_MODE 0x0000021c + +#define NV50_SURFACE_2D_DST_WIDTH 0x00000220 + +#define NV50_SURFACE_2D_DST_HEIGHT 0x00000224 + +#define NV50_SURFACE_2D_UNK0228 0x00000228 + +#define NV50_SURFACE_2D_UNK022C 0x0000022c + +#define NV50_SURFACE_2D_OFFSET_SOURCE_HIGH 0x00000230 + +#define NV50_SURFACE_2D_OFFSET_DESTIN_HIGH 0x00000234 + +#define NV04_SURFACE_2D_FORMAT 0x00000300 +#define NV04_SURFACE_2D_FORMAT_Y8 0x00000001 +#define NV04_SURFACE_2D_FORMAT_X1R5G5B5_Z1R5G5B5 0x00000002 +#define 
NV04_SURFACE_2D_FORMAT_X1R5G5B5_X1R5G5B5 0x00000003 +#define NV04_SURFACE_2D_FORMAT_R5G6B5 0x00000004 +#define NV04_SURFACE_2D_FORMAT_Y16 0x00000005 +#define NV04_SURFACE_2D_FORMAT_X8R8G8B8_Z8R8G8B8 0x00000006 +#define NV04_SURFACE_2D_FORMAT_X8R8G8B8_X8R8G8B8 0x00000007 +#define NV04_SURFACE_2D_FORMAT_X1A7R8G8B8_Z1A7R8G8B8 0x00000008 +#define NV04_SURFACE_2D_FORMAT_X1A7R8G8B8_X1A7R8G8B8 0x00000009 +#define NV04_SURFACE_2D_FORMAT_A8R8G8B8 0x0000000a +#define NV04_SURFACE_2D_FORMAT_Y32 0x0000000b + +#define NV04_SURFACE_2D_PITCH 0x00000304 +#define NV04_SURFACE_2D_PITCH_SOURCE__MASK 0x0000ffff +#define NV04_SURFACE_2D_PITCH_SOURCE__SHIFT 0 +#define NV04_SURFACE_2D_PITCH_DESTIN__MASK 0xffff0000 +#define NV04_SURFACE_2D_PITCH_DESTIN__SHIFT 16 + +#define NV04_SURFACE_2D_OFFSET_SOURCE 0x00000308 + +#define NV04_SURFACE_2D_OFFSET_DESTIN 0x0000030c + + +#define NV04_SURFACE_SWZ_DMA_NOTIFY 0x00000180 + +#define NV04_SURFACE_SWZ_DMA_IMAGE 0x00000184 + +#define NV04_SURFACE_SWZ_FORMAT 0x00000300 +#define NV04_SURFACE_SWZ_FORMAT_COLOR__MASK 0x000000ff +#define NV04_SURFACE_SWZ_FORMAT_COLOR__SHIFT 0 +#define NV04_SURFACE_SWZ_FORMAT_COLOR_Y8 0x00000001 +#define NV04_SURFACE_SWZ_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000002 +#define NV04_SURFACE_SWZ_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000003 +#define NV04_SURFACE_SWZ_FORMAT_COLOR_R5G6B5 0x00000004 +#define NV04_SURFACE_SWZ_FORMAT_COLOR_Y16 0x00000005 +#define NV04_SURFACE_SWZ_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000006 +#define NV04_SURFACE_SWZ_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000007 +#define NV04_SURFACE_SWZ_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000008 +#define NV04_SURFACE_SWZ_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000009 +#define NV04_SURFACE_SWZ_FORMAT_COLOR_A8R8G8B8 0x0000000a +#define NV04_SURFACE_SWZ_FORMAT_COLOR_Y32 0x0000000b +#define NV04_SURFACE_SWZ_FORMAT_BASE_SIZE_U__MASK 0x00ff0000 +#define NV04_SURFACE_SWZ_FORMAT_BASE_SIZE_U__SHIFT 16 +#define NV04_SURFACE_SWZ_FORMAT_BASE_SIZE_V__MASK 0xff000000 +#define 
NV04_SURFACE_SWZ_FORMAT_BASE_SIZE_V__SHIFT 24 + +#define NV04_SURFACE_SWZ_OFFSET 0x00000304 + + +#define NV01_POINT_PATCH 0x0000010c + +#define NV01_POINT_DMA_NOTIFY 0x00000180 + +#define NV01_POINT_CLIP 0x00000184 + +#define NV01_POINT_PATTERN 0x00000188 + +#define NV01_POINT_ROP 0x0000018c + +#define NV01_POINT_BETA 0x00000190 + +#define NV01_POINT_SURFACE_DST 0x00000194 + +#define NV01_POINT_OPERATION 0x000002fc +#define NV01_POINT_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_POINT_OPERATION_ROP_AND 0x00000001 +#define NV01_POINT_OPERATION_BLEND_AND 0x00000002 +#define NV01_POINT_OPERATION_SRCCOPY 0x00000003 +#define NV01_POINT_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_POINT_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV01_POINT_COLOR_FORMAT 0x00000300 +#define NV01_POINT_COLOR_FORMAT_X16A8Y8 0x00000001 +#define NV01_POINT_COLOR_FORMAT_X24Y8 0x00000002 +#define NV01_POINT_COLOR_FORMAT_X16A1R5G5B5 0x00000003 +#define NV01_POINT_COLOR_FORMAT_X17R5G5B5 0x00000004 +#define NV01_POINT_COLOR_FORMAT_A8R8G8B8 0x00000005 +#define NV01_POINT_COLOR_FORMAT_X8R8G8B8 0x00000006 +#define NV01_POINT_COLOR_FORMAT_A16Y16 0x00000007 +#define NV01_POINT_COLOR_FORMAT_X16Y16 0x00000008 + +#define NV01_POINT_COLOR 0x00000304 + +#define NV01_POINT_POINT(i0) (0x00000400 + 0x4*(i0)) +#define NV01_POINT_POINT__ESIZE 0x00000004 +#define NV01_POINT_POINT__LEN 0x00000020 +#define NV01_POINT_POINT_X__MASK 0x0000ffff +#define NV01_POINT_POINT_X__SHIFT 0 +#define NV01_POINT_POINT_Y__MASK 0xffff0000 +#define NV01_POINT_POINT_Y__SHIFT 16 + +#define NV01_POINT_POINT32_X(i0) (0x00000480 + 0x8*(i0)) +#define NV01_POINT_POINT32_X__ESIZE 0x00000008 +#define NV01_POINT_POINT32_X__LEN 0x00000010 + +#define NV01_POINT_POINT32_Y(i0) (0x00000484 + 0x8*(i0)) +#define NV01_POINT_POINT32_Y__ESIZE 0x00000008 +#define NV01_POINT_POINT32_Y__LEN 0x00000010 + +#define NV01_POINT_CPOINT_COLOR(i0) (0x00000500 + 0x8*(i0)) +#define NV01_POINT_CPOINT_COLOR__ESIZE 0x00000008 +#define 
NV01_POINT_CPOINT_COLOR__LEN 0x00000010 + +#define NV01_POINT_CPOINT_POINT(i0) (0x00000504 + 0x8*(i0)) +#define NV01_POINT_CPOINT_POINT__ESIZE 0x00000008 +#define NV01_POINT_CPOINT_POINT__LEN 0x00000010 +#define NV01_POINT_CPOINT_POINT_X__MASK 0x0000ffff +#define NV01_POINT_CPOINT_POINT_X__SHIFT 0 +#define NV01_POINT_CPOINT_POINT_Y__MASK 0xffff0000 +#define NV01_POINT_CPOINT_POINT_Y__SHIFT 16 + + +#define NV01_LINE_PATCH 0x0000010c + +#define NV01_LINE_DMA_NOTIFY 0x00000180 + +#define NV01_LINE_CLIP 0x00000184 + +#define NV01_LINE_PATTERN 0x00000188 + +#define NV04_LIN_PATTERN 0x00000188 + +#define NV01_LINE_ROP 0x0000018c + +#define NV01_LINE_BETA 0x00000190 + +#define NV01_LINE_SURFACE_DST 0x00000194 + + +#define NV04_LIN_BETA4 0x00000194 + +#define NV04_LIN_SURFACE 0x00000198 + +#define NV01_LINE_OPERATION 0x000002fc +#define NV01_LINE_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_LINE_OPERATION_ROP_AND 0x00000001 +#define NV01_LINE_OPERATION_BLEND_AND 0x00000002 +#define NV01_LINE_OPERATION_SRCCOPY 0x00000003 +#define NV01_LINE_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_LINE_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV01_LINE_COLOR_FORMAT 0x00000300 +#define NV01_LINE_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV01_LINE_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV01_LINE_COLOR_FORMAT_A8R8G8B8 0x00000003 + +#define NV01_LINE_COLOR 0x00000304 + +#define NV01_LINE_LINE_POINT0(i0) (0x00000400 + 0x8*(i0)) +#define NV01_LINE_LINE_POINT0__ESIZE 0x00000008 +#define NV01_LINE_LINE_POINT0__LEN 0x00000010 +#define NV01_LINE_LINE_POINT0_X__MASK 0x0000ffff +#define NV01_LINE_LINE_POINT0_X__SHIFT 0 +#define NV01_LINE_LINE_POINT0_Y__MASK 0xffff0000 +#define NV01_LINE_LINE_POINT0_Y__SHIFT 16 + +#define NV01_LINE_LINE_POINT1(i0) (0x00000404 + 0x8*(i0)) +#define NV01_LINE_LINE_POINT1__ESIZE 0x00000008 +#define NV01_LINE_LINE_POINT1__LEN 0x00000010 +#define NV01_LINE_LINE_POINT1_X__MASK 0x0000ffff +#define NV01_LINE_LINE_POINT1_X__SHIFT 0 +#define 
NV01_LINE_LINE_POINT1_Y__MASK 0xffff0000 +#define NV01_LINE_LINE_POINT1_Y__SHIFT 16 + +#define NV01_LINE_LINE32_POINT0_X(i0) (0x00000480 + 0x10*(i0)) +#define NV01_LINE_LINE32_POINT0_X__ESIZE 0x00000010 +#define NV01_LINE_LINE32_POINT0_X__LEN 0x00000008 + +#define NV01_LINE_LINE32_POINT0_Y(i0) (0x00000484 + 0x10*(i0)) +#define NV01_LINE_LINE32_POINT0_Y__ESIZE 0x00000010 +#define NV01_LINE_LINE32_POINT0_Y__LEN 0x00000008 + +#define NV01_LINE_LINE32_POINT1_X(i0) (0x00000488 + 0x10*(i0)) +#define NV01_LINE_LINE32_POINT1_X__ESIZE 0x00000010 +#define NV01_LINE_LINE32_POINT1_X__LEN 0x00000008 + +#define NV01_LINE_LINE32_POINT1_Y(i0) (0x0000048c + 0x10*(i0)) +#define NV01_LINE_LINE32_POINT1_Y__ESIZE 0x00000010 +#define NV01_LINE_LINE32_POINT1_Y__LEN 0x00000008 + +#define NV01_LINE_POLYLINE(i0) (0x00000500 + 0x4*(i0)) +#define NV01_LINE_POLYLINE__ESIZE 0x00000004 +#define NV01_LINE_POLYLINE__LEN 0x00000020 +#define NV01_LINE_POLYLINE_X__MASK 0x0000ffff +#define NV01_LINE_POLYLINE_X__SHIFT 0 +#define NV01_LINE_POLYLINE_Y__MASK 0xffff0000 +#define NV01_LINE_POLYLINE_Y__SHIFT 16 + +#define NV01_LINE_POLYLINE32_POINT_X(i0) (0x00000580 + 0x8*(i0)) +#define NV01_LINE_POLYLINE32_POINT_X__ESIZE 0x00000008 +#define NV01_LINE_POLYLINE32_POINT_X__LEN 0x00000010 + +#define NV01_LINE_POLYLINE32_POINT_Y(i0) (0x00000584 + 0x8*(i0)) +#define NV01_LINE_POLYLINE32_POINT_Y__ESIZE 0x00000008 +#define NV01_LINE_POLYLINE32_POINT_Y__LEN 0x00000010 + +#define NV01_LINE_CPOLYLINE_COLOR(i0) (0x00000600 + 0x8*(i0)) +#define NV01_LINE_CPOLYLINE_COLOR__ESIZE 0x00000008 +#define NV01_LINE_CPOLYLINE_COLOR__LEN 0x00000010 + +#define NV01_LINE_CPOLYLINE_POINT(i0) (0x00000604 + 0x8*(i0)) +#define NV01_LINE_CPOLYLINE_POINT__ESIZE 0x00000008 +#define NV01_LINE_CPOLYLINE_POINT__LEN 0x00000010 +#define NV01_LINE_CPOLYLINE_POINT_X__MASK 0x0000ffff +#define NV01_LINE_CPOLYLINE_POINT_X__SHIFT 0 +#define NV01_LINE_CPOLYLINE_POINT_Y__MASK 0xffff0000 +#define NV01_LINE_CPOLYLINE_POINT_Y__SHIFT 16 + + +#define 
NV01_TRI_PATCH 0x0000010c + +#define NV01_TRI_DMA_NOTIFY 0x00000180 + +#define NV01_TRI_CLIP 0x00000184 + +#define NV01_TRI_PATTERN 0x00000188 + +#define NV04_TRI_PATTERN 0x00000188 + +#define NV01_TRI_ROP 0x0000018c + +#define NV01_TRI_BETA 0x00000190 + +#define NV01_TRI_SURFACE_DST 0x00000194 + + +#define NV04_TRI_BETA4 0x00000194 + +#define NV04_TRI_SURFACE 0x00000198 + +#define NV01_TRI_OPERATION 0x000002fc +#define NV01_TRI_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_TRI_OPERATION_ROP_AND 0x00000001 +#define NV01_TRI_OPERATION_BLEND_AND 0x00000002 +#define NV01_TRI_OPERATION_SRCCOPY 0x00000003 +#define NV01_TRI_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_TRI_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV01_TRI_COLOR_FORMAT 0x00000300 +#define NV01_TRI_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV01_TRI_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV01_TRI_COLOR_FORMAT_A8R8G8B8 0x00000003 + +#define NV01_TRI_COLOR 0x00000304 + +#define NV01_TRI_TRIANGLE_POINT0 0x00000310 +#define NV01_TRI_TRIANGLE_POINT0_X__MASK 0x0000ffff +#define NV01_TRI_TRIANGLE_POINT0_X__SHIFT 0 +#define NV01_TRI_TRIANGLE_POINT0_Y__MASK 0xffff0000 +#define NV01_TRI_TRIANGLE_POINT0_Y__SHIFT 16 + +#define NV01_TRI_TRIANGLE_POINT1 0x00000314 +#define NV01_TRI_TRIANGLE_POINT1_X__MASK 0x0000ffff +#define NV01_TRI_TRIANGLE_POINT1_X__SHIFT 0 +#define NV01_TRI_TRIANGLE_POINT1_Y__MASK 0xffff0000 +#define NV01_TRI_TRIANGLE_POINT1_Y__SHIFT 16 + +#define NV01_TRI_TRIANGLE_POINT2 0x00000318 +#define NV01_TRI_TRIANGLE_POINT2_X__MASK 0x0000ffff +#define NV01_TRI_TRIANGLE_POINT2_X__SHIFT 0 +#define NV01_TRI_TRIANGLE_POINT2_Y__MASK 0xffff0000 +#define NV01_TRI_TRIANGLE_POINT2_Y__SHIFT 16 + +#define NV01_TRI_TRIANGLE32_POINT0_X 0x00000320 + +#define NV01_TRI_TRIANGLE32_POINT0_Y 0x00000324 + +#define NV01_TRI_TRIANGLE32_POINT1_X 0x00000328 + +#define NV01_TRI_TRIANGLE32_POINT1_Y 0x0000032c + +#define NV01_TRI_TRIANGLE32_POINT2_X 0x00000330 + +#define NV01_TRI_TRIANGLE32_POINT2_Y 0x00000334 + 
+#define NV01_TRI_TRIMESH(i0) (0x00000400 + 0x4*(i0)) +#define NV01_TRI_TRIMESH__ESIZE 0x00000004 +#define NV01_TRI_TRIMESH__LEN 0x00000020 +#define NV01_TRI_TRIMESH_X__MASK 0x0000ffff +#define NV01_TRI_TRIMESH_X__SHIFT 0 +#define NV01_TRI_TRIMESH_Y__MASK 0xffff0000 +#define NV01_TRI_TRIMESH_Y__SHIFT 16 + +#define NV01_TRI_TRIMESH32_POINT_X(i0) (0x00000480 + 0x8*(i0)) +#define NV01_TRI_TRIMESH32_POINT_X__ESIZE 0x00000008 +#define NV01_TRI_TRIMESH32_POINT_X__LEN 0x00000010 + +#define NV01_TRI_TRIMESH32_POINT_Y(i0) (0x00000484 + 0x8*(i0)) +#define NV01_TRI_TRIMESH32_POINT_Y__ESIZE 0x00000008 +#define NV01_TRI_TRIMESH32_POINT_Y__LEN 0x00000010 + +#define NV01_TRI_CTRIANGLE_COLOR(i0) (0x00000500 + 0x10*(i0)) +#define NV01_TRI_CTRIANGLE_COLOR__ESIZE 0x00000010 +#define NV01_TRI_CTRIANGLE_COLOR__LEN 0x00000008 + +#define NV01_TRI_CTRIANGLE_POINT0(i0) (0x00000504 + 0x10*(i0)) +#define NV01_TRI_CTRIANGLE_POINT0__ESIZE 0x00000010 +#define NV01_TRI_CTRIANGLE_POINT0__LEN 0x00000008 +#define NV01_TRI_CTRIANGLE_POINT0_X__MASK 0x0000ffff +#define NV01_TRI_CTRIANGLE_POINT0_X__SHIFT 0 +#define NV01_TRI_CTRIANGLE_POINT0_Y__MASK 0xffff0000 +#define NV01_TRI_CTRIANGLE_POINT0_Y__SHIFT 16 + +#define NV01_TRI_CTRIANGLE_POINT1(i0) (0x00000508 + 0x10*(i0)) +#define NV01_TRI_CTRIANGLE_POINT1__ESIZE 0x00000010 +#define NV01_TRI_CTRIANGLE_POINT1__LEN 0x00000008 +#define NV01_TRI_CTRIANGLE_POINT1_X__MASK 0x0000ffff +#define NV01_TRI_CTRIANGLE_POINT1_X__SHIFT 0 +#define NV01_TRI_CTRIANGLE_POINT1_Y__MASK 0xffff0000 +#define NV01_TRI_CTRIANGLE_POINT1_Y__SHIFT 16 + +#define NV01_TRI_CTRIANGLE_POINT2(i0) (0x0000050c + 0x10*(i0)) +#define NV01_TRI_CTRIANGLE_POINT2__ESIZE 0x00000010 +#define NV01_TRI_CTRIANGLE_POINT2__LEN 0x00000008 +#define NV01_TRI_CTRIANGLE_POINT2_X__MASK 0x0000ffff +#define NV01_TRI_CTRIANGLE_POINT2_X__SHIFT 0 +#define NV01_TRI_CTRIANGLE_POINT2_Y__MASK 0xffff0000 +#define NV01_TRI_CTRIANGLE_POINT2_Y__SHIFT 16 + +#define NV01_TRI_CTRIMESH_COLOR(i0) (0x00000580 + 0x8*(i0)) 
+#define NV01_TRI_CTRIMESH_COLOR__ESIZE 0x00000008 +#define NV01_TRI_CTRIMESH_COLOR__LEN 0x00000010 + +#define NV01_TRI_CTRIMESH_POINT(i0) (0x00000584 + 0x8*(i0)) +#define NV01_TRI_CTRIMESH_POINT__ESIZE 0x00000008 +#define NV01_TRI_CTRIMESH_POINT__LEN 0x00000010 +#define NV01_TRI_CTRIMESH_POINT_X__MASK 0x0000ffff +#define NV01_TRI_CTRIMESH_POINT_X__SHIFT 0 +#define NV01_TRI_CTRIMESH_POINT_Y__MASK 0xffff0000 +#define NV01_TRI_CTRIMESH_POINT_Y__SHIFT 16 + + +#define NV01_RECT_PATCH 0x0000010c + +#define NV01_RECT_DMA_NOTIFY 0x00000180 + +#define NV01_RECT_CLIP 0x00000184 + +#define NV01_RECT_PATTERN 0x00000188 + +#define NV04_RECT_PATTERN 0x00000188 + +#define NV01_RECT_ROP 0x0000018c + +#define NV01_RECT_BETA 0x00000190 + +#define NV01_RECT_SURFACE_DST 0x00000194 + + +#define NV04_RECT_BETA4 0x00000194 + +#define NV04_RECT_SURFACE 0x00000198 + +#define NV01_RECT_OPERATION 0x000002fc +#define NV01_RECT_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_RECT_OPERATION_ROP_AND 0x00000001 +#define NV01_RECT_OPERATION_BLEND_AND 0x00000002 +#define NV01_RECT_OPERATION_SRCCOPY 0x00000003 +#define NV01_RECT_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_RECT_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV01_RECT_COLOR_FORMAT 0x00000300 +#define NV01_RECT_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV01_RECT_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV01_RECT_COLOR_FORMAT_A8R8G8B8 0x00000003 + +#define NV01_RECT_COLOR 0x00000304 + +#define NV01_RECT_RECTANGLE_POINT(i0) (0x00000400 + 0x8*(i0)) +#define NV01_RECT_RECTANGLE_POINT__ESIZE 0x00000008 +#define NV01_RECT_RECTANGLE_POINT__LEN 0x00000010 +#define NV01_RECT_RECTANGLE_POINT_X__MASK 0x0000ffff +#define NV01_RECT_RECTANGLE_POINT_X__SHIFT 0 +#define NV01_RECT_RECTANGLE_POINT_Y__MASK 0xffff0000 +#define NV01_RECT_RECTANGLE_POINT_Y__SHIFT 16 + +#define NV01_RECT_RECTANGLE_SIZE(i0) (0x00000404 + 0x8*(i0)) +#define NV01_RECT_RECTANGLE_SIZE__ESIZE 0x00000008 +#define NV01_RECT_RECTANGLE_SIZE__LEN 0x00000010 +#define 
NV01_RECT_RECTANGLE_SIZE_W__MASK 0x0000ffff +#define NV01_RECT_RECTANGLE_SIZE_W__SHIFT 0 +#define NV01_RECT_RECTANGLE_SIZE_H__MASK 0xffff0000 +#define NV01_RECT_RECTANGLE_SIZE_H__SHIFT 16 + + +#define NV01_BLIT_PATCH 0x0000010c + + +#define NV15_BLIT_WAIT_FOR_IDLE 0x00000108 + +#define NV15_BLIT_FLIP_SET_READ 0x00000120 + +#define NV15_BLIT_FLIP_SET_WRITE 0x00000124 + +#define NV15_BLIT_FLIP_MAX 0x00000128 + +#define NV15_BLIT_FLIP_INCR_WRITE 0x0000012c + +#define NV15_BLIT_FLIP_WAIT 0x00000130 + +#define NV15_BLIT_FLIP_CRTC_INCR_READ 0x00000134 + +#define NV01_BLIT_DMA_NOTIFY 0x00000180 + +#define NV01_BLIT_COLOR_KEY 0x00000184 + +#define NV04_BLIT_COLOR_KEY 0x00000184 + +#define NV01_BLIT_CLIP 0x00000188 + +#define NV01_BLIT_PATTERN 0x0000018c + +#define NV04_BLIT_PATTERN 0x0000018c + +#define NV01_BLIT_ROP 0x00000190 + +#define NV01_BLIT_BETA 0x00000194 + + +#define NV01_BLIT_SURFACE_SRC 0x00000198 + +#define NV01_BLIT_SURFACE_DST 0x0000019c + + +#define NV04_BLIT_BETA4 0x00000198 + +#define NV04_BLIT_SURFACES 0x0000019c + +#define NV01_BLIT_OPERATION 0x000002fc +#define NV01_BLIT_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_BLIT_OPERATION_ROP_AND 0x00000001 +#define NV01_BLIT_OPERATION_BLEND_AND 0x00000002 +#define NV01_BLIT_OPERATION_SRCCOPY 0x00000003 +#define NV01_BLIT_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_BLIT_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV01_BLIT_POINT_IN 0x00000300 +#define NV01_BLIT_POINT_IN_X__MASK 0x0000ffff +#define NV01_BLIT_POINT_IN_X__SHIFT 0 +#define NV01_BLIT_POINT_IN_Y__MASK 0xffff0000 +#define NV01_BLIT_POINT_IN_Y__SHIFT 16 + +#define NV01_BLIT_POINT_OUT 0x00000304 +#define NV01_BLIT_POINT_OUT_X__MASK 0x0000ffff +#define NV01_BLIT_POINT_OUT_X__SHIFT 0 +#define NV01_BLIT_POINT_OUT_Y__MASK 0xffff0000 +#define NV01_BLIT_POINT_OUT_Y__SHIFT 16 + +#define NV01_BLIT_SIZE 0x00000308 +#define NV01_BLIT_SIZE_W__MASK 0x0000ffff +#define NV01_BLIT_SIZE_W__SHIFT 0 +#define NV01_BLIT_SIZE_H__MASK 0xffff0000 +#define 
NV01_BLIT_SIZE_H__SHIFT 16 + + +#define NV04_INDEX_PATCH 0x0000010c + +#define NV04_INDEX_DMA_NOTIFY 0x00000180 + +#define NV04_INDEX_DMA_LUT 0x00000184 + +#define NV04_INDEX_COLOR_KEY 0x00000188 + +#define NV04_INDEX_CLIP 0x0000018c + +#define NV04_INDEX_PATTERN 0x00000190 + +#define NV04_INDEX_ROP 0x00000194 + +#define NV04_INDEX_BETA 0x00000198 + +#define NV04_INDEX_BETA4 0x0000019c + +#define NV04_INDEX_SURFACE 0x000001a0 + +#define NV05_INDEX_SURFACE 0x000001a0 + +#define NV05_INDEX_COLOR_CONVERSION 0x000003e0 + +#define NV04_INDEX_OPERATION 0x000003e4 + +#define NV04_INDEX_COLOR_FORMAT 0x000003e8 + +#define NV04_INDEX_INDEX_FORMAT 0x000003ec + +#define NV04_INDEX_LUT_OFFSET 0x000003f0 + +#define NV04_INDEX_POINT 0x000003f4 + +#define NV04_INDEX_SIZE_OUT 0x000003f8 + +#define NV04_INDEX_SIZE_IN 0x000003fc + +#define NV04_INDEX_COLOR(i0) (0x00000400 + 0x4*(i0)) +#define NV04_INDEX_COLOR__ESIZE 0x00000004 +#define NV04_INDEX_COLOR__LEN 0x00000700 + + +#define NV10_IFC_WAIT_FOR_IDLE 0x00000108 + +#define NV01_IFC_PATCH 0x0000010c + +#define NV01_IFC_DMA_NOTIFY 0x00000180 + +#define NV01_IFC_COLOR_KEY 0x00000184 + +#define NV04_IFC_COLOR_KEY 0x00000184 + +#define NV01_IFC_CLIP 0x00000188 + +#define NV01_IFC_PATTERN 0x0000018c + +#define NV04_IFC_PATTERN 0x0000018c + +#define NV01_IFC_ROP 0x00000190 + +#define NV01_IFC_BETA 0x00000194 + + +#define NV01_IFC_SURFACE_DST 0x00000198 + + +#define NV04_IFC_BETA4 0x00000198 + +#define NV04_IFC_SURFACE 0x0000019c + +#define NV05_IFC_COLOR_CONVERSION 0x000002f8 + +#define NV01_IFC_OPERATION 0x000002fc +#define NV01_IFC_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_IFC_OPERATION_ROP_AND 0x00000001 +#define NV01_IFC_OPERATION_BLEND_AND 0x00000002 +#define NV01_IFC_OPERATION_SRCCOPY 0x00000003 +#define NV01_IFC_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_IFC_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV01_IFC_COLOR_FORMAT 0x00000300 +#define NV01_IFC_COLOR_FORMAT_R5G6G5 0x00000001 +#define 
NV01_IFC_COLOR_FORMAT_A1R5G5B5 0x00000002 +#define NV01_IFC_COLOR_FORMAT_X1R5G5B5 0x00000003 +#define NV01_IFC_COLOR_FORMAT_A8R8G8B8 0x00000004 +#define NV01_IFC_COLOR_FORMAT_X8R8G8B8 0x00000005 + +#define NV01_IFC_POINT 0x00000304 +#define NV01_IFC_POINT_X__MASK 0x0000ffff +#define NV01_IFC_POINT_X__SHIFT 0 +#define NV01_IFC_POINT_Y__MASK 0xffff0000 +#define NV01_IFC_POINT_Y__SHIFT 16 + +#define NV01_IFC_SIZE_OUT 0x00000308 +#define NV01_IFC_SIZE_OUT_W__MASK 0x0000ffff +#define NV01_IFC_SIZE_OUT_W__SHIFT 0 +#define NV01_IFC_SIZE_OUT_H__MASK 0xffff0000 +#define NV01_IFC_SIZE_OUT_H__SHIFT 16 + +#define NV01_IFC_SIZE_IN 0x0000030c +#define NV01_IFC_SIZE_IN_W__MASK 0x0000ffff +#define NV01_IFC_SIZE_IN_W__SHIFT 0 +#define NV01_IFC_SIZE_IN_H__MASK 0xffff0000 +#define NV01_IFC_SIZE_IN_H__SHIFT 16 + +#define NV01_IFC_COLOR(i0) (0x00000400 + 0x4*(i0)) +#define NV01_IFC_COLOR__ESIZE 0x00000004 +#define NV01_IFC_COLOR__LEN 0x00000020 + +#define NV04_IFC_COLOR(i0) (0x00000400 + 0x4*(i0)) +#define NV04_IFC_COLOR__ESIZE 0x00000004 +#define NV04_IFC_COLOR__LEN 0x00000700 + + +#define NV03_SIFC_PATCH 0x0000010c + +#define NV03_SIFC_DMA_NOTIFY 0x00000180 + +#define NV03_SIFC_COLOR_KEY 0x00000184 + +#define NV04_SIFC_COLOR_KEY 0x00000184 + +#define NV03_SIFC_PATTERN 0x00000188 + +#define NV04_SIFC_PATTERN 0x00000188 + +#define NV03_SIFC_ROP 0x0000018c + +#define NV03_SIFC_BETA 0x00000190 + + +#define NV03_SIFC_SURFACE_DST 0x00000194 + + +#define NV04_SIFC_BETA4 0x00000194 + +#define NV04_SIFC_SURFACE 0x00000198 + +#define NV05_SIFC_COLOR_CONVERSION 0x000002f8 + +#define NV03_SIFC_OPERATION 0x000002fc + +#define NV03_SIFC_COLOR_FORMAT 0x00000300 + +#define NV03_SIFC_SIZE_IN 0x00000304 +#define NV03_SIFC_SIZE_IN_W__MASK 0x0000ffff +#define NV03_SIFC_SIZE_IN_W__SHIFT 0 +#define NV03_SIFC_SIZE_IN_H__MASK 0xffff0000 +#define NV03_SIFC_SIZE_IN_H__SHIFT 16 + +#define NV03_SIFC_DX_DU 0x00000308 + +#define NV03_SIFC_DY_DV 0x0000030c + +#define NV03_SIFC_CLIP_POINT 0x00000310 +#define 
NV03_SIFC_CLIP_POINT_X__MASK 0x0000ffff +#define NV03_SIFC_CLIP_POINT_X__SHIFT 0 +#define NV03_SIFC_CLIP_POINT_Y__MASK 0xffff0000 +#define NV03_SIFC_CLIP_POINT_Y__SHIFT 16 + +#define NV03_SIFC_CLIP_SIZE 0x00000314 +#define NV03_SIFC_CLIP_SIZE_W__MASK 0x0000ffff +#define NV03_SIFC_CLIP_SIZE_W__SHIFT 0 +#define NV03_SIFC_CLIP_SIZE_H__MASK 0xffff0000 +#define NV03_SIFC_CLIP_SIZE_H__SHIFT 16 + +#define NV03_SIFC_POINT12D4 0x00000318 +#define NV03_SIFC_POINT12D4_X__MASK 0x0000ffff +#define NV03_SIFC_POINT12D4_X__SHIFT 0 +#define NV03_SIFC_POINT12D4_Y__MASK 0xffff0000 +#define NV03_SIFC_POINT12D4_Y__SHIFT 16 + +#define NV03_SIFC_COLOR(i0) (0x00000400 + 0x4*(i0)) +#define NV03_SIFC_COLOR__ESIZE 0x00000004 +#define NV03_SIFC_COLOR__LEN 0x00000700 + + +#define NV10_SIFM_WAIT_FOR_IDLE 0x00000108 + +#define NV03_SIFM_DMA_NOTIFY 0x00000180 + +#define NV03_SIFM_DMA_IMAGE 0x00000184 + +#define NV03_SIFM_PATTERN 0x00000188 + +#define NV04_SIFM_PATTERN 0x00000188 + +#define NV03_SIFM_ROP 0x0000018c + +#define NV03_SIFM_BETA 0x00000190 + + +#define NV03_SIFM_SURFACE_DST 0x00000194 + + +#define NV04_SIFM_BETA4 0x00000194 + +#define NV04_SIFM_SURFACE 0x00000198 + +#define NV05_SIFM_SURFACE 0x00000198 + +#define NV05_SIFM_COLOR_CONVERSION 0x000002fc +#define NV05_SIFM_COLOR_CONVERSION_DITHER 0x00000000 +#define NV05_SIFM_COLOR_CONVERSION_TRUNCATE 0x00000001 +#define NV05_SIFM_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002 + +#define NV03_SIFM_COLOR_FORMAT 0x00000300 +#define NV03_SIFM_COLOR_FORMAT_A1R5G5B5 0x00000001 +#define NV03_SIFM_COLOR_FORMAT_X1R5G5B5 0x00000002 +#define NV03_SIFM_COLOR_FORMAT_A8R8G8B8 0x00000003 +#define NV03_SIFM_COLOR_FORMAT_X8R8G8B8 0x00000004 +#define NV03_SIFM_COLOR_FORMAT_V8YB8U8YA8 0x00000005 +#define NV03_SIFM_COLOR_FORMAT_YB8V8YA8U8 0x00000006 +#define NV03_SIFM_COLOR_FORMAT_R5G6B5 0x00000007 +#define NV03_SIFM_COLOR_FORMAT_Y8 0x00000008 +#define NV03_SIFM_COLOR_FORMAT_AY8 0x00000009 + +#define NV03_SIFM_OPERATION 0x00000304 +#define 
NV03_SIFM_OPERATION_SRCCOPY_AND 0x00000000 +#define NV03_SIFM_OPERATION_ROP_AND 0x00000001 +#define NV03_SIFM_OPERATION_BLEND_AND 0x00000002 +#define NV03_SIFM_OPERATION_SRCCOPY 0x00000003 +#define NV03_SIFM_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV03_SIFM_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV03_SIFM_CLIP_POINT 0x00000308 +#define NV03_SIFM_CLIP_POINT_X__MASK 0x0000ffff +#define NV03_SIFM_CLIP_POINT_X__SHIFT 0 +#define NV03_SIFM_CLIP_POINT_Y__MASK 0xffff0000 +#define NV03_SIFM_CLIP_POINT_Y__SHIFT 16 + +#define NV03_SIFM_CLIP_SIZE 0x0000030c +#define NV03_SIFM_CLIP_SIZE_W__MASK 0x0000ffff +#define NV03_SIFM_CLIP_SIZE_W__SHIFT 0 +#define NV03_SIFM_CLIP_SIZE_H__MASK 0xffff0000 +#define NV03_SIFM_CLIP_SIZE_H__SHIFT 16 + +#define NV03_SIFM_OUT_POINT 0x00000310 +#define NV03_SIFM_OUT_POINT_X__MASK 0x0000ffff +#define NV03_SIFM_OUT_POINT_X__SHIFT 0 +#define NV03_SIFM_OUT_POINT_Y__MASK 0xffff0000 +#define NV03_SIFM_OUT_POINT_Y__SHIFT 16 + +#define NV03_SIFM_OUT_SIZE 0x00000314 +#define NV03_SIFM_OUT_SIZE_W__MASK 0x0000ffff +#define NV03_SIFM_OUT_SIZE_W__SHIFT 0 +#define NV03_SIFM_OUT_SIZE_H__MASK 0xffff0000 +#define NV03_SIFM_OUT_SIZE_H__SHIFT 16 + +#define NV03_SIFM_DU_DX 0x00000318 + +#define NV03_SIFM_DV_DY 0x0000031c + +#define NV03_SIFM_SIZE 0x00000400 +#define NV03_SIFM_SIZE_W__MASK 0x0000ffff +#define NV03_SIFM_SIZE_W__SHIFT 0 +#define NV03_SIFM_SIZE_H__MASK 0xffff0000 +#define NV03_SIFM_SIZE_H__SHIFT 16 + +#define NV03_SIFM_FORMAT 0x00000404 +#define NV03_SIFM_FORMAT_PITCH__MASK 0x0000ffff +#define NV03_SIFM_FORMAT_PITCH__SHIFT 0 +#define NV03_SIFM_FORMAT_ORIGIN__MASK 0x00ff0000 +#define NV03_SIFM_FORMAT_ORIGIN__SHIFT 16 +#define NV03_SIFM_FORMAT_ORIGIN_CENTER 0x00010000 +#define NV03_SIFM_FORMAT_ORIGIN_CORNER 0x00020000 +#define NV03_SIFM_FORMAT_FILTER__MASK 0xff000000 +#define NV03_SIFM_FORMAT_FILTER__SHIFT 24 +#define NV03_SIFM_FORMAT_FILTER_POINT_SAMPLE 0x00000000 +#define NV03_SIFM_FORMAT_FILTER_BILINEAR 0x01000000 + +#define NV03_SIFM_OFFSET 
0x00000408 + +#define NV03_SIFM_POINT 0x0000040c +#define NV03_SIFM_POINT_U__MASK 0x0000ffff +#define NV03_SIFM_POINT_U__SHIFT 0 +#define NV03_SIFM_POINT_V__MASK 0xffff0000 +#define NV03_SIFM_POINT_V__SHIFT 16 + + +#define NV50_SIFM_OFFSET_HIGH 0x00000410 + +#define NV50_SIFM_SRC_LINEAR 0x00000414 + +#define NV50_SIFM_SRC_TILE_MODE 0x00000418 + + +#define NV03_GDI_DMA_NOTIFY 0x00000180 + +#define NV03_GDI_PATTERN 0x00000184 + +#define NV03_GDI_ROP 0x00000188 + +#define NV03_GDI_BETA 0x0000019c + +#define NV03_GDI_SURFACE_DST 0x00000190 + +#define NV03_GDI_OPERATION 0x000002fc + +#define NV03_GDI_COLOR_FORMAT 0x00000300 + +#define NV03_GDI_MONOCHROME_FORMAT 0x00000304 + +#define NV03_GDI_COLOR1_A 0x000003fc + +#define NV03_GDI_UNCLIPPED_RECTANGLE_POINT(i0) (0x00000400 + 0x8*(i0)) +#define NV03_GDI_UNCLIPPED_RECTANGLE_POINT__ESIZE 0x00000008 +#define NV03_GDI_UNCLIPPED_RECTANGLE_POINT__LEN 0x00000040 +#define NV03_GDI_UNCLIPPED_RECTANGLE_POINT_Y__MASK 0x0000ffff +#define NV03_GDI_UNCLIPPED_RECTANGLE_POINT_Y__SHIFT 0 +#define NV03_GDI_UNCLIPPED_RECTANGLE_POINT_X__MASK 0xffff0000 +#define NV03_GDI_UNCLIPPED_RECTANGLE_POINT_X__SHIFT 16 + +#define NV03_GDI_UNCLIPPED_RECTANGLE_SIZE(i0) (0x00000404 + 0x8*(i0)) +#define NV03_GDI_UNCLIPPED_RECTANGLE_SIZE__ESIZE 0x00000008 +#define NV03_GDI_UNCLIPPED_RECTANGLE_SIZE__LEN 0x00000040 +#define NV03_GDI_UNCLIPPED_RECTANGLE_SIZE_H__MASK 0x0000ffff +#define NV03_GDI_UNCLIPPED_RECTANGLE_SIZE_H__SHIFT 0 +#define NV03_GDI_UNCLIPPED_RECTANGLE_SIZE_W__MASK 0xffff0000 +#define NV03_GDI_UNCLIPPED_RECTANGLE_SIZE_W__SHIFT 16 + +#define NV03_GDI_CLIP_POINT0_B 0x000007f4 +#define NV03_GDI_CLIP_POINT0_B_L__MASK 0x0000ffff +#define NV03_GDI_CLIP_POINT0_B_L__SHIFT 0 +#define NV03_GDI_CLIP_POINT0_B_T__MASK 0xffff0000 +#define NV03_GDI_CLIP_POINT0_B_T__SHIFT 16 + +#define NV03_GDI_CLIP_POINT1_B 0x000007f8 +#define NV03_GDI_CLIP_POINT1_B_R__MASK 0x0000ffff +#define NV03_GDI_CLIP_POINT1_B_R__SHIFT 0 +#define NV03_GDI_CLIP_POINT1_B_B__MASK 0xffff0000 
+#define NV03_GDI_CLIP_POINT1_B_B__SHIFT 16 + +#define NV03_GDI_COLOR1_B 0x000007fc + +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_0(i0) (0x00000800 + 0x8*(i0)) +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_0__ESIZE 0x00000008 +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_0__LEN 0x00000040 +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_0_L__MASK 0x0000ffff +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_0_L__SHIFT 0 +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_0_T__MASK 0xffff0000 +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_0_T__SHIFT 16 + +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_1(i0) (0x00000804 + 0x8*(i0)) +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_1__ESIZE 0x00000008 +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_1__LEN 0x00000040 +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_1_R__MASK 0x0000ffff +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_1_R__SHIFT 0 +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_1_B__MASK 0xffff0000 +#define NV03_GDI_CLIPPED_RECTANGLE_POINT_1_B__SHIFT 16 + +#define NV03_GDI_CLIP_C_POINT0 0x00000bec +#define NV03_GDI_CLIP_C_POINT0_L__MASK 0x0000ffff +#define NV03_GDI_CLIP_C_POINT0_L__SHIFT 0 +#define NV03_GDI_CLIP_C_POINT0_T__MASK 0xffff0000 +#define NV03_GDI_CLIP_C_POINT0_T__SHIFT 16 + +#define NV03_GDI_CLIP_C_POINT1 0x00000bf0 +#define NV03_GDI_CLIP_C_POINT1_R__MASK 0x0000ffff +#define NV03_GDI_CLIP_C_POINT1_R__SHIFT 0 +#define NV03_GDI_CLIP_C_POINT1_B__MASK 0xffff0000 +#define NV03_GDI_CLIP_C_POINT1_B__SHIFT 16 + +#define NV03_GDI_COLOR1_C 0x00000bf4 + +#define NV03_GDI_SIZE_C 0x00000bf8 +#define NV03_GDI_SIZE_C_W__MASK 0x0000ffff +#define NV03_GDI_SIZE_C_W__SHIFT 0 +#define NV03_GDI_SIZE_C_H__MASK 0xffff0000 +#define NV03_GDI_SIZE_C_H__SHIFT 16 + +#define NV03_GDI_POINT_C 0x00000bfc +#define NV03_GDI_POINT_C_X__MASK 0x0000ffff +#define NV03_GDI_POINT_C_X__SHIFT 0 +#define NV03_GDI_POINT_C_Y__MASK 0xffff0000 +#define NV03_GDI_POINT_C_Y__SHIFT 16 + +#define NV03_GDI_MONOCHROME_COLOR1_C(i0) (0x00000c00 + 0x4*(i0)) +#define NV03_GDI_MONOCHROME_COLOR1_C__ESIZE 0x00000004 +#define 
NV03_GDI_MONOCHROME_COLOR1_C__LEN 0x00000080 + +#define NV03_GDI_CLIP_D_POINT0 0x00000fe8 +#define NV03_GDI_CLIP_D_POINT0_L__MASK 0x0000ffff +#define NV03_GDI_CLIP_D_POINT0_L__SHIFT 0 +#define NV03_GDI_CLIP_D_POINT0_T__MASK 0xffff0000 +#define NV03_GDI_CLIP_D_POINT0_T__SHIFT 16 + +#define NV03_GDI_CLIP_D_POINT1 0x00000fec +#define NV03_GDI_CLIP_D_POINT1_R__MASK 0x0000ffff +#define NV03_GDI_CLIP_D_POINT1_R__SHIFT 0 +#define NV03_GDI_CLIP_D_POINT1_B__MASK 0xffff0000 +#define NV03_GDI_CLIP_D_POINT1_B__SHIFT 16 + +#define NV03_GDI_COLOR1_D 0x00000ff0 + +#define NV03_GDI_SIZE_IN_D 0x00000ff4 +#define NV03_GDI_SIZE_IN_D_W__MASK 0x0000ffff +#define NV03_GDI_SIZE_IN_D_W__SHIFT 0 +#define NV03_GDI_SIZE_IN_D_H__MASK 0xffff0000 +#define NV03_GDI_SIZE_IN_D_H__SHIFT 16 + +#define NV03_GDI_SIZE_OUT_D 0x00000ff8 +#define NV03_GDI_SIZE_OUT_D_W__MASK 0x0000ffff +#define NV03_GDI_SIZE_OUT_D_W__SHIFT 0 +#define NV03_GDI_SIZE_OUT_D_H__MASK 0xffff0000 +#define NV03_GDI_SIZE_OUT_D_H__SHIFT 16 + +#define NV03_GDI_POINT_D 0x00000ffc +#define NV03_GDI_POINT_D_X__MASK 0x0000ffff +#define NV03_GDI_POINT_D_X__SHIFT 0 +#define NV03_GDI_POINT_D_Y__MASK 0xffff0000 +#define NV03_GDI_POINT_D_Y__SHIFT 16 + +#define NV03_GDI_MONOCHROME_COLOR1_D(i0) (0x00001000 + 0x4*(i0)) +#define NV03_GDI_MONOCHROME_COLOR1_D__ESIZE 0x00000004 +#define NV03_GDI_MONOCHROME_COLOR1_D__LEN 0x00000080 + +#define NV03_GDI_CLIP_E_POINT0 0x000013e4 +#define NV03_GDI_CLIP_E_POINT0_L__MASK 0x0000ffff +#define NV03_GDI_CLIP_E_POINT0_L__SHIFT 0 +#define NV03_GDI_CLIP_E_POINT0_T__MASK 0xffff0000 +#define NV03_GDI_CLIP_E_POINT0_T__SHIFT 16 + +#define NV03_GDI_CLIP_E_POINT1 0x000013e8 +#define NV03_GDI_CLIP_E_POINT1_R__MASK 0x0000ffff +#define NV03_GDI_CLIP_E_POINT1_R__SHIFT 0 +#define NV03_GDI_CLIP_E_POINT1_B__MASK 0xffff0000 +#define NV03_GDI_CLIP_E_POINT1_B__SHIFT 16 + +#define NV03_GDI_COLOR0_E 0x000013ec + +#define NV03_GDI_COLOR1_E 0x000013f0 + +#define NV03_GDI_SIZE_IN_E 0x000013f4 +#define NV03_GDI_SIZE_IN_E_W__MASK 
0x0000ffff +#define NV03_GDI_SIZE_IN_E_W__SHIFT 0 +#define NV03_GDI_SIZE_IN_E_H__MASK 0xffff0000 +#define NV03_GDI_SIZE_IN_E_H__SHIFT 16 + +#define NV03_GDI_SIZE_OUT_E 0x000013f8 +#define NV03_GDI_SIZE_OUT_E_W__MASK 0x0000ffff +#define NV03_GDI_SIZE_OUT_E_W__SHIFT 0 +#define NV03_GDI_SIZE_OUT_E_H__MASK 0xffff0000 +#define NV03_GDI_SIZE_OUT_E_H__SHIFT 16 + +#define NV03_GDI_POINT_E 0x000013fc +#define NV03_GDI_POINT_E_X__MASK 0x0000ffff +#define NV03_GDI_POINT_E_X__SHIFT 0 +#define NV03_GDI_POINT_E_Y__MASK 0xffff0000 +#define NV03_GDI_POINT_E_Y__SHIFT 16 + +#define NV03_GDI_MONOCHROME_COLOR01_E(i0) (0x00001400 + 0x4*(i0)) +#define NV03_GDI_MONOCHROME_COLOR01_E__ESIZE 0x00000004 +#define NV03_GDI_MONOCHROME_COLOR01_E__LEN 0x00000080 + + +#define NV04_GDI_PATCH 0x0000010c + +#define NV04_GDI_DMA_NOTIFY 0x00000180 + +#define NV04_GDI_DMA_FONTS 0x00000184 + +#define NV04_GDI_PATTERN 0x00000188 + +#define NV04_GDI_ROP 0x0000018c + +#define NV04_GDI_BETA 0x00000190 + +#define NV04_GDI_BETA4 0x00000194 + +#define NV04_GDI_SURFACE 0x00000198 + +#define NV04_GDI_OPERATION 0x000002fc +#define NV04_GDI_OPERATION_SRCCOPY_AND 0x00000000 +#define NV04_GDI_OPERATION_ROP_AND 0x00000001 +#define NV04_GDI_OPERATION_BLEND_AND 0x00000002 +#define NV04_GDI_OPERATION_SRCCOPY 0x00000003 +#define NV04_GDI_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV04_GDI_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV04_GDI_COLOR_FORMAT 0x00000300 +#define NV04_GDI_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV04_GDI_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV04_GDI_COLOR_FORMAT_A8R8G8B8 0x00000003 + +#define NV04_GDI_MONOCHROME_FORMAT 0x00000304 +#define NV04_GDI_MONOCHROME_FORMAT_CGA6 0x00000001 +#define NV04_GDI_MONOCHROME_FORMAT_LE 0x00000002 + +#define NV04_GDI_COLOR1_A 0x000003fc + +#define NV04_GDI_UNCLIPPED_RECTANGLE_POINT(i0) (0x00000400 + 0x8*(i0)) +#define NV04_GDI_UNCLIPPED_RECTANGLE_POINT__ESIZE 0x00000008 +#define NV04_GDI_UNCLIPPED_RECTANGLE_POINT__LEN 0x00000020 +#define 
NV04_GDI_UNCLIPPED_RECTANGLE_POINT_Y__MASK 0x0000ffff +#define NV04_GDI_UNCLIPPED_RECTANGLE_POINT_Y__SHIFT 0 +#define NV04_GDI_UNCLIPPED_RECTANGLE_POINT_X__MASK 0xffff0000 +#define NV04_GDI_UNCLIPPED_RECTANGLE_POINT_X__SHIFT 16 + +#define NV04_GDI_UNCLIPPED_RECTANGLE_SIZE(i0) (0x00000404 + 0x8*(i0)) +#define NV04_GDI_UNCLIPPED_RECTANGLE_SIZE__ESIZE 0x00000008 +#define NV04_GDI_UNCLIPPED_RECTANGLE_SIZE__LEN 0x00000020 +#define NV04_GDI_UNCLIPPED_RECTANGLE_SIZE_H__MASK 0x0000ffff +#define NV04_GDI_UNCLIPPED_RECTANGLE_SIZE_H__SHIFT 0 +#define NV04_GDI_UNCLIPPED_RECTANGLE_SIZE_W__MASK 0xffff0000 +#define NV04_GDI_UNCLIPPED_RECTANGLE_SIZE_W__SHIFT 16 + +#define NV04_GDI_CLIP_B_POINT0 0x000005f4 +#define NV04_GDI_CLIP_B_POINT0_L__MASK 0x0000ffff +#define NV04_GDI_CLIP_B_POINT0_L__SHIFT 0 +#define NV04_GDI_CLIP_B_POINT0_T__MASK 0xffff0000 +#define NV04_GDI_CLIP_B_POINT0_T__SHIFT 16 + +#define NV04_GDI_CLIP_B_POINT1 0x000005f8 +#define NV04_GDI_CLIP_B_POINT1_R__MASK 0x0000ffff +#define NV04_GDI_CLIP_B_POINT1_R__SHIFT 0 +#define NV04_GDI_CLIP_B_POINT1_B__MASK 0xffff0000 +#define NV04_GDI_CLIP_B_POINT1_B__SHIFT 16 + +#define NV04_GDI_COLOR1_B 0x000005fc + +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_0(i0) (0x00000600 + 0x8*(i0)) +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_0__ESIZE 0x00000008 +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_0__LEN 0x00000020 +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_0_L__MASK 0x0000ffff +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_0_L__SHIFT 0 +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_0_T__MASK 0xffff0000 +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_0_T__SHIFT 16 + +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_1(i0) (0x00000604 + 0x8*(i0)) +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_1__ESIZE 0x00000008 +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_1__LEN 0x00000020 +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_1_R__MASK 0x0000ffff +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_1_R__SHIFT 0 +#define NV04_GDI_CLIPPED_RECTANGLE_POINT_1_B__MASK 0xffff0000 +#define 
NV04_GDI_CLIPPED_RECTANGLE_POINT_1_B__SHIFT 16 + +#define NV04_GDI_CLIP_C_POINT0 0x000007ec +#define NV04_GDI_CLIP_C_POINT0_L__MASK 0x0000ffff +#define NV04_GDI_CLIP_C_POINT0_L__SHIFT 0 +#define NV04_GDI_CLIP_C_POINT0_T__MASK 0xffff0000 +#define NV04_GDI_CLIP_C_POINT0_T__SHIFT 16 + +#define NV04_GDI_CLIP_C_POINT1 0x000007f0 +#define NV04_GDI_CLIP_C_POINT1_R__MASK 0x0000ffff +#define NV04_GDI_CLIP_C_POINT1_R__SHIFT 0 +#define NV04_GDI_CLIP_C_POINT1_B__MASK 0xffff0000 +#define NV04_GDI_CLIP_C_POINT1_B__SHIFT 16 + +#define NV04_GDI_COLOR1_C 0x000007f4 + +#define NV04_GDI_SIZE_C 0x000007f8 +#define NV04_GDI_SIZE_C_W__MASK 0x0000ffff +#define NV04_GDI_SIZE_C_W__SHIFT 0 +#define NV04_GDI_SIZE_C_H__MASK 0xffff0000 +#define NV04_GDI_SIZE_C_H__SHIFT 16 + +#define NV04_GDI_POINT_C 0x000007fc +#define NV04_GDI_POINT_C_X__MASK 0x0000ffff +#define NV04_GDI_POINT_C_X__SHIFT 0 +#define NV04_GDI_POINT_C_Y__MASK 0xffff0000 +#define NV04_GDI_POINT_C_Y__SHIFT 16 + +#define NV04_GDI_MONOCHROME_COLOR1_C(i0) (0x00000800 + 0x4*(i0)) +#define NV04_GDI_MONOCHROME_COLOR1_C__ESIZE 0x00000004 +#define NV04_GDI_MONOCHROME_COLOR1_C__LEN 0x00000080 + +#define NV04_GDI_CLIP_E_POINT0 0x00000be4 +#define NV04_GDI_CLIP_E_POINT0_L__MASK 0x0000ffff +#define NV04_GDI_CLIP_E_POINT0_L__SHIFT 0 +#define NV04_GDI_CLIP_E_POINT0_T__MASK 0xffff0000 +#define NV04_GDI_CLIP_E_POINT0_T__SHIFT 16 + +#define NV04_GDI_CLIP_E_POINT1 0x00000be8 +#define NV04_GDI_CLIP_E_POINT1_R__MASK 0x0000ffff +#define NV04_GDI_CLIP_E_POINT1_R__SHIFT 0 +#define NV04_GDI_CLIP_E_POINT1_B__MASK 0xffff0000 +#define NV04_GDI_CLIP_E_POINT1_B__SHIFT 16 + +#define NV04_GDI_COLOR0_E 0x00000bec + +#define NV04_GDI_COLOR1_E 0x00000bf0 + +#define NV04_GDI_SIZE_IN_E 0x00000bf4 +#define NV04_GDI_SIZE_IN_E_W__MASK 0x0000ffff +#define NV04_GDI_SIZE_IN_E_W__SHIFT 0 +#define NV04_GDI_SIZE_IN_E_H__MASK 0xffff0000 +#define NV04_GDI_SIZE_IN_E_H__SHIFT 16 + +#define NV04_GDI_SIZE_OUT_E 0x00000bf8 +#define NV04_GDI_SIZE_OUT_E_W__MASK 0x0000ffff +#define 
NV04_GDI_SIZE_OUT_E_W__SHIFT 0 +#define NV04_GDI_SIZE_OUT_E_H__MASK 0xffff0000 +#define NV04_GDI_SIZE_OUT_E_H__SHIFT 16 + +#define NV04_GDI_POINT_E 0x00000bfc +#define NV04_GDI_POINT_E_X__MASK 0x0000ffff +#define NV04_GDI_POINT_E_X__SHIFT 0 +#define NV04_GDI_POINT_E_Y__MASK 0xffff0000 +#define NV04_GDI_POINT_E_Y__SHIFT 16 + +#define NV04_GDI_MONOCHROME_COLOR01_E(i0) (0x00000c00 + 0x4*(i0)) +#define NV04_GDI_MONOCHROME_COLOR01_E__ESIZE 0x00000004 +#define NV04_GDI_MONOCHROME_COLOR01_E__LEN 0x00000080 + +#define NV04_GDI_FONT_F 0x00000ff0 +#define NV04_GDI_FONT_F_OFFSET__MASK 0x0fffffff +#define NV04_GDI_FONT_F_OFFSET__SHIFT 0 +#define NV04_GDI_FONT_F_PITCH__MASK 0xf0000000 +#define NV04_GDI_FONT_F_PITCH__SHIFT 28 + +#define NV04_GDI_CLIP_F_POINT0 0x00000ff4 +#define NV04_GDI_CLIP_F_POINT0_L__MASK 0x0000ffff +#define NV04_GDI_CLIP_F_POINT0_L__SHIFT 0 +#define NV04_GDI_CLIP_F_POINT0_T__MASK 0xffff0000 +#define NV04_GDI_CLIP_F_POINT0_T__SHIFT 16 + +#define NV04_GDI_CLIP_F_POINT1 0x00000ff8 +#define NV04_GDI_CLIP_F_POINT1_R__MASK 0x0000ffff +#define NV04_GDI_CLIP_F_POINT1_R__SHIFT 0 +#define NV04_GDI_CLIP_F_POINT1_B__MASK 0xffff0000 +#define NV04_GDI_CLIP_F_POINT1_B__SHIFT 16 + +#define NV04_GDI_COLOR1_F 0x00000ffc + +#define NV04_GDI_CHARACTER_COLOR1_F(i0) (0x00001000 + 0x4*(i0)) +#define NV04_GDI_CHARACTER_COLOR1_F__ESIZE 0x00000004 +#define NV04_GDI_CHARACTER_COLOR1_F__LEN 0x00000100 +#define NV04_GDI_CHARACTER_COLOR1_F_INDEX__MASK 0x000000ff +#define NV04_GDI_CHARACTER_COLOR1_F_INDEX__SHIFT 0 +#define NV04_GDI_CHARACTER_COLOR1_F_X__MASK 0x000fff00 +#define NV04_GDI_CHARACTER_COLOR1_F_X__SHIFT 8 +#define NV04_GDI_CHARACTER_COLOR1_F_Y__MASK 0xfff00000 +#define NV04_GDI_CHARACTER_COLOR1_F_Y__SHIFT 20 + +#define NV04_GDI_FONT_G 0x000017f0 +#define NV04_GDI_FONT_G_OFFSET__MASK 0x0fffffff +#define NV04_GDI_FONT_G_OFFSET__SHIFT 0 +#define NV04_GDI_FONT_G_PITCH__MASK 0xf0000000 +#define NV04_GDI_FONT_G_PITCH__SHIFT 28 + +#define NV04_GDI_CLIP_G_POINT0 0x000017f4 +#define 
NV04_GDI_CLIP_G_POINT0_L__MASK 0x0000ffff +#define NV04_GDI_CLIP_G_POINT0_L__SHIFT 0 +#define NV04_GDI_CLIP_G_POINT0_T__MASK 0xffff0000 +#define NV04_GDI_CLIP_G_POINT0_T__SHIFT 16 + +#define NV04_GDI_CLIP_G_POINT1 0x000017f8 +#define NV04_GDI_CLIP_G_POINT1_R__MASK 0x0000ffff +#define NV04_GDI_CLIP_G_POINT1_R__SHIFT 0 +#define NV04_GDI_CLIP_G_POINT1_B__MASK 0xffff0000 +#define NV04_GDI_CLIP_G_POINT1_B__SHIFT 16 + +#define NV04_GDI_COLOR1_G 0x000017fc + +#define NV04_GDI_CHARACTER_COLOR1_G_POINT(i0) (0x00001800 + 0x8*(i0)) +#define NV04_GDI_CHARACTER_COLOR1_G_POINT__ESIZE 0x00000008 +#define NV04_GDI_CHARACTER_COLOR1_G_POINT__LEN 0x00000100 +#define NV04_GDI_CHARACTER_COLOR1_G_POINT_X__MASK 0x0000ffff +#define NV04_GDI_CHARACTER_COLOR1_G_POINT_X__SHIFT 0 +#define NV04_GDI_CHARACTER_COLOR1_G_POINT_Y__MASK 0xffff0000 +#define NV04_GDI_CHARACTER_COLOR1_G_POINT_Y__SHIFT 16 + +#define NV04_GDI_CHARACTER_COLOR1_G_INDEX(i0) (0x00001804 + 0x8*(i0)) +#define NV04_GDI_CHARACTER_COLOR1_G_INDEX__ESIZE 0x00000008 +#define NV04_GDI_CHARACTER_COLOR1_G_INDEX__LEN 0x00000100 + + +#define NV10_TEXUPLOAD_WAIT_FOR_IDLE 0x00000108 + +#define NV10_TEXUPLOAD_DMA_NOTIFY 0x00000180 + +#define NV10_TEXUPLOAD_SURFACE 0x00000184 + +#define NV10_TEXUPLOAD_COLOR_FORMAT 0x00000300 + +#define NV10_TEXUPLOAD_POINT 0x00000304 +#define NV10_TEXUPLOAD_POINT_X__MASK 0x0000ffff +#define NV10_TEXUPLOAD_POINT_X__SHIFT 0 +#define NV10_TEXUPLOAD_POINT_Y__MASK 0xffff0000 +#define NV10_TEXUPLOAD_POINT_Y__SHIFT 16 + +#define NV10_TEXUPLOAD_SIZE 0x00000308 +#define NV10_TEXUPLOAD_SIZE_W__MASK 0x0000ffff +#define NV10_TEXUPLOAD_SIZE_W__SHIFT 0 +#define NV10_TEXUPLOAD_SIZE_H__MASK 0xffff0000 +#define NV10_TEXUPLOAD_SIZE_H__SHIFT 16 + +#define NV10_TEXUPLOAD_CLIP_HORIZONTAL 0x0000030c +#define NV10_TEXUPLOAD_CLIP_HORIZONTAL_X__MASK 0x0000ffff +#define NV10_TEXUPLOAD_CLIP_HORIZONTAL_X__SHIFT 0 +#define NV10_TEXUPLOAD_CLIP_HORIZONTAL_W__MASK 0xffff0000 +#define NV10_TEXUPLOAD_CLIP_HORIZONTAL_W__SHIFT 16 + +#define 
NV10_TEXUPLOAD_CLIP_VERTICAL 0x00000310 +#define NV10_TEXUPLOAD_CLIP_VERTICAL_Y__MASK 0x0000ffff +#define NV10_TEXUPLOAD_CLIP_VERTICAL_Y__SHIFT 0 +#define NV10_TEXUPLOAD_CLIP_VERTICAL_H__MASK 0xffff0000 +#define NV10_TEXUPLOAD_CLIP_VERTICAL_H__SHIFT 16 + +#define NV10_TEXUPLOAD_COLOR(i0) (0x00000400 + 0x4*(i0)) +#define NV10_TEXUPLOAD_COLOR__ESIZE 0x00000004 +#define NV10_TEXUPLOAD_COLOR__LEN 0x00000700 + + +#endif /* _HOME_SKEGGSB_GIT_ENVYTOOLS_RNNDB_NV01_2D_XML */ diff --git a/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h b/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h new file mode 100644 index 00000000000..447f4b3b7ae --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h @@ -0,0 +1,2045 @@ +#ifndef _HOME_SKEGGSB_GIT_ENVYTOOLS_RNNDB_NV30_40_3D_XML +#define _HOME_SKEGGSB_GIT_ENVYTOOLS_RNNDB_NV30_40_3D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- /home/skeggsb/git/envytools/rnndb/nv30-40_3d.xml ( 32450 bytes, from 2012-02-10 02:41:48) +- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6452 bytes, from 2011-06-30 00:27:55) +- /home/skeggsb/git/envytools/rnndb/nv_3ddefs.xml ( 16394 bytes, from 2011-06-30 00:27:55) +- /home/skeggsb/git/envytools/rnndb/nv_defs.xml ( 4437 bytes, from 2011-06-30 00:27:55) +- /home/skeggsb/git/envytools/rnndb/nv_object.xml ( 12672 bytes, from 2011-07-13 22:28:24) +- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 3617 bytes, from 2011-07-17 23:19:55) +- /home/skeggsb/git/envytools/rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-08 05:22:51) + +Copyright (C) 2006-2012 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice 
(including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NV30_3D_FLIP_SET_READ 0x00000120 + +#define NV30_3D_FLIP_SET_WRITE 0x00000124 + +#define NV30_3D_FLIP_MAX 0x00000128 + +#define NV30_3D_FLIP_INCR_WRITE 0x0000012c + +#define NV30_3D_FLIP_WAIT 0x00000130 + +#define NV30_3D_DMA_NOTIFY 0x00000180 + +#define NV30_3D_DMA_TEXTURE0 0x00000184 + +#define NV30_3D_DMA_TEXTURE1 0x00000188 + +#define NV30_3D_DMA_COLOR1 0x0000018c + +#define NV30_3D_DMA_UNK190 0x00000190 + +#define NV30_3D_DMA_COLOR0 0x00000194 + +#define NV30_3D_DMA_ZETA 0x00000198 + +#define NV30_3D_DMA_VTXBUF0 0x0000019c + +#define NV30_3D_DMA_VTXBUF1 0x000001a0 + +#define NV30_3D_DMA_FENCE 0x000001a4 + +#define NV30_3D_DMA_QUERY 0x000001a8 + +#define NV30_3D_DMA_UNK1AC 0x000001ac + +#define NV30_3D_DMA_UNK1B0 0x000001b0 + +#define NV40_3D_DMA_COLOR2 0x000001b4 + +#define NV40_3D_DMA_COLOR3 0x000001b8 + +#define NV30_3D_RT_HORIZ 0x00000200 +#define NV30_3D_RT_HORIZ_X__MASK 0x0000ffff +#define NV30_3D_RT_HORIZ_X__SHIFT 0 +#define NV30_3D_RT_HORIZ_W__MASK 0xffff0000 +#define NV30_3D_RT_HORIZ_W__SHIFT 16 + +#define NV30_3D_RT_VERT 0x00000204 +#define NV30_3D_RT_VERT_Y__MASK 0x0000ffff +#define NV30_3D_RT_VERT_Y__SHIFT 0 +#define NV30_3D_RT_VERT_H__MASK 0xffff0000 +#define NV30_3D_RT_VERT_H__SHIFT 16 + +#define NV30_3D_RT_FORMAT 0x00000208 +#define NV30_3D_RT_FORMAT_COLOR__MASK 0x0000001f +#define NV30_3D_RT_FORMAT_COLOR__SHIFT 0 +#define 
NV30_3D_RT_FORMAT_COLOR_R5G6B5 0x00000003 +#define NV30_3D_RT_FORMAT_COLOR_X8R8G8B8 0x00000005 +#define NV30_3D_RT_FORMAT_COLOR_A8R8G8B8 0x00000008 +#define NV30_3D_RT_FORMAT_COLOR_B8 0x00000009 +#define NV30_3D_RT_FORMAT_COLOR_A16B16G16R16_FLOAT 0x0000000b +#define NV30_3D_RT_FORMAT_COLOR_A32B32G32R32_FLOAT 0x0000000c +#define NV30_3D_RT_FORMAT_COLOR_R32_FLOAT 0x0000000d +#define NV30_3D_RT_FORMAT_COLOR_UNK0D 0x0000000d +#define NV30_3D_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f +#define NV30_3D_RT_FORMAT_COLOR_A8B8G8R8 0x00000010 +#define NV30_3D_RT_FORMAT_ZETA__MASK 0x000000e0 +#define NV30_3D_RT_FORMAT_ZETA__SHIFT 5 +#define NV30_3D_RT_FORMAT_ZETA_Z16 0x00000020 +#define NV30_3D_RT_FORMAT_ZETA_Z24S8 0x00000040 +#define NV30_3D_RT_FORMAT_TYPE__MASK 0x00000f00 +#define NV30_3D_RT_FORMAT_TYPE__SHIFT 8 +#define NV30_3D_RT_FORMAT_TYPE_LINEAR 0x00000100 +#define NV30_3D_RT_FORMAT_TYPE_SWIZZLED 0x00000200 +#define NV30_3D_RT_FORMAT_LOG2_WIDTH__MASK 0x00ff0000 +#define NV30_3D_RT_FORMAT_LOG2_WIDTH__SHIFT 16 +#define NV30_3D_RT_FORMAT_LOG2_HEIGHT__MASK 0xff000000 +#define NV30_3D_RT_FORMAT_LOG2_HEIGHT__SHIFT 24 + +#define NV30_3D_COLOR0_PITCH 0x0000020c +#define NV30_3D_COLOR0_PITCH_COLOR0__MASK 0x0000ffff +#define NV30_3D_COLOR0_PITCH_COLOR0__SHIFT 0 +#define NV30_3D_COLOR0_PITCH_ZETA__MASK 0xffff0000 +#define NV30_3D_COLOR0_PITCH_ZETA__SHIFT 16 + +#define NV40_3D_COLOR0_PITCH 0x0000020c + +#define NV30_3D_COLOR0_OFFSET 0x00000210 + +#define NV30_3D_ZETA_OFFSET 0x00000214 + +#define NV30_3D_COLOR1_OFFSET 0x00000218 + +#define NV30_3D_COLOR1_PITCH 0x0000021c + +#define NV30_3D_RT_ENABLE 0x00000220 +#define NV30_3D_RT_ENABLE_COLOR0 0x00000001 +#define NV30_3D_RT_ENABLE_COLOR1 0x00000002 +#define NV40_3D_RT_ENABLE_COLOR2 0x00000004 +#define NV40_3D_RT_ENABLE_COLOR3 0x00000008 +#define NV30_3D_RT_ENABLE_MRT 0x00000010 + +#define NV40_3D_ZETA_PITCH 0x0000022c + +#define NV30_3D_HIERZ_PITCH 0x0000022c + +#define NV30_3D_HIERZ_OFFSET 0x00000230 + +#define NV30_3D_TEX_UNITS_ENABLE 
0x0000023c +#define NV30_3D_TEX_UNITS_ENABLE_TX0 0x00000001 +#define NV30_3D_TEX_UNITS_ENABLE_TX1 0x00000002 +#define NV30_3D_TEX_UNITS_ENABLE_TX2 0x00000004 +#define NV30_3D_TEX_UNITS_ENABLE_TX3 0x00000008 +#define NV30_3D_TEX_UNITS_ENABLE_TX4 0x00000010 +#define NV30_3D_TEX_UNITS_ENABLE_TX5 0x00000020 +#define NV30_3D_TEX_UNITS_ENABLE_TX6 0x00000040 +#define NV30_3D_TEX_UNITS_ENABLE_TX7 0x00000080 + +#define NV30_3D_TEX_MATRIX_ENABLE(i0) (0x00000240 + 0x4*(i0)) +#define NV30_3D_TEX_MATRIX_ENABLE__ESIZE 0x00000004 +#define NV30_3D_TEX_MATRIX_ENABLE__LEN 0x00000008 + +#define NV40_3D_COLOR2_PITCH 0x00000280 + +#define NV40_3D_COLOR3_PITCH 0x00000284 + +#define NV40_3D_COLOR2_OFFSET 0x00000288 + +#define NV40_3D_COLOR3_OFFSET 0x0000028c + +#define NV30_3D_VIEWPORT_TX_ORIGIN 0x000002b8 +#define NV30_3D_VIEWPORT_TX_ORIGIN_X__MASK 0x0000ffff +#define NV30_3D_VIEWPORT_TX_ORIGIN_X__SHIFT 0 +#define NV30_3D_VIEWPORT_TX_ORIGIN_Y__MASK 0xffff0000 +#define NV30_3D_VIEWPORT_TX_ORIGIN_Y__SHIFT 16 + +#define NV30_3D_VIEWPORT_CLIP_MODE 0x000002bc + +#define NV30_3D_VIEWPORT_CLIP_HORIZ(i0) (0x000002c0 + 0x8*(i0)) +#define NV30_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008 +#define NV30_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008 +#define NV30_3D_VIEWPORT_CLIP_HORIZ_L__MASK 0x0000ffff +#define NV30_3D_VIEWPORT_CLIP_HORIZ_L__SHIFT 0 +#define NV30_3D_VIEWPORT_CLIP_HORIZ_R__MASK 0xffff0000 +#define NV30_3D_VIEWPORT_CLIP_HORIZ_R__SHIFT 16 + +#define NV30_3D_VIEWPORT_CLIP_VERT(i0) (0x000002c4 + 0x8*(i0)) +#define NV30_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008 +#define NV30_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008 +#define NV30_3D_VIEWPORT_CLIP_VERT_T__MASK 0x0000ffff +#define NV30_3D_VIEWPORT_CLIP_VERT_T__SHIFT 0 +#define NV30_3D_VIEWPORT_CLIP_VERT_D__MASK 0xffff0000 +#define NV30_3D_VIEWPORT_CLIP_VERT_D__SHIFT 16 + +#define NV30_3D_DITHER_ENABLE 0x00000300 + +#define NV30_3D_ALPHA_FUNC_ENABLE 0x00000304 + +#define NV30_3D_ALPHA_FUNC_FUNC 0x00000308 +#define NV30_3D_ALPHA_FUNC_FUNC_NEVER 0x00000200 
+#define NV30_3D_ALPHA_FUNC_FUNC_LESS 0x00000201 +#define NV30_3D_ALPHA_FUNC_FUNC_EQUAL 0x00000202 +#define NV30_3D_ALPHA_FUNC_FUNC_LEQUAL 0x00000203 +#define NV30_3D_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV30_3D_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV30_3D_ALPHA_FUNC_FUNC_GEQUAL 0x00000206 +#define NV30_3D_ALPHA_FUNC_FUNC_ALWAYS 0x00000207 + +#define NV30_3D_ALPHA_FUNC_REF 0x0000030c + +#define NV30_3D_BLEND_FUNC_ENABLE 0x00000310 + +#define NV30_3D_BLEND_FUNC_SRC 0x00000314 +#define NV30_3D_BLEND_FUNC_SRC_RGB__MASK 0x0000ffff +#define NV30_3D_BLEND_FUNC_SRC_RGB__SHIFT 0 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ZERO 0x00000000 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE 0x00000001 +#define NV30_3D_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV30_3D_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV30_3D_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV30_3D_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV30_3D_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV30_3D_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV30_3D_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA__MASK 0xffff0000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA__SHIFT 16 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE 0x00010000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x03000000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x03020000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 
+#define NV30_3D_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x03040000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x03060000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x03080000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x80010000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x80030000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 + +#define NV30_3D_BLEND_FUNC_DST 0x00000318 +#define NV30_3D_BLEND_FUNC_DST_RGB__MASK 0x0000ffff +#define NV30_3D_BLEND_FUNC_DST_RGB__SHIFT 0 +#define NV30_3D_BLEND_FUNC_DST_RGB_ZERO 0x00000000 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE 0x00000001 +#define NV30_3D_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV30_3D_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV30_3D_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV30_3D_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV30_3D_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV30_3D_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV30_3D_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV30_3D_BLEND_FUNC_DST_ALPHA__MASK 0xffff0000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA__SHIFT 16 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE 0x00010000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x03000000 +#define 
NV30_3D_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x03020000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x03040000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x03060000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x03080000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x80010000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 + +#define NV30_3D_BLEND_COLOR 0x0000031c +#define NV30_3D_BLEND_COLOR_B__MASK 0x000000ff +#define NV30_3D_BLEND_COLOR_B__SHIFT 0 +#define NV30_3D_BLEND_COLOR_G__MASK 0x0000ff00 +#define NV30_3D_BLEND_COLOR_G__SHIFT 8 +#define NV30_3D_BLEND_COLOR_R__MASK 0x00ff0000 +#define NV30_3D_BLEND_COLOR_R__SHIFT 16 +#define NV30_3D_BLEND_COLOR_A__MASK 0xff000000 +#define NV30_3D_BLEND_COLOR_A__SHIFT 24 + +#define NV30_3D_BLEND_EQUATION 0x00000320 +#define NV30_3D_BLEND_EQUATION_FUNC_ADD 0x00008006 +#define NV30_3D_BLEND_EQUATION_MIN 0x00008007 +#define NV30_3D_BLEND_EQUATION_MAX 0x00008008 +#define NV30_3D_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a +#define NV30_3D_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NV40_3D_BLEND_EQUATION 0x00000320 +#define NV40_3D_BLEND_EQUATION_RGB__MASK 0x0000ffff +#define NV40_3D_BLEND_EQUATION_RGB__SHIFT 0 +#define NV40_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NV40_3D_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NV40_3D_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NV40_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NV40_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV40_3D_BLEND_EQUATION_ALPHA__MASK 0xffff0000 +#define 
NV40_3D_BLEND_EQUATION_ALPHA__SHIFT 16 +#define NV40_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x80060000 +#define NV40_3D_BLEND_EQUATION_ALPHA_MIN 0x80070000 +#define NV40_3D_BLEND_EQUATION_ALPHA_MAX 0x80080000 +#define NV40_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x800a0000 +#define NV40_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x800b0000 + +#define NV30_3D_COLOR_MASK 0x00000324 +#define NV30_3D_COLOR_MASK_B 0x000000ff +#define NV30_3D_COLOR_MASK_G 0x0000ff00 +#define NV30_3D_COLOR_MASK_R 0x00ff0000 +#define NV30_3D_COLOR_MASK_A 0xff000000 + +#define NV30_3D_STENCIL(i0) (0x00000328 + 0x20*(i0)) +#define NV30_3D_STENCIL__ESIZE 0x00000020 +#define NV30_3D_STENCIL__LEN 0x00000002 + +#define NV30_3D_STENCIL_ENABLE(i0) (0x00000328 + 0x20*(i0)) + +#define NV30_3D_STENCIL_MASK(i0) (0x0000032c + 0x20*(i0)) + +#define NV30_3D_STENCIL_FUNC_FUNC(i0) (0x00000330 + 0x20*(i0)) +#define NV30_3D_STENCIL_FUNC_FUNC_NEVER 0x00000200 +#define NV30_3D_STENCIL_FUNC_FUNC_LESS 0x00000201 +#define NV30_3D_STENCIL_FUNC_FUNC_EQUAL 0x00000202 +#define NV30_3D_STENCIL_FUNC_FUNC_LEQUAL 0x00000203 +#define NV30_3D_STENCIL_FUNC_FUNC_GREATER 0x00000204 +#define NV30_3D_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV30_3D_STENCIL_FUNC_FUNC_GEQUAL 0x00000206 +#define NV30_3D_STENCIL_FUNC_FUNC_ALWAYS 0x00000207 + +#define NV30_3D_STENCIL_FUNC_REF(i0) (0x00000334 + 0x20*(i0)) + +#define NV30_3D_STENCIL_FUNC_MASK(i0) (0x00000338 + 0x20*(i0)) + +#define NV30_3D_STENCIL_OP_FAIL(i0) (0x0000033c + 0x20*(i0)) +#define NV30_3D_STENCIL_OP_FAIL_ZERO 0x00000000 +#define NV30_3D_STENCIL_OP_FAIL_INVERT 0x0000150a +#define NV30_3D_STENCIL_OP_FAIL_KEEP 0x00001e00 +#define NV30_3D_STENCIL_OP_FAIL_REPLACE 0x00001e01 +#define NV30_3D_STENCIL_OP_FAIL_INCR 0x00001e02 +#define NV30_3D_STENCIL_OP_FAIL_DECR 0x00001e03 +#define NV30_3D_STENCIL_OP_FAIL_INCR_WRAP 0x00008507 +#define NV30_3D_STENCIL_OP_FAIL_DECR_WRAP 0x00008508 + +#define NV30_3D_STENCIL_OP_ZFAIL(i0) (0x00000340 + 0x20*(i0)) +#define 
NV30_3D_STENCIL_OP_ZFAIL_ZERO 0x00000000 +#define NV30_3D_STENCIL_OP_ZFAIL_INVERT 0x0000150a +#define NV30_3D_STENCIL_OP_ZFAIL_KEEP 0x00001e00 +#define NV30_3D_STENCIL_OP_ZFAIL_REPLACE 0x00001e01 +#define NV30_3D_STENCIL_OP_ZFAIL_INCR 0x00001e02 +#define NV30_3D_STENCIL_OP_ZFAIL_DECR 0x00001e03 +#define NV30_3D_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV30_3D_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NV30_3D_STENCIL_OP_ZPASS(i0) (0x00000344 + 0x20*(i0)) +#define NV30_3D_STENCIL_OP_ZPASS_ZERO 0x00000000 +#define NV30_3D_STENCIL_OP_ZPASS_INVERT 0x0000150a +#define NV30_3D_STENCIL_OP_ZPASS_KEEP 0x00001e00 +#define NV30_3D_STENCIL_OP_ZPASS_REPLACE 0x00001e01 +#define NV30_3D_STENCIL_OP_ZPASS_INCR 0x00001e02 +#define NV30_3D_STENCIL_OP_ZPASS_DECR 0x00001e03 +#define NV30_3D_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV30_3D_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NV30_3D_SHADE_MODEL 0x00000368 +#define NV30_3D_SHADE_MODEL_FLAT 0x00001d00 +#define NV30_3D_SHADE_MODEL_SMOOTH 0x00001d01 + +#define NV30_3D_FOG_ENABLE 0x0000036c + +#define NV30_3D_FOG_COLOR 0x00000370 +#define NV30_3D_FOG_COLOR_R__MASK 0x000000ff +#define NV30_3D_FOG_COLOR_R__SHIFT 0 +#define NV30_3D_FOG_COLOR_G__MASK 0x0000ff00 +#define NV30_3D_FOG_COLOR_G__SHIFT 8 +#define NV30_3D_FOG_COLOR_B__MASK 0x00ff0000 +#define NV30_3D_FOG_COLOR_B__SHIFT 16 +#define NV30_3D_FOG_COLOR_A__MASK 0xff000000 +#define NV30_3D_FOG_COLOR_A__SHIFT 24 + +#define NV40_3D_MRT_COLOR_MASK 0x00000370 +#define NV40_3D_MRT_COLOR_MASK_BUFFER1_A 0x00000010 +#define NV40_3D_MRT_COLOR_MASK_BUFFER1_R 0x00000020 +#define NV40_3D_MRT_COLOR_MASK_BUFFER1_G 0x00000040 +#define NV40_3D_MRT_COLOR_MASK_BUFFER1_B 0x00000080 +#define NV40_3D_MRT_COLOR_MASK_BUFFER2_A 0x00000100 +#define NV40_3D_MRT_COLOR_MASK_BUFFER2_R 0x00000200 +#define NV40_3D_MRT_COLOR_MASK_BUFFER2_G 0x00000400 +#define NV40_3D_MRT_COLOR_MASK_BUFFER2_B 0x00000800 +#define NV40_3D_MRT_COLOR_MASK_BUFFER3_A 0x00001000 +#define 
NV40_3D_MRT_COLOR_MASK_BUFFER3_R 0x00002000 +#define NV40_3D_MRT_COLOR_MASK_BUFFER3_G 0x00004000 +#define NV40_3D_MRT_COLOR_MASK_BUFFER3_B 0x00008000 + +#define NV30_3D_COLOR_LOGIC_OP_ENABLE 0x00000374 + +#define NV30_3D_COLOR_LOGIC_OP_OP 0x00000378 +#define NV30_3D_COLOR_LOGIC_OP_OP_CLEAR 0x00001500 +#define NV30_3D_COLOR_LOGIC_OP_OP_AND 0x00001501 +#define NV30_3D_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502 +#define NV30_3D_COLOR_LOGIC_OP_OP_COPY 0x00001503 +#define NV30_3D_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504 +#define NV30_3D_COLOR_LOGIC_OP_OP_NOOP 0x00001505 +#define NV30_3D_COLOR_LOGIC_OP_OP_XOR 0x00001506 +#define NV30_3D_COLOR_LOGIC_OP_OP_OR 0x00001507 +#define NV30_3D_COLOR_LOGIC_OP_OP_NOR 0x00001508 +#define NV30_3D_COLOR_LOGIC_OP_OP_EQUIV 0x00001509 +#define NV30_3D_COLOR_LOGIC_OP_OP_INVERT 0x0000150a +#define NV30_3D_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b +#define NV30_3D_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c +#define NV30_3D_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d +#define NV30_3D_COLOR_LOGIC_OP_OP_NAND 0x0000150e +#define NV30_3D_COLOR_LOGIC_OP_OP_SET 0x0000150f + +#define NV30_3D_NORMALIZE_ENABLE 0x0000037c + +#define NV35_3D_DEPTH_BOUNDS_TEST_ENABLE 0x00000380 + +#define NV35_3D_DEPTH_BOUNDS_TEST_ZMIN 0x00000384 + +#define NV35_3D_DEPTH_BOUNDS_TEST_ZMAX 0x00000388 + +#define NV30_3D_COLOR_MATERIAL 0x00000390 +#define NV30_3D_COLOR_MATERIAL_FRONT_EMISSION_ENABLE 0x00000001 +#define NV30_3D_COLOR_MATERIAL_FRONT_AMBIENT_ENABLE 0x00000004 +#define NV30_3D_COLOR_MATERIAL_FRONT_DIFFUSE_ENABLE 0x00000010 +#define NV30_3D_COLOR_MATERIAL_FRONT_SPECULAR_ENABLE 0x00000040 +#define NV30_3D_COLOR_MATERIAL_BACK_EMISSION_ENABLE 0x00000100 +#define NV30_3D_COLOR_MATERIAL_BACK_AMBIENT_ENABLE 0x00000400 +#define NV30_3D_COLOR_MATERIAL_BACK_DIFFUSE_ENABLE 0x00001000 +#define NV30_3D_COLOR_MATERIAL_BACK_SPECULAR_ENABLE 0x00004000 + +#define NV30_3D_DEPTH_RANGE_NEAR 0x00000394 + +#define NV30_3D_DEPTH_RANGE_FAR 0x00000398 + +#define NV30_3D_COLOR_MATERIAL_FRONT 
0x000003a0 + + +#define NV30_3D_COLOR_MATERIAL_FRONT_R 0x000003a0 + +#define NV30_3D_COLOR_MATERIAL_FRONT_G 0x000003a4 + +#define NV30_3D_COLOR_MATERIAL_FRONT_B 0x000003a8 + +#define NV30_3D_COLOR_MATERIAL_FRONT_A 0x000003ac + +#define NV40_3D_MIPMAP_ROUNDING 0x000003b0 +#define NV40_3D_MIPMAP_ROUNDING_MODE__MASK 0x00100000 +#define NV40_3D_MIPMAP_ROUNDING_MODE__SHIFT 20 +#define NV40_3D_MIPMAP_ROUNDING_MODE_UP 0x00000000 +#define NV40_3D_MIPMAP_ROUNDING_MODE_DOWN 0x00100000 + +#define NV30_3D_LINE_WIDTH 0x000003b8 + +#define NV30_3D_LINE_SMOOTH_ENABLE 0x000003bc + + + +#define NV30_3D_TEX_GEN_MODE(i0, i1) (0x00000400 + 0x10*(i0) + 0x4*(i1)) +#define NV30_3D_TEX_GEN_MODE__ESIZE 0x00000004 +#define NV30_3D_TEX_GEN_MODE__LEN 0x00000004 +#define NV30_3D_TEX_GEN_MODE_FALSE 0x00000000 +#define NV30_3D_TEX_GEN_MODE_EYE_LINEAR 0x00002400 +#define NV30_3D_TEX_GEN_MODE_OBJECT_LINEAR 0x00002401 +#define NV30_3D_TEX_GEN_MODE_SPHERE_MAP 0x00002402 +#define NV30_3D_TEX_GEN_MODE_NORMAL_MAP 0x00008511 +#define NV30_3D_TEX_GEN_MODE_REFLECTION_MAP 0x00008512 + +#define NV30_3D_MODELVIEW_MATRIX(i0) (0x00000480 + 0x4*(i0)) +#define NV30_3D_MODELVIEW_MATRIX__ESIZE 0x00000004 +#define NV30_3D_MODELVIEW_MATRIX__LEN 0x00000010 + +#define NV30_3D_INVERSE_MODELVIEW_MATRIX(i0) (0x00000580 + 0x4*(i0)) +#define NV30_3D_INVERSE_MODELVIEW_MATRIX__ESIZE 0x00000004 +#define NV30_3D_INVERSE_MODELVIEW_MATRIX__LEN 0x0000000c + +#define NV30_3D_PROJECTION_MATRIX(i0) (0x00000680 + 0x4*(i0)) +#define NV30_3D_PROJECTION_MATRIX__ESIZE 0x00000004 +#define NV30_3D_PROJECTION_MATRIX__LEN 0x00000010 + + +#define NV30_3D_TEX_MATRIX(i0, i1) (0x000006c0 + 0x40*(i0) + 0x4*(i1)) +#define NV30_3D_TEX_MATRIX__ESIZE 0x00000004 +#define NV30_3D_TEX_MATRIX__LEN 0x00000010 + +#define NV30_3D_SCISSOR_HORIZ 0x000008c0 +#define NV30_3D_SCISSOR_HORIZ_X__MASK 0x0000ffff +#define NV30_3D_SCISSOR_HORIZ_X__SHIFT 0 +#define NV30_3D_SCISSOR_HORIZ_W__MASK 0xffff0000 +#define NV30_3D_SCISSOR_HORIZ_W__SHIFT 16 + +#define 
NV30_3D_SCISSOR_VERT 0x000008c4 +#define NV30_3D_SCISSOR_VERT_Y__MASK 0x0000ffff +#define NV30_3D_SCISSOR_VERT_Y__SHIFT 0 +#define NV30_3D_SCISSOR_VERT_H__MASK 0xffff0000 +#define NV30_3D_SCISSOR_VERT_H__SHIFT 16 + +#define NV30_3D_FOG_COORD_DIST 0x000008c8 + +#define NV30_3D_FOG_MODE 0x000008cc + +#define NV30_3D_FOG_EQUATION_CONSTANT 0x000008d0 + +#define NV30_3D_FOG_EQUATION_LINEAR 0x000008d4 + +#define NV30_3D_FOG_EQUATION_QUADRATIC 0x000008d8 + +#define NV30_3D_FP_ACTIVE_PROGRAM 0x000008e4 +#define NV30_3D_FP_ACTIVE_PROGRAM_DMA0 0x00000001 +#define NV30_3D_FP_ACTIVE_PROGRAM_DMA1 0x00000002 +#define NV30_3D_FP_ACTIVE_PROGRAM_OFFSET__MASK 0xfffffffc +#define NV30_3D_FP_ACTIVE_PROGRAM_OFFSET__SHIFT 2 + + +#define NV30_3D_RC_COLOR0 0x000008ec +#define NV30_3D_RC_COLOR0_B__MASK 0x000000ff +#define NV30_3D_RC_COLOR0_B__SHIFT 0 +#define NV30_3D_RC_COLOR0_G__MASK 0x0000ff00 +#define NV30_3D_RC_COLOR0_G__SHIFT 8 +#define NV30_3D_RC_COLOR0_R__MASK 0x00ff0000 +#define NV30_3D_RC_COLOR0_R__SHIFT 16 +#define NV30_3D_RC_COLOR0_A__MASK 0xff000000 +#define NV30_3D_RC_COLOR0_A__SHIFT 24 + +#define NV30_3D_RC_COLOR1 0x000008f0 +#define NV30_3D_RC_COLOR1_B__MASK 0x000000ff +#define NV30_3D_RC_COLOR1_B__SHIFT 0 +#define NV30_3D_RC_COLOR1_G__MASK 0x0000ff00 +#define NV30_3D_RC_COLOR1_G__SHIFT 8 +#define NV30_3D_RC_COLOR1_R__MASK 0x00ff0000 +#define NV30_3D_RC_COLOR1_R__SHIFT 16 +#define NV30_3D_RC_COLOR1_A__MASK 0xff000000 +#define NV30_3D_RC_COLOR1_A__SHIFT 24 + +#define NV30_3D_RC_FINAL0 0x000008f4 +#define NV30_3D_RC_FINAL0_D_INPUT__MASK 0x0000000f +#define NV30_3D_RC_FINAL0_D_INPUT__SHIFT 0 +#define NV30_3D_RC_FINAL0_D_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL0_D_INPUT_CONSTANT_COLOR0 0x00000001 +#define NV30_3D_RC_FINAL0_D_INPUT_CONSTANT_COLOR1 0x00000002 +#define NV30_3D_RC_FINAL0_D_INPUT_FOG 0x00000003 +#define NV30_3D_RC_FINAL0_D_INPUT_PRIMARY_COLOR 0x00000004 +#define NV30_3D_RC_FINAL0_D_INPUT_SECONDARY_COLOR 0x00000005 +#define NV30_3D_RC_FINAL0_D_INPUT_TEXTURE0 
0x00000008 +#define NV30_3D_RC_FINAL0_D_INPUT_TEXTURE1 0x00000009 +#define NV30_3D_RC_FINAL0_D_INPUT_TEXTURE2 0x0000000a +#define NV30_3D_RC_FINAL0_D_INPUT_TEXTURE3 0x0000000b +#define NV30_3D_RC_FINAL0_D_INPUT_SPARE0 0x0000000c +#define NV30_3D_RC_FINAL0_D_INPUT_SPARE1 0x0000000d +#define NV30_3D_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e +#define NV30_3D_RC_FINAL0_D_INPUT_E_TIMES_F 0x0000000f +#define NV30_3D_RC_FINAL0_D_COMPONENT_USAGE__MASK 0x00000010 +#define NV30_3D_RC_FINAL0_D_COMPONENT_USAGE__SHIFT 4 +#define NV30_3D_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV30_3D_RC_FINAL0_D_MAPPING__MASK 0x000000e0 +#define NV30_3D_RC_FINAL0_D_MAPPING__SHIFT 5 +#define NV30_3D_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT 0x00000020 +#define NV30_3D_RC_FINAL0_D_MAPPING_EXPAND_NORMAL 0x00000040 +#define NV30_3D_RC_FINAL0_D_MAPPING_EXPAND_NEGATE 0x00000060 +#define NV30_3D_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL 0x00000080 +#define NV30_3D_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE 0x000000a0 +#define NV30_3D_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY 0x000000c0 +#define NV30_3D_RC_FINAL0_D_MAPPING_SIGNED_NEGATE 0x000000e0 +#define NV30_3D_RC_FINAL0_C_INPUT__MASK 0x00000f00 +#define NV30_3D_RC_FINAL0_C_INPUT__SHIFT 8 +#define NV30_3D_RC_FINAL0_C_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL0_C_INPUT_CONSTANT_COLOR0 0x00000100 +#define NV30_3D_RC_FINAL0_C_INPUT_CONSTANT_COLOR1 0x00000200 +#define NV30_3D_RC_FINAL0_C_INPUT_FOG 0x00000300 +#define NV30_3D_RC_FINAL0_C_INPUT_PRIMARY_COLOR 0x00000400 +#define NV30_3D_RC_FINAL0_C_INPUT_SECONDARY_COLOR 0x00000500 +#define NV30_3D_RC_FINAL0_C_INPUT_TEXTURE0 0x00000800 +#define NV30_3D_RC_FINAL0_C_INPUT_TEXTURE1 0x00000900 +#define NV30_3D_RC_FINAL0_C_INPUT_TEXTURE2 0x00000a00 +#define NV30_3D_RC_FINAL0_C_INPUT_TEXTURE3 0x00000b00 +#define NV30_3D_RC_FINAL0_C_INPUT_SPARE0 0x00000c00 +#define 
NV30_3D_RC_FINAL0_C_INPUT_SPARE1 0x00000d00 +#define NV30_3D_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00 +#define NV30_3D_RC_FINAL0_C_INPUT_E_TIMES_F 0x00000f00 +#define NV30_3D_RC_FINAL0_C_COMPONENT_USAGE__MASK 0x00001000 +#define NV30_3D_RC_FINAL0_C_COMPONENT_USAGE__SHIFT 12 +#define NV30_3D_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV30_3D_RC_FINAL0_C_MAPPING__MASK 0x0000e000 +#define NV30_3D_RC_FINAL0_C_MAPPING__SHIFT 13 +#define NV30_3D_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT 0x00002000 +#define NV30_3D_RC_FINAL0_C_MAPPING_EXPAND_NORMAL 0x00004000 +#define NV30_3D_RC_FINAL0_C_MAPPING_EXPAND_NEGATE 0x00006000 +#define NV30_3D_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL 0x00008000 +#define NV30_3D_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE 0x0000a000 +#define NV30_3D_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY 0x0000c000 +#define NV30_3D_RC_FINAL0_C_MAPPING_SIGNED_NEGATE 0x0000e000 +#define NV30_3D_RC_FINAL0_B_INPUT__MASK 0x000f0000 +#define NV30_3D_RC_FINAL0_B_INPUT__SHIFT 16 +#define NV30_3D_RC_FINAL0_B_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL0_B_INPUT_CONSTANT_COLOR0 0x00010000 +#define NV30_3D_RC_FINAL0_B_INPUT_CONSTANT_COLOR1 0x00020000 +#define NV30_3D_RC_FINAL0_B_INPUT_FOG 0x00030000 +#define NV30_3D_RC_FINAL0_B_INPUT_PRIMARY_COLOR 0x00040000 +#define NV30_3D_RC_FINAL0_B_INPUT_SECONDARY_COLOR 0x00050000 +#define NV30_3D_RC_FINAL0_B_INPUT_TEXTURE0 0x00080000 +#define NV30_3D_RC_FINAL0_B_INPUT_TEXTURE1 0x00090000 +#define NV30_3D_RC_FINAL0_B_INPUT_TEXTURE2 0x000a0000 +#define NV30_3D_RC_FINAL0_B_INPUT_TEXTURE3 0x000b0000 +#define NV30_3D_RC_FINAL0_B_INPUT_SPARE0 0x000c0000 +#define NV30_3D_RC_FINAL0_B_INPUT_SPARE1 0x000d0000 +#define NV30_3D_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000 +#define NV30_3D_RC_FINAL0_B_INPUT_E_TIMES_F 0x000f0000 +#define NV30_3D_RC_FINAL0_B_COMPONENT_USAGE__MASK 0x00100000 +#define 
NV30_3D_RC_FINAL0_B_COMPONENT_USAGE__SHIFT 20 +#define NV30_3D_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV30_3D_RC_FINAL0_B_MAPPING__MASK 0x00e00000 +#define NV30_3D_RC_FINAL0_B_MAPPING__SHIFT 21 +#define NV30_3D_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT 0x00200000 +#define NV30_3D_RC_FINAL0_B_MAPPING_EXPAND_NORMAL 0x00400000 +#define NV30_3D_RC_FINAL0_B_MAPPING_EXPAND_NEGATE 0x00600000 +#define NV30_3D_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL 0x00800000 +#define NV30_3D_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE 0x00a00000 +#define NV30_3D_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY 0x00c00000 +#define NV30_3D_RC_FINAL0_B_MAPPING_SIGNED_NEGATE 0x00e00000 +#define NV30_3D_RC_FINAL0_A_INPUT__MASK 0x0f000000 +#define NV30_3D_RC_FINAL0_A_INPUT__SHIFT 24 +#define NV30_3D_RC_FINAL0_A_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL0_A_INPUT_CONSTANT_COLOR0 0x01000000 +#define NV30_3D_RC_FINAL0_A_INPUT_CONSTANT_COLOR1 0x02000000 +#define NV30_3D_RC_FINAL0_A_INPUT_FOG 0x03000000 +#define NV30_3D_RC_FINAL0_A_INPUT_PRIMARY_COLOR 0x04000000 +#define NV30_3D_RC_FINAL0_A_INPUT_SECONDARY_COLOR 0x05000000 +#define NV30_3D_RC_FINAL0_A_INPUT_TEXTURE0 0x08000000 +#define NV30_3D_RC_FINAL0_A_INPUT_TEXTURE1 0x09000000 +#define NV30_3D_RC_FINAL0_A_INPUT_TEXTURE2 0x0a000000 +#define NV30_3D_RC_FINAL0_A_INPUT_TEXTURE3 0x0b000000 +#define NV30_3D_RC_FINAL0_A_INPUT_SPARE0 0x0c000000 +#define NV30_3D_RC_FINAL0_A_INPUT_SPARE1 0x0d000000 +#define NV30_3D_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000 +#define NV30_3D_RC_FINAL0_A_INPUT_E_TIMES_F 0x0f000000 +#define NV30_3D_RC_FINAL0_A_COMPONENT_USAGE__MASK 0x10000000 +#define NV30_3D_RC_FINAL0_A_COMPONENT_USAGE__SHIFT 28 +#define NV30_3D_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV30_3D_RC_FINAL0_A_MAPPING__MASK 0xe0000000 +#define 
NV30_3D_RC_FINAL0_A_MAPPING__SHIFT 29 +#define NV30_3D_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT 0x20000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_EXPAND_NORMAL 0x40000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_EXPAND_NEGATE 0x60000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL 0x80000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE 0xa0000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY 0xc0000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_SIGNED_NEGATE 0xe0000000 + +#define NV30_3D_RC_FINAL1 0x000008f8 +#define NV30_3D_RC_FINAL1_COLOR_SUM_CLAMP 0x00000080 +#define NV30_3D_RC_FINAL1_G_INPUT__MASK 0x00000f00 +#define NV30_3D_RC_FINAL1_G_INPUT__SHIFT 8 +#define NV30_3D_RC_FINAL1_G_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL1_G_INPUT_CONSTANT_COLOR0 0x00000100 +#define NV30_3D_RC_FINAL1_G_INPUT_CONSTANT_COLOR1 0x00000200 +#define NV30_3D_RC_FINAL1_G_INPUT_FOG 0x00000300 +#define NV30_3D_RC_FINAL1_G_INPUT_PRIMARY_COLOR 0x00000400 +#define NV30_3D_RC_FINAL1_G_INPUT_SECONDARY_COLOR 0x00000500 +#define NV30_3D_RC_FINAL1_G_INPUT_TEXTURE0 0x00000800 +#define NV30_3D_RC_FINAL1_G_INPUT_TEXTURE1 0x00000900 +#define NV30_3D_RC_FINAL1_G_INPUT_TEXTURE2 0x00000a00 +#define NV30_3D_RC_FINAL1_G_INPUT_TEXTURE3 0x00000b00 +#define NV30_3D_RC_FINAL1_G_INPUT_SPARE0 0x00000c00 +#define NV30_3D_RC_FINAL1_G_INPUT_SPARE1 0x00000d00 +#define NV30_3D_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00 +#define NV30_3D_RC_FINAL1_G_INPUT_E_TIMES_F 0x00000f00 +#define NV30_3D_RC_FINAL1_G_COMPONENT_USAGE__MASK 0x00001000 +#define NV30_3D_RC_FINAL1_G_COMPONENT_USAGE__SHIFT 12 +#define NV30_3D_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV30_3D_RC_FINAL1_G_MAPPING__MASK 0x0000e000 +#define NV30_3D_RC_FINAL1_G_MAPPING__SHIFT 13 +#define NV30_3D_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT 
0x00002000 +#define NV30_3D_RC_FINAL1_G_MAPPING_EXPAND_NORMAL 0x00004000 +#define NV30_3D_RC_FINAL1_G_MAPPING_EXPAND_NEGATE 0x00006000 +#define NV30_3D_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL 0x00008000 +#define NV30_3D_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE 0x0000a000 +#define NV30_3D_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY 0x0000c000 +#define NV30_3D_RC_FINAL1_G_MAPPING_SIGNED_NEGATE 0x0000e000 +#define NV30_3D_RC_FINAL1_F_INPUT__MASK 0x000f0000 +#define NV30_3D_RC_FINAL1_F_INPUT__SHIFT 16 +#define NV30_3D_RC_FINAL1_F_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL1_F_INPUT_CONSTANT_COLOR0 0x00010000 +#define NV30_3D_RC_FINAL1_F_INPUT_CONSTANT_COLOR1 0x00020000 +#define NV30_3D_RC_FINAL1_F_INPUT_FOG 0x00030000 +#define NV30_3D_RC_FINAL1_F_INPUT_PRIMARY_COLOR 0x00040000 +#define NV30_3D_RC_FINAL1_F_INPUT_SECONDARY_COLOR 0x00050000 +#define NV30_3D_RC_FINAL1_F_INPUT_TEXTURE0 0x00080000 +#define NV30_3D_RC_FINAL1_F_INPUT_TEXTURE1 0x00090000 +#define NV30_3D_RC_FINAL1_F_INPUT_TEXTURE2 0x000a0000 +#define NV30_3D_RC_FINAL1_F_INPUT_TEXTURE3 0x000b0000 +#define NV30_3D_RC_FINAL1_F_INPUT_SPARE0 0x000c0000 +#define NV30_3D_RC_FINAL1_F_INPUT_SPARE1 0x000d0000 +#define NV30_3D_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000 +#define NV30_3D_RC_FINAL1_F_INPUT_E_TIMES_F 0x000f0000 +#define NV30_3D_RC_FINAL1_F_COMPONENT_USAGE__MASK 0x00100000 +#define NV30_3D_RC_FINAL1_F_COMPONENT_USAGE__SHIFT 20 +#define NV30_3D_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV30_3D_RC_FINAL1_F_MAPPING__MASK 0x00e00000 +#define NV30_3D_RC_FINAL1_F_MAPPING__SHIFT 21 +#define NV30_3D_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT 0x00200000 +#define NV30_3D_RC_FINAL1_F_MAPPING_EXPAND_NORMAL 0x00400000 +#define NV30_3D_RC_FINAL1_F_MAPPING_EXPAND_NEGATE 0x00600000 +#define NV30_3D_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL 0x00800000 +#define 
NV30_3D_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE 0x00a00000 +#define NV30_3D_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY 0x00c00000 +#define NV30_3D_RC_FINAL1_F_MAPPING_SIGNED_NEGATE 0x00e00000 +#define NV30_3D_RC_FINAL1_E_INPUT__MASK 0x0f000000 +#define NV30_3D_RC_FINAL1_E_INPUT__SHIFT 24 +#define NV30_3D_RC_FINAL1_E_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL1_E_INPUT_CONSTANT_COLOR0 0x01000000 +#define NV30_3D_RC_FINAL1_E_INPUT_CONSTANT_COLOR1 0x02000000 +#define NV30_3D_RC_FINAL1_E_INPUT_FOG 0x03000000 +#define NV30_3D_RC_FINAL1_E_INPUT_PRIMARY_COLOR 0x04000000 +#define NV30_3D_RC_FINAL1_E_INPUT_SECONDARY_COLOR 0x05000000 +#define NV30_3D_RC_FINAL1_E_INPUT_TEXTURE0 0x08000000 +#define NV30_3D_RC_FINAL1_E_INPUT_TEXTURE1 0x09000000 +#define NV30_3D_RC_FINAL1_E_INPUT_TEXTURE2 0x0a000000 +#define NV30_3D_RC_FINAL1_E_INPUT_TEXTURE3 0x0b000000 +#define NV30_3D_RC_FINAL1_E_INPUT_SPARE0 0x0c000000 +#define NV30_3D_RC_FINAL1_E_INPUT_SPARE1 0x0d000000 +#define NV30_3D_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000 +#define NV30_3D_RC_FINAL1_E_INPUT_E_TIMES_F 0x0f000000 +#define NV30_3D_RC_FINAL1_E_COMPONENT_USAGE__MASK 0x10000000 +#define NV30_3D_RC_FINAL1_E_COMPONENT_USAGE__SHIFT 28 +#define NV30_3D_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV30_3D_RC_FINAL1_E_MAPPING__MASK 0xe0000000 +#define NV30_3D_RC_FINAL1_E_MAPPING__SHIFT 29 +#define NV30_3D_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT 0x20000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_EXPAND_NORMAL 0x40000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_EXPAND_NEGATE 0x60000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL 0x80000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE 0xa0000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY 0xc0000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_SIGNED_NEGATE 0xe0000000 + +#define NV30_3D_RC_ENABLE 0x000008fc +#define 
NV30_3D_RC_ENABLE_NUM_COMBINERS__MASK 0x0000000f +#define NV30_3D_RC_ENABLE_NUM_COMBINERS__SHIFT 0 +#define NV30_3D_RC_ENABLE_STAGE_CONSTANT_COLOR0 0x0000f000 +#define NV30_3D_RC_ENABLE_STAGE_CONSTANT_COLOR1 0x000f0000 + + +#define NV30_3D_RC_IN_ALPHA(i0) (0x00000900 + 0x20*(i0)) +#define NV30_3D_RC_IN_ALPHA_D_INPUT__MASK 0x0000000f +#define NV30_3D_RC_IN_ALPHA_D_INPUT__SHIFT 0 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0 0x00000001 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1 0x00000002 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_FOG 0x00000003 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR 0x00000004 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR 0x00000005 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_TEXTURE0 0x00000008 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_TEXTURE1 0x00000009 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_TEXTURE2 0x0000000a +#define NV30_3D_RC_IN_ALPHA_D_INPUT_TEXTURE3 0x0000000b +#define NV30_3D_RC_IN_ALPHA_D_INPUT_SPARE0 0x0000000c +#define NV30_3D_RC_IN_ALPHA_D_INPUT_SPARE1 0x0000000d +#define NV30_3D_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e +#define NV30_3D_RC_IN_ALPHA_D_INPUT_E_TIMES_F 0x0000000f +#define NV30_3D_RC_IN_ALPHA_D_COMPONENT_USAGE__MASK 0x00000010 +#define NV30_3D_RC_IN_ALPHA_D_COMPONENT_USAGE__SHIFT 4 +#define NV30_3D_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000 +#define NV30_3D_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING__MASK 0x000000e0 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING__SHIFT 5 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT 0x00000020 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL 0x00000040 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE 0x00000060 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL 0x00000080 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE 0x000000a0 +#define 
NV30_3D_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY 0x000000c0 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE 0x000000e0 +#define NV30_3D_RC_IN_ALPHA_C_INPUT__MASK 0x00000f00 +#define NV30_3D_RC_IN_ALPHA_C_INPUT__SHIFT 8 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0 0x00000100 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1 0x00000200 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_FOG 0x00000300 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR 0x00000400 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR 0x00000500 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_TEXTURE0 0x00000800 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_TEXTURE1 0x00000900 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_TEXTURE2 0x00000a00 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_TEXTURE3 0x00000b00 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_SPARE0 0x00000c00 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_SPARE1 0x00000d00 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_E_TIMES_F 0x00000f00 +#define NV30_3D_RC_IN_ALPHA_C_COMPONENT_USAGE__MASK 0x00001000 +#define NV30_3D_RC_IN_ALPHA_C_COMPONENT_USAGE__SHIFT 12 +#define NV30_3D_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000 +#define NV30_3D_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING__MASK 0x0000e000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING__SHIFT 13 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT 0x00002000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL 0x00004000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE 0x00006000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL 0x00008000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE 0x0000a000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY 0x0000c000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE 0x0000e000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT__MASK 0x000f0000 +#define 
NV30_3D_RC_IN_ALPHA_B_INPUT__SHIFT 16 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0 0x00010000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1 0x00020000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_FOG 0x00030000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR 0x00040000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR 0x00050000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_TEXTURE0 0x00080000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_TEXTURE1 0x00090000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_TEXTURE2 0x000a0000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_TEXTURE3 0x000b0000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_SPARE0 0x000c0000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_SPARE1 0x000d0000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_E_TIMES_F 0x000f0000 +#define NV30_3D_RC_IN_ALPHA_B_COMPONENT_USAGE__MASK 0x00100000 +#define NV30_3D_RC_IN_ALPHA_B_COMPONENT_USAGE__SHIFT 20 +#define NV30_3D_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000 +#define NV30_3D_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING__MASK 0x00e00000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING__SHIFT 21 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT 0x00200000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL 0x00400000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE 0x00600000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL 0x00800000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE 0x00a00000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY 0x00c00000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE 0x00e00000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT__MASK 0x0f000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT__SHIFT 24 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0 0x01000000 +#define 
NV30_3D_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1 0x02000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_FOG 0x03000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR 0x04000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR 0x05000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_TEXTURE0 0x08000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_TEXTURE1 0x09000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_TEXTURE2 0x0a000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_TEXTURE3 0x0b000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_SPARE0 0x0c000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_SPARE1 0x0d000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_E_TIMES_F 0x0f000000 +#define NV30_3D_RC_IN_ALPHA_A_COMPONENT_USAGE__MASK 0x10000000 +#define NV30_3D_RC_IN_ALPHA_A_COMPONENT_USAGE__SHIFT 28 +#define NV30_3D_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000 +#define NV30_3D_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING__MASK 0xe0000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING__SHIFT 29 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT 0x20000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL 0x40000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE 0x60000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL 0x80000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE 0xa0000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY 0xc0000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE 0xe0000000 + +#define NV30_3D_RC_IN_RGB(i0) (0x00000904 + 0x20*(i0)) +#define NV30_3D_RC_IN_RGB_D_INPUT__MASK 0x0000000f +#define NV30_3D_RC_IN_RGB_D_INPUT__SHIFT 0 +#define NV30_3D_RC_IN_RGB_D_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0 0x00000001 +#define NV30_3D_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1 0x00000002 +#define NV30_3D_RC_IN_RGB_D_INPUT_FOG 0x00000003 +#define 
NV30_3D_RC_IN_RGB_D_INPUT_PRIMARY_COLOR 0x00000004 +#define NV30_3D_RC_IN_RGB_D_INPUT_SECONDARY_COLOR 0x00000005 +#define NV30_3D_RC_IN_RGB_D_INPUT_TEXTURE0 0x00000008 +#define NV30_3D_RC_IN_RGB_D_INPUT_TEXTURE1 0x00000009 +#define NV30_3D_RC_IN_RGB_D_INPUT_TEXTURE2 0x0000000a +#define NV30_3D_RC_IN_RGB_D_INPUT_TEXTURE3 0x0000000b +#define NV30_3D_RC_IN_RGB_D_INPUT_SPARE0 0x0000000c +#define NV30_3D_RC_IN_RGB_D_INPUT_SPARE1 0x0000000d +#define NV30_3D_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e +#define NV30_3D_RC_IN_RGB_D_INPUT_E_TIMES_F 0x0000000f +#define NV30_3D_RC_IN_RGB_D_COMPONENT_USAGE__MASK 0x00000010 +#define NV30_3D_RC_IN_RGB_D_COMPONENT_USAGE__SHIFT 4 +#define NV30_3D_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV30_3D_RC_IN_RGB_D_MAPPING__MASK 0x000000e0 +#define NV30_3D_RC_IN_RGB_D_MAPPING__SHIFT 5 +#define NV30_3D_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT 0x00000020 +#define NV30_3D_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL 0x00000040 +#define NV30_3D_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE 0x00000060 +#define NV30_3D_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL 0x00000080 +#define NV30_3D_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE 0x000000a0 +#define NV30_3D_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY 0x000000c0 +#define NV30_3D_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE 0x000000e0 +#define NV30_3D_RC_IN_RGB_C_INPUT__MASK 0x00000f00 +#define NV30_3D_RC_IN_RGB_C_INPUT__SHIFT 8 +#define NV30_3D_RC_IN_RGB_C_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0 0x00000100 +#define NV30_3D_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1 0x00000200 +#define NV30_3D_RC_IN_RGB_C_INPUT_FOG 0x00000300 +#define NV30_3D_RC_IN_RGB_C_INPUT_PRIMARY_COLOR 0x00000400 +#define NV30_3D_RC_IN_RGB_C_INPUT_SECONDARY_COLOR 0x00000500 +#define NV30_3D_RC_IN_RGB_C_INPUT_TEXTURE0 0x00000800 +#define NV30_3D_RC_IN_RGB_C_INPUT_TEXTURE1 0x00000900 +#define 
NV30_3D_RC_IN_RGB_C_INPUT_TEXTURE2 0x00000a00 +#define NV30_3D_RC_IN_RGB_C_INPUT_TEXTURE3 0x00000b00 +#define NV30_3D_RC_IN_RGB_C_INPUT_SPARE0 0x00000c00 +#define NV30_3D_RC_IN_RGB_C_INPUT_SPARE1 0x00000d00 +#define NV30_3D_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00 +#define NV30_3D_RC_IN_RGB_C_INPUT_E_TIMES_F 0x00000f00 +#define NV30_3D_RC_IN_RGB_C_COMPONENT_USAGE__MASK 0x00001000 +#define NV30_3D_RC_IN_RGB_C_COMPONENT_USAGE__SHIFT 12 +#define NV30_3D_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV30_3D_RC_IN_RGB_C_MAPPING__MASK 0x0000e000 +#define NV30_3D_RC_IN_RGB_C_MAPPING__SHIFT 13 +#define NV30_3D_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT 0x00002000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL 0x00004000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE 0x00006000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL 0x00008000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE 0x0000a000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY 0x0000c000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE 0x0000e000 +#define NV30_3D_RC_IN_RGB_B_INPUT__MASK 0x000f0000 +#define NV30_3D_RC_IN_RGB_B_INPUT__SHIFT 16 +#define NV30_3D_RC_IN_RGB_B_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0 0x00010000 +#define NV30_3D_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1 0x00020000 +#define NV30_3D_RC_IN_RGB_B_INPUT_FOG 0x00030000 +#define NV30_3D_RC_IN_RGB_B_INPUT_PRIMARY_COLOR 0x00040000 +#define NV30_3D_RC_IN_RGB_B_INPUT_SECONDARY_COLOR 0x00050000 +#define NV30_3D_RC_IN_RGB_B_INPUT_TEXTURE0 0x00080000 +#define NV30_3D_RC_IN_RGB_B_INPUT_TEXTURE1 0x00090000 +#define NV30_3D_RC_IN_RGB_B_INPUT_TEXTURE2 0x000a0000 +#define NV30_3D_RC_IN_RGB_B_INPUT_TEXTURE3 0x000b0000 +#define NV30_3D_RC_IN_RGB_B_INPUT_SPARE0 0x000c0000 +#define NV30_3D_RC_IN_RGB_B_INPUT_SPARE1 0x000d0000 +#define 
NV30_3D_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000 +#define NV30_3D_RC_IN_RGB_B_INPUT_E_TIMES_F 0x000f0000 +#define NV30_3D_RC_IN_RGB_B_COMPONENT_USAGE__MASK 0x00100000 +#define NV30_3D_RC_IN_RGB_B_COMPONENT_USAGE__SHIFT 20 +#define NV30_3D_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV30_3D_RC_IN_RGB_B_MAPPING__MASK 0x00e00000 +#define NV30_3D_RC_IN_RGB_B_MAPPING__SHIFT 21 +#define NV30_3D_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT 0x00200000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL 0x00400000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE 0x00600000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL 0x00800000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE 0x00a00000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY 0x00c00000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE 0x00e00000 +#define NV30_3D_RC_IN_RGB_A_INPUT__MASK 0x0f000000 +#define NV30_3D_RC_IN_RGB_A_INPUT__SHIFT 24 +#define NV30_3D_RC_IN_RGB_A_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0 0x01000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1 0x02000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_FOG 0x03000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_PRIMARY_COLOR 0x04000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_SECONDARY_COLOR 0x05000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_TEXTURE0 0x08000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_TEXTURE1 0x09000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_TEXTURE2 0x0a000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_TEXTURE3 0x0b000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_SPARE0 0x0c000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_SPARE1 0x0d000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_E_TIMES_F 0x0f000000 +#define NV30_3D_RC_IN_RGB_A_COMPONENT_USAGE__MASK 0x10000000 +#define NV30_3D_RC_IN_RGB_A_COMPONENT_USAGE__SHIFT 28 +#define 
NV30_3D_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING__MASK 0xe0000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING__SHIFT 29 +#define NV30_3D_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT 0x20000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL 0x40000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE 0x60000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL 0x80000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE 0xa0000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY 0xc0000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE 0xe0000000 + +#define NV30_3D_RC_CONSTANT_COLOR0(i0) (0x00000908 + 0x20*(i0)) +#define NV30_3D_RC_CONSTANT_COLOR0_B__MASK 0x000000ff +#define NV30_3D_RC_CONSTANT_COLOR0_B__SHIFT 0 +#define NV30_3D_RC_CONSTANT_COLOR0_G__MASK 0x0000ff00 +#define NV30_3D_RC_CONSTANT_COLOR0_G__SHIFT 8 +#define NV30_3D_RC_CONSTANT_COLOR0_R__MASK 0x00ff0000 +#define NV30_3D_RC_CONSTANT_COLOR0_R__SHIFT 16 +#define NV30_3D_RC_CONSTANT_COLOR0_A__MASK 0xff000000 +#define NV30_3D_RC_CONSTANT_COLOR0_A__SHIFT 24 + +#define NV30_3D_RC_CONSTANT_COLOR1(i0) (0x0000090c + 0x20*(i0)) +#define NV30_3D_RC_CONSTANT_COLOR1_B__MASK 0x000000ff +#define NV30_3D_RC_CONSTANT_COLOR1_B__SHIFT 0 +#define NV30_3D_RC_CONSTANT_COLOR1_G__MASK 0x0000ff00 +#define NV30_3D_RC_CONSTANT_COLOR1_G__SHIFT 8 +#define NV30_3D_RC_CONSTANT_COLOR1_R__MASK 0x00ff0000 +#define NV30_3D_RC_CONSTANT_COLOR1_R__SHIFT 16 +#define NV30_3D_RC_CONSTANT_COLOR1_A__MASK 0xff000000 +#define NV30_3D_RC_CONSTANT_COLOR1_A__SHIFT 24 + +#define NV30_3D_RC_OUT_ALPHA(i0) (0x00000910 + 0x20*(i0)) +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT__MASK 0x0000000f +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT__SHIFT 0 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0 0x00000001 +#define 
NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1 0x00000002 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR 0x00000004 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR 0x00000005 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0 0x00000008 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1 0x00000009 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE2 0x0000000a +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE3 0x0000000b +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_SPARE0 0x0000000c +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_SPARE1 0x0000000d +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F 0x0000000f +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT__MASK 0x000000f0 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT__SHIFT 4 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0 0x00000010 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1 0x00000020 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR 0x00000040 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR 0x00000050 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0 0x00000080 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1 0x00000090 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE2 0x000000a0 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE3 0x000000b0 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_SPARE0 0x000000c0 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_SPARE1 0x000000d0 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000000e0 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F 0x000000f0 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT__MASK 0x00000f00 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT__SHIFT 8 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0 0x00000100 +#define 
NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1 0x00000200 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR 0x00000400 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR 0x00000500 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0 0x00000800 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1 0x00000900 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE2 0x00000a00 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE3 0x00000b00 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0 0x00000c00 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1 0x00000d00 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F 0x00000f00 +#define NV30_3D_RC_OUT_ALPHA_CD_DOT_PRODUCT 0x00001000 +#define NV30_3D_RC_OUT_ALPHA_AB_DOT_PRODUCT 0x00002000 +#define NV30_3D_RC_OUT_ALPHA_MUX_SUM 0x00004000 +#define NV30_3D_RC_OUT_ALPHA_BIAS__MASK 0x00008000 +#define NV30_3D_RC_OUT_ALPHA_BIAS__SHIFT 15 +#define NV30_3D_RC_OUT_ALPHA_BIAS_NONE 0x00000000 +#define NV30_3D_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF 0x00008000 +#define NV30_3D_RC_OUT_ALPHA_SCALE__MASK 0x00030000 +#define NV30_3D_RC_OUT_ALPHA_SCALE__SHIFT 16 +#define NV30_3D_RC_OUT_ALPHA_SCALE_NONE 0x00000000 +#define NV30_3D_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO 0x00010000 +#define NV30_3D_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR 0x00020000 +#define NV30_3D_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF 0x00030000 + +#define NV30_3D_RC_OUT_RGB(i0) (0x00000914 + 0x20*(i0)) +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT__MASK 0x0000000f +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT__SHIFT 0 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0 0x00000001 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1 0x00000002 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR 0x00000004 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR 0x00000005 
+#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE0 0x00000008 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE1 0x00000009 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE2 0x0000000a +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE3 0x0000000b +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_SPARE0 0x0000000c +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_SPARE1 0x0000000d +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F 0x0000000f +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT__MASK 0x000000f0 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT__SHIFT 4 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0 0x00000010 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1 0x00000020 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR 0x00000040 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR 0x00000050 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE0 0x00000080 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE1 0x00000090 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE2 0x000000a0 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE3 0x000000b0 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_SPARE0 0x000000c0 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_SPARE1 0x000000d0 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000000e0 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F 0x000000f0 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT__MASK 0x00000f00 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT__SHIFT 8 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0 0x00000100 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1 0x00000200 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR 0x00000400 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR 0x00000500 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0 0x00000800 +#define 
NV30_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1 0x00000900 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE2 0x00000a00 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE3 0x00000b00 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0 0x00000c00 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE1 0x00000d00 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F 0x00000f00 +#define NV30_3D_RC_OUT_RGB_CD_DOT_PRODUCT 0x00001000 +#define NV30_3D_RC_OUT_RGB_AB_DOT_PRODUCT 0x00002000 +#define NV30_3D_RC_OUT_RGB_MUX_SUM 0x00004000 +#define NV30_3D_RC_OUT_RGB_BIAS__MASK 0x00008000 +#define NV30_3D_RC_OUT_RGB_BIAS__SHIFT 15 +#define NV30_3D_RC_OUT_RGB_BIAS_NONE 0x00000000 +#define NV30_3D_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF 0x00008000 +#define NV30_3D_RC_OUT_RGB_SCALE__MASK 0x00030000 +#define NV30_3D_RC_OUT_RGB_SCALE__SHIFT 16 +#define NV30_3D_RC_OUT_RGB_SCALE_NONE 0x00000000 +#define NV30_3D_RC_OUT_RGB_SCALE_SCALE_BY_TWO 0x00010000 +#define NV30_3D_RC_OUT_RGB_SCALE_SCALE_BY_FOUR 0x00020000 +#define NV30_3D_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF 0x00030000 + +#define NV30_3D_VIEWPORT_HORIZ 0x00000a00 +#define NV30_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff +#define NV30_3D_VIEWPORT_HORIZ_X__SHIFT 0 +#define NV30_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000 +#define NV30_3D_VIEWPORT_HORIZ_W__SHIFT 16 + +#define NV30_3D_VIEWPORT_VERT 0x00000a04 +#define NV30_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff +#define NV30_3D_VIEWPORT_VERT_Y__SHIFT 0 +#define NV30_3D_VIEWPORT_VERT_H__MASK 0xffff0000 +#define NV30_3D_VIEWPORT_VERT_H__SHIFT 16 + +#define NV30_3D_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION 0x00000a10 + + +#define NV30_3D_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x00000a10 + +#define NV30_3D_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x00000a14 + +#define NV30_3D_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x00000a18 + +#define NV30_3D_VIEWPORT_TRANSLATE 0x00000a20 + + +#define 
NV30_3D_VIEWPORT_TRANSLATE_X 0x00000a20 + +#define NV30_3D_VIEWPORT_TRANSLATE_Y 0x00000a24 + +#define NV30_3D_VIEWPORT_TRANSLATE_Z 0x00000a28 + +#define NV30_3D_VIEWPORT_TRANSLATE_W 0x00000a2c + +#define NV30_3D_VIEWPORT_SCALE 0x00000a30 + + +#define NV30_3D_VIEWPORT_SCALE_X 0x00000a30 + +#define NV30_3D_VIEWPORT_SCALE_Y 0x00000a34 + +#define NV30_3D_VIEWPORT_SCALE_Z 0x00000a38 + +#define NV30_3D_VIEWPORT_SCALE_W 0x00000a3c + +#define NV30_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000a60 + +#define NV30_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000a64 + +#define NV30_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000a68 + +#define NV30_3D_DEPTH_FUNC 0x00000a6c +#define NV30_3D_DEPTH_FUNC_NEVER 0x00000200 +#define NV30_3D_DEPTH_FUNC_LESS 0x00000201 +#define NV30_3D_DEPTH_FUNC_EQUAL 0x00000202 +#define NV30_3D_DEPTH_FUNC_LEQUAL 0x00000203 +#define NV30_3D_DEPTH_FUNC_GREATER 0x00000204 +#define NV30_3D_DEPTH_FUNC_NOTEQUAL 0x00000205 +#define NV30_3D_DEPTH_FUNC_GEQUAL 0x00000206 +#define NV30_3D_DEPTH_FUNC_ALWAYS 0x00000207 + +#define NV30_3D_DEPTH_WRITE_ENABLE 0x00000a70 + +#define NV30_3D_DEPTH_TEST_ENABLE 0x00000a74 + +#define NV30_3D_POLYGON_OFFSET_FACTOR 0x00000a78 + +#define NV30_3D_POLYGON_OFFSET_UNITS 0x00000a7c + +#define NV30_3D_VTX_ATTR_3I_XY(i0) (0x00000a80 + 0x8*(i0)) +#define NV30_3D_VTX_ATTR_3I_XY__ESIZE 0x00000008 +#define NV30_3D_VTX_ATTR_3I_XY__LEN 0x00000010 +#define NV30_3D_VTX_ATTR_3I_XY_X__MASK 0x0000ffff +#define NV30_3D_VTX_ATTR_3I_XY_X__SHIFT 0 +#define NV30_3D_VTX_ATTR_3I_XY_Y__MASK 0xffff0000 +#define NV30_3D_VTX_ATTR_3I_XY_Y__SHIFT 16 + +#define NV30_3D_VTX_ATTR_3I_Z(i0) (0x00000a84 + 0x8*(i0)) +#define NV30_3D_VTX_ATTR_3I_Z__ESIZE 0x00000008 +#define NV30_3D_VTX_ATTR_3I_Z__LEN 0x00000010 +#define NV30_3D_VTX_ATTR_3I_Z_Z__MASK 0x0000ffff +#define NV30_3D_VTX_ATTR_3I_Z_Z__SHIFT 0 + +#define NV30_3D_TEX_FILTER_OPTIMIZATION(i0) (0x00000b00 + 0x4*(i0)) +#define NV30_3D_TEX_FILTER_OPTIMIZATION__ESIZE 0x00000004 +#define NV30_3D_TEX_FILTER_OPTIMIZATION__LEN 0x00000004 
+#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR__MASK 0x0000001f +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR__SHIFT 0 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR_OFF 0x00000000 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR_HIGH_QUALITY 0x00000004 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR_QUALITY 0x00000006 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR_PERFORMANCE 0x00000008 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR_HIGH_PERFORMANCE 0x00000018 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_ANISO_SAMPLE__MASK 0x000001c0 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_ANISO_SAMPLE__SHIFT 6 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_ANISO_SAMPLE_OFF 0x00000000 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_ANISO_SAMPLE_HIGH_QUALITY 0x000000c0 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_ANISO_SAMPLE_QUALITY 0x000001c0 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_ANISO_SAMPLE_PERFORMANCE 0x00000140 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_UNKNOWN__MASK 0x00007c00 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_UNKNOWN__SHIFT 10 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_UNKNOWN_OFF 0x00000000 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_UNKNOWN_PARTIAL 0x00002c00 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_UNKNOWN_FULL 0x00007c00 + +#define NV40_3D_UNK0B40(i0) (0x00000b40 + 0x4*(i0)) +#define NV40_3D_UNK0B40__ESIZE 0x00000004 +#define NV40_3D_UNK0B40__LEN 0x00000008 + +#define NV30_3D_VP_UPLOAD_INST(i0) (0x00000b80 + 0x4*(i0)) +#define NV30_3D_VP_UPLOAD_INST__ESIZE 0x00000004 +#define NV30_3D_VP_UPLOAD_INST__LEN 0x00000004 + + +#define NV30_3D_TEX_CLIP_PLANE(i0) (0x00000e00 + 0x10*(i0)) + + +#define NV30_3D_TEX_CLIP_PLANE_X(i0) (0x00000e00 + 0x10*(i0)) + +#define NV30_3D_TEX_CLIP_PLANE_Y(i0) (0x00000e04 + 0x10*(i0)) + +#define NV30_3D_TEX_CLIP_PLANE_Z(i0) (0x00000e08 + 0x10*(i0)) + +#define NV30_3D_TEX_CLIP_PLANE_W(i0) (0x00000e0c + 0x10*(i0)) + +#define NV30_3D_LIGHT 0x00001000 + + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT(i0) (0x00001000 + 0x40*(i0)) + + 
+#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(i0) (0x00001000 + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(i0) (0x00001004 + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(i0) (0x00001008 + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE(i0) (0x0000100c + 0x40*(i0)) + + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(i0) (0x0000100c + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(i0) (0x00001010 + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(i0) (0x00001014 + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR(i0) (0x00001018 + 0x40*(i0)) + + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(i0) (0x00001018 + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(i0) (0x0000101c + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(i0) (0x00001020 + 0x40*(i0)) + +#define NV30_3D_LIGHT_UNK24(i0) (0x00001024 + 0x40*(i0)) + +#define NV30_3D_LIGHT_HALF_VECTOR(i0) (0x00001028 + 0x40*(i0)) + + +#define NV30_3D_LIGHT_HALF_VECTOR_X(i0) (0x00001028 + 0x40*(i0)) + +#define NV30_3D_LIGHT_HALF_VECTOR_Y(i0) (0x0000102c + 0x40*(i0)) + +#define NV30_3D_LIGHT_HALF_VECTOR_Z(i0) (0x00001030 + 0x40*(i0)) + +#define NV30_3D_LIGHT_DIRECTION(i0) (0x00001034 + 0x40*(i0)) + + +#define NV30_3D_LIGHT_DIRECTION_X(i0) (0x00001034 + 0x40*(i0)) + +#define NV30_3D_LIGHT_DIRECTION_Y(i0) (0x00001038 + 0x40*(i0)) + +#define NV30_3D_LIGHT_DIRECTION_Z(i0) (0x0000103c + 0x40*(i0)) + + +#define NV30_3D_LIGHT_SPOT_CUTOFF_A(i0) (0x00001200 + 0x40*(i0)) + +#define NV30_3D_LIGHT_SPOT_CUTOFF_B(i0) (0x00001204 + 0x40*(i0)) + +#define NV30_3D_LIGHT_SPOT_CUTOFF_C(i0) (0x00001208 + 0x40*(i0)) + +#define NV30_3D_LIGHT_SPOT_DIR(i0) (0x0000120c + 0x40*(i0)) + + +#define NV30_3D_LIGHT_SPOT_DIR_X(i0) (0x0000120c + 0x40*(i0)) + +#define NV30_3D_LIGHT_SPOT_DIR_Y(i0) (0x00001210 + 0x40*(i0)) + +#define NV30_3D_LIGHT_SPOT_DIR_Z(i0) (0x00001214 + 0x40*(i0)) + +#define 
NV30_3D_LIGHT_SPOT_CUTOFF_D(i0) (0x00001218 + 0x40*(i0)) + +#define NV30_3D_LIGHT_POSITION(i0) (0x0000121c + 0x40*(i0)) + + +#define NV30_3D_LIGHT_POSITION_X(i0) (0x0000121c + 0x40*(i0)) + +#define NV30_3D_LIGHT_POSITION_Y(i0) (0x00001220 + 0x40*(i0)) + +#define NV30_3D_LIGHT_POSITION_Z(i0) (0x00001224 + 0x40*(i0)) + +#define NV30_3D_LIGHT_ATTENUATION(i0) (0x00001228 + 0x40*(i0)) + +#define NV30_3D_LIGHT_ATTENUATION_CONSTANT(i0) (0x00001228 + 0x40*(i0)) + +#define NV30_3D_LIGHT_ATTENUATION_LINEAR(i0) (0x0000122c + 0x40*(i0)) + +#define NV30_3D_LIGHT_ATTENUATION_QUADRATIC(i0) (0x00001230 + 0x40*(i0)) + +#define NV30_3D_FRONT_MATERIAL_SHININESS(i0) (0x00001400 + 0x4*(i0)) +#define NV30_3D_FRONT_MATERIAL_SHININESS__ESIZE 0x00000004 +#define NV30_3D_FRONT_MATERIAL_SHININESS__LEN 0x00000006 + +#define NV30_3D_ENABLED_LIGHTS 0x00001420 + +#define NV30_3D_VERTEX_TWO_SIDE_ENABLE 0x0000142c + +#define NV30_3D_FP_REG_CONTROL 0x00001450 +#define NV30_3D_FP_REG_CONTROL_UNK0__MASK 0x0000ffff +#define NV30_3D_FP_REG_CONTROL_UNK0__SHIFT 0 +#define NV30_3D_FP_REG_CONTROL_UNK1__MASK 0xffff0000 +#define NV30_3D_FP_REG_CONTROL_UNK1__SHIFT 16 + +#define NV30_3D_FLATSHADE_FIRST 0x00001454 + +#define NV30_3D_EDGEFLAG 0x0000145c +#define NV30_3D_EDGEFLAG_ENABLE 0x00000001 + +#define NV30_3D_VP_CLIP_PLANES_ENABLE 0x00001478 +#define NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0 0x00000002 +#define NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE1 0x00000020 +#define NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE2 0x00000200 +#define NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE3 0x00002000 +#define NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE4 0x00020000 +#define NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE5 0x00200000 + +#define NV30_3D_POLYGON_STIPPLE_ENABLE 0x0000147c + +#define NV30_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001480 + 0x4*(i0)) +#define NV30_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004 +#define NV30_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020 + +#define NV30_3D_VTX_ATTR_3F(i0) (0x00001500 + 0x10*(i0)) +#define NV30_3D_VTX_ATTR_3F__ESIZE 
0x00000010 +#define NV30_3D_VTX_ATTR_3F__LEN 0x00000010 + + +#define NV30_3D_VTX_ATTR_3F_X(i0) (0x00001500 + 0x10*(i0)) + +#define NV30_3D_VTX_ATTR_3F_Y(i0) (0x00001504 + 0x10*(i0)) + +#define NV30_3D_VTX_ATTR_3F_Z(i0) (0x00001508 + 0x10*(i0)) + + +#define NV30_3D_VP_CLIP_PLANE(i0, i1) (0x00001600 + 0x10*(i0) + 0x4*(i1)) +#define NV30_3D_VP_CLIP_PLANE__ESIZE 0x00000004 +#define NV30_3D_VP_CLIP_PLANE__LEN 0x00000004 + +#define NV30_3D_VTXBUF(i0) (0x00001680 + 0x4*(i0)) +#define NV30_3D_VTXBUF__ESIZE 0x00000004 +#define NV30_3D_VTXBUF__LEN 0x00000010 +#define NV30_3D_VTXBUF_OFFSET__MASK 0x0fffffff +#define NV30_3D_VTXBUF_OFFSET__SHIFT 0 +#define NV30_3D_VTXBUF_DMA1 0x80000000 + +#define NV40_3D_VTX_CACHE_INVALIDATE 0x00001714 + +#define NV30_3D_VTXFMT(i0) (0x00001740 + 0x4*(i0)) +#define NV30_3D_VTXFMT__ESIZE 0x00000004 +#define NV30_3D_VTXFMT__LEN 0x00000010 +#define NV30_3D_VTXFMT_TYPE__MASK 0x0000000f +#define NV30_3D_VTXFMT_TYPE__SHIFT 0 +#define NV30_3D_VTXFMT_TYPE_B8G8R8A8_UNORM 0x00000000 +#define NV30_3D_VTXFMT_TYPE_V16_SNORM 0x00000001 +#define NV30_3D_VTXFMT_TYPE_V32_FLOAT 0x00000002 +#define NV30_3D_VTXFMT_TYPE_V16_FLOAT 0x00000003 +#define NV30_3D_VTXFMT_TYPE_U8_UNORM 0x00000004 +#define NV30_3D_VTXFMT_TYPE_V16_SSCALED 0x00000005 +#define NV30_3D_VTXFMT_TYPE_U8_USCALED 0x00000007 +#define NV30_3D_VTXFMT_SIZE__MASK 0x000000f0 +#define NV30_3D_VTXFMT_SIZE__SHIFT 4 +#define NV30_3D_VTXFMT_STRIDE__MASK 0x0000ff00 +#define NV30_3D_VTXFMT_STRIDE__SHIFT 8 + +#define NV30_3D_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION 0x000017a0 + + +#define NV30_3D_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000017a0 + +#define NV30_3D_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000017a4 + +#define NV30_3D_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000017a8 + +#define NV30_3D_COLOR_MATERIAL_BACK 0x000017b0 + + +#define NV30_3D_COLOR_MATERIAL_BACK_R 0x000017b0 + +#define NV30_3D_COLOR_MATERIAL_BACK_G 0x000017b4 + +#define 
NV30_3D_COLOR_MATERIAL_BACK_B 0x000017b8 + +#define NV30_3D_COLOR_MATERIAL_BACK_A 0x000017bc + +#define NV30_3D_QUERY_RESET 0x000017c8 + +#define NV30_3D_QUERY_ENABLE 0x000017cc + +#define NV30_3D_QUERY_GET 0x00001800 +#define NV30_3D_QUERY_GET_OFFSET__MASK 0x00ffffff +#define NV30_3D_QUERY_GET_OFFSET__SHIFT 0 +#define NV30_3D_QUERY_GET_UNK24__MASK 0xff000000 +#define NV30_3D_QUERY_GET_UNK24__SHIFT 24 + +#define NV30_3D_VERTEX_BEGIN_END 0x00001808 +#define NV30_3D_VERTEX_BEGIN_END_STOP 0x00000000 +#define NV30_3D_VERTEX_BEGIN_END_POINTS 0x00000001 +#define NV30_3D_VERTEX_BEGIN_END_LINES 0x00000002 +#define NV30_3D_VERTEX_BEGIN_END_LINE_LOOP 0x00000003 +#define NV30_3D_VERTEX_BEGIN_END_LINE_STRIP 0x00000004 +#define NV30_3D_VERTEX_BEGIN_END_TRIANGLES 0x00000005 +#define NV30_3D_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006 +#define NV30_3D_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007 +#define NV30_3D_VERTEX_BEGIN_END_QUADS 0x00000008 +#define NV30_3D_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009 +#define NV30_3D_VERTEX_BEGIN_END_POLYGON 0x0000000a + +#define NV30_3D_VB_ELEMENT_U16 0x0000180c +#define NV30_3D_VB_ELEMENT_U16_0__MASK 0x0000ffff +#define NV30_3D_VB_ELEMENT_U16_0__SHIFT 0 +#define NV30_3D_VB_ELEMENT_U16_1__MASK 0xffff0000 +#define NV30_3D_VB_ELEMENT_U16_1__SHIFT 16 + +#define NV30_3D_VB_ELEMENT_U32 0x00001810 + +#define NV30_3D_VB_VERTEX_BATCH 0x00001814 +#define NV30_3D_VB_VERTEX_BATCH_OFFSET__MASK 0x00ffffff +#define NV30_3D_VB_VERTEX_BATCH_OFFSET__SHIFT 0 +#define NV30_3D_VB_VERTEX_BATCH_COUNT__MASK 0xff000000 +#define NV30_3D_VB_VERTEX_BATCH_COUNT__SHIFT 24 + +#define NV30_3D_VERTEX_DATA 0x00001818 + +#define NV30_3D_IDXBUF_OFFSET 0x0000181c + +#define NV30_3D_IDXBUF_FORMAT 0x00001820 +#define NV30_3D_IDXBUF_FORMAT_DMA1 0x00000001 +#define NV30_3D_IDXBUF_FORMAT_TYPE__MASK 0x000000f0 +#define NV30_3D_IDXBUF_FORMAT_TYPE__SHIFT 4 +#define NV30_3D_IDXBUF_FORMAT_TYPE_U32 0x00000000 +#define NV30_3D_IDXBUF_FORMAT_TYPE_U16 0x00000010 + +#define NV30_3D_VB_INDEX_BATCH 
0x00001824 +#define NV30_3D_VB_INDEX_BATCH_START__MASK 0x00ffffff +#define NV30_3D_VB_INDEX_BATCH_START__SHIFT 0 +#define NV30_3D_VB_INDEX_BATCH_COUNT__MASK 0xff000000 +#define NV30_3D_VB_INDEX_BATCH_COUNT__SHIFT 24 + +#define NV30_3D_POLYGON_MODE_FRONT 0x00001828 +#define NV30_3D_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV30_3D_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV30_3D_POLYGON_MODE_FRONT_FILL 0x00001b02 + +#define NV30_3D_POLYGON_MODE_BACK 0x0000182c +#define NV30_3D_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV30_3D_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV30_3D_POLYGON_MODE_BACK_FILL 0x00001b02 + +#define NV30_3D_CULL_FACE 0x00001830 +#define NV30_3D_CULL_FACE_FRONT 0x00000404 +#define NV30_3D_CULL_FACE_BACK 0x00000405 +#define NV30_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 + +#define NV30_3D_FRONT_FACE 0x00001834 +#define NV30_3D_FRONT_FACE_CW 0x00000900 +#define NV30_3D_FRONT_FACE_CCW 0x00000901 + +#define NV30_3D_POLYGON_SMOOTH_ENABLE 0x00001838 + +#define NV30_3D_CULL_FACE_ENABLE 0x0000183c + +#define NV30_3D_TEX_PALETTE_OFFSET(i0) (0x00001840 + 0x4*(i0)) +#define NV30_3D_TEX_PALETTE_OFFSET__ESIZE 0x00000004 +#define NV30_3D_TEX_PALETTE_OFFSET__LEN 0x00000008 + +#define NV40_3D_TEX_SIZE1(i0) (0x00001840 + 0x4*(i0)) +#define NV40_3D_TEX_SIZE1__ESIZE 0x00000004 +#define NV40_3D_TEX_SIZE1__LEN 0x00000008 +#define NV40_3D_TEX_SIZE1_DEPTH__MASK 0xfff00000 +#define NV40_3D_TEX_SIZE1_DEPTH__SHIFT 20 +#define NV40_3D_TEX_SIZE1_PITCH__MASK 0x0000ffff +#define NV40_3D_TEX_SIZE1_PITCH__SHIFT 0 + +#define NV30_3D_VTX_ATTR_2F(i0) (0x00001880 + 0x8*(i0)) +#define NV30_3D_VTX_ATTR_2F__ESIZE 0x00000008 +#define NV30_3D_VTX_ATTR_2F__LEN 0x00000010 + + +#define NV30_3D_VTX_ATTR_2F_X(i0) (0x00001880 + 0x8*(i0)) + +#define NV30_3D_VTX_ATTR_2F_Y(i0) (0x00001884 + 0x8*(i0)) + +#define NV30_3D_VTX_ATTR_2I(i0) (0x00001900 + 0x4*(i0)) +#define NV30_3D_VTX_ATTR_2I__ESIZE 0x00000004 +#define NV30_3D_VTX_ATTR_2I__LEN 0x00000010 +#define NV30_3D_VTX_ATTR_2I_X__MASK 
0x0000ffff +#define NV30_3D_VTX_ATTR_2I_X__SHIFT 0 +#define NV30_3D_VTX_ATTR_2I_Y__MASK 0xffff0000 +#define NV30_3D_VTX_ATTR_2I_Y__SHIFT 16 + +#define NV30_3D_VTX_ATTR_4UB(i0) (0x00001940 + 0x4*(i0)) +#define NV30_3D_VTX_ATTR_4UB__ESIZE 0x00000004 +#define NV30_3D_VTX_ATTR_4UB__LEN 0x00000010 +#define NV30_3D_VTX_ATTR_4UB_X__MASK 0x000000ff +#define NV30_3D_VTX_ATTR_4UB_X__SHIFT 0 +#define NV30_3D_VTX_ATTR_4UB_Y__MASK 0x0000ff00 +#define NV30_3D_VTX_ATTR_4UB_Y__SHIFT 8 +#define NV30_3D_VTX_ATTR_4UB_Z__MASK 0x00ff0000 +#define NV30_3D_VTX_ATTR_4UB_Z__SHIFT 16 +#define NV30_3D_VTX_ATTR_4UB_W__MASK 0xff000000 +#define NV30_3D_VTX_ATTR_4UB_W__SHIFT 24 + +#define NV30_3D_VTX_ATTR_4I(i0) (0x00001980 + 0x8*(i0)) +#define NV30_3D_VTX_ATTR_4I__ESIZE 0x00000008 +#define NV30_3D_VTX_ATTR_4I__LEN 0x00000010 + +#define NV30_3D_VTX_ATTR_4I_XY(i0) (0x00001980 + 0x8*(i0)) +#define NV30_3D_VTX_ATTR_4I_XY_X__MASK 0x0000ffff +#define NV30_3D_VTX_ATTR_4I_XY_X__SHIFT 0 +#define NV30_3D_VTX_ATTR_4I_XY_Y__MASK 0xffff0000 +#define NV30_3D_VTX_ATTR_4I_XY_Y__SHIFT 16 + +#define NV30_3D_VTX_ATTR_4I_ZW(i0) (0x00001984 + 0x8*(i0)) +#define NV30_3D_VTX_ATTR_4I_ZW_Z__MASK 0x0000ffff +#define NV30_3D_VTX_ATTR_4I_ZW_Z__SHIFT 0 +#define NV30_3D_VTX_ATTR_4I_ZW_W__MASK 0xffff0000 +#define NV30_3D_VTX_ATTR_4I_ZW_W__SHIFT 16 + +#define NV30_3D_TEX_OFFSET(i0) (0x00001a00 + 0x20*(i0)) +#define NV30_3D_TEX_OFFSET__ESIZE 0x00000020 +#define NV30_3D_TEX_OFFSET__LEN 0x00000008 + +#define NV30_3D_TEX_FORMAT(i0) (0x00001a04 + 0x20*(i0)) +#define NV30_3D_TEX_FORMAT__ESIZE 0x00000020 +#define NV30_3D_TEX_FORMAT__LEN 0x00000008 +#define NV30_3D_TEX_FORMAT_DMA0 0x00000001 +#define NV30_3D_TEX_FORMAT_DMA1 0x00000002 +#define NV30_3D_TEX_FORMAT_CUBIC 0x00000004 +#define NV30_3D_TEX_FORMAT_NO_BORDER 0x00000008 +#define NV30_3D_TEX_FORMAT_DIMS__MASK 0x000000f0 +#define NV30_3D_TEX_FORMAT_DIMS__SHIFT 4 +#define NV30_3D_TEX_FORMAT_DIMS_1D 0x00000010 +#define NV30_3D_TEX_FORMAT_DIMS_2D 0x00000020 +#define 
NV30_3D_TEX_FORMAT_DIMS_3D 0x00000030 +#define NV30_3D_TEX_FORMAT_FORMAT__MASK 0x0000ff00 +#define NV30_3D_TEX_FORMAT_FORMAT__SHIFT 8 +#define NV30_3D_TEX_FORMAT_FORMAT_L8 0x00000000 +#define NV30_3D_TEX_FORMAT_FORMAT_I8 0x00000100 +#define NV30_3D_TEX_FORMAT_FORMAT_A1R5G5B5 0x00000200 +#define NV30_3D_TEX_FORMAT_FORMAT_A4R4G4B4 0x00000400 +#define NV30_3D_TEX_FORMAT_FORMAT_R5G6B5 0x00000500 +#define NV30_3D_TEX_FORMAT_FORMAT_A8R8G8B8 0x00000600 +#define NV30_3D_TEX_FORMAT_FORMAT_X8R8G8B8 0x00000700 +#define NV30_3D_TEX_FORMAT_FORMAT_INDEX8 0x00000b00 +#define NV30_3D_TEX_FORMAT_FORMAT_DXT1 0x00000c00 +#define NV30_3D_TEX_FORMAT_FORMAT_DXT3 0x00000e00 +#define NV30_3D_TEX_FORMAT_FORMAT_DXT5 0x00000f00 +#define NV30_3D_TEX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00001000 +#define NV30_3D_TEX_FORMAT_FORMAT_R5G6B5_RECT 0x00001100 +#define NV30_3D_TEX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00001200 +#define NV30_3D_TEX_FORMAT_FORMAT_L8_RECT 0x00001300 +#define NV30_3D_TEX_FORMAT_FORMAT_DSDT8_RECT 0x00001700 +#define NV30_3D_TEX_FORMAT_FORMAT_A8L8 0x00001a00 +#define NV30_3D_TEX_FORMAT_FORMAT_I8_RECT 0x00001b00 +#define NV30_3D_TEX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00001d00 +#define NV30_3D_TEX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00 +#define NV30_3D_TEX_FORMAT_FORMAT_A8L8_RECT 0x00002000 +#define NV30_3D_TEX_FORMAT_FORMAT_Z24 0x00002a00 +#define NV30_3D_TEX_FORMAT_FORMAT_Z24_RECT 0x00002b00 +#define NV30_3D_TEX_FORMAT_FORMAT_Z16 0x00002c00 +#define NV30_3D_TEX_FORMAT_FORMAT_Z16_RECT 0x00002d00 +#define NV30_3D_TEX_FORMAT_FORMAT_DSDT8 0x00002800 +#define NV30_3D_TEX_FORMAT_FORMAT_HILO16 0x00003300 +#define NV30_3D_TEX_FORMAT_FORMAT_HILO16_RECT 0x00003600 +#define NV30_3D_TEX_FORMAT_FORMAT_HILO8 0x00004400 +#define NV30_3D_TEX_FORMAT_FORMAT_SIGNED_HILO8 0x00004500 +#define NV30_3D_TEX_FORMAT_FORMAT_HILO8_RECT 0x00004600 +#define NV30_3D_TEX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00004700 +#define NV30_3D_TEX_FORMAT_FORMAT_A16 0x00003200 +#define NV30_3D_TEX_FORMAT_FORMAT_A16_RECT 0x00003500 +#define 
NV30_3D_TEX_FORMAT_FORMAT_UNK3F 0x00003f00 +#define NV30_3D_TEX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00004a00 +#define NV30_3D_TEX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00004b00 +#define NV30_3D_TEX_FORMAT_FORMAT_FLOAT_R32_NV 0x00004c00 +#define NV40_3D_TEX_FORMAT_FORMAT__MASK 0x00001f00 +#define NV40_3D_TEX_FORMAT_FORMAT__SHIFT 8 +#define NV40_3D_TEX_FORMAT_FORMAT_L8 0x00000100 +#define NV40_3D_TEX_FORMAT_FORMAT_A1R5G5B5 0x00000200 +#define NV40_3D_TEX_FORMAT_FORMAT_A4R4G4B4 0x00000300 +#define NV40_3D_TEX_FORMAT_FORMAT_R5G6B5 0x00000400 +#define NV40_3D_TEX_FORMAT_FORMAT_A8R8G8B8 0x00000500 +#define NV40_3D_TEX_FORMAT_FORMAT_DXT1 0x00000600 +#define NV40_3D_TEX_FORMAT_FORMAT_DXT3 0x00000700 +#define NV40_3D_TEX_FORMAT_FORMAT_DXT5 0x00000800 +#define NV40_3D_TEX_FORMAT_FORMAT_A8L8 0x00000b00 +#define NV40_3D_TEX_FORMAT_FORMAT_Z24 0x00001000 +#define NV40_3D_TEX_FORMAT_FORMAT_Z16 0x00001200 +#define NV40_3D_TEX_FORMAT_FORMAT_A16 0x00001400 +#define NV40_3D_TEX_FORMAT_FORMAT_A16L16 0x00001500 +#define NV40_3D_TEX_FORMAT_FORMAT_HILO8 0x00001800 +#define NV40_3D_TEX_FORMAT_FORMAT_RGBA16F 0x00001a00 +#define NV40_3D_TEX_FORMAT_FORMAT_RGBA32F 0x00001b00 +#define NV40_3D_TEX_FORMAT_LINEAR 0x00002000 +#define NV40_3D_TEX_FORMAT_RECT 0x00004000 +#define NV40_3D_TEX_FORMAT_MIPMAP_COUNT__MASK 0x000f0000 +#define NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT 16 +#define NV30_3D_TEX_FORMAT_MIPMAP 0x00080000 +#define NV30_3D_TEX_FORMAT_BASE_SIZE_U__MASK 0x00f00000 +#define NV30_3D_TEX_FORMAT_BASE_SIZE_U__SHIFT 20 +#define NV30_3D_TEX_FORMAT_BASE_SIZE_V__MASK 0x0f000000 +#define NV30_3D_TEX_FORMAT_BASE_SIZE_V__SHIFT 24 +#define NV30_3D_TEX_FORMAT_BASE_SIZE_W__MASK 0xf0000000 +#define NV30_3D_TEX_FORMAT_BASE_SIZE_W__SHIFT 28 + +#define NV30_3D_TEX_WRAP(i0) (0x00001a08 + 0x20*(i0)) +#define NV30_3D_TEX_WRAP__ESIZE 0x00000020 +#define NV30_3D_TEX_WRAP__LEN 0x00000008 +#define NV30_3D_TEX_WRAP_S__MASK 0x000000ff +#define NV30_3D_TEX_WRAP_S__SHIFT 0 +#define NV30_3D_TEX_WRAP_S_REPEAT 0x00000001 
+#define NV30_3D_TEX_WRAP_S_MIRRORED_REPEAT 0x00000002 +#define NV30_3D_TEX_WRAP_S_CLAMP_TO_EDGE 0x00000003 +#define NV30_3D_TEX_WRAP_S_CLAMP_TO_BORDER 0x00000004 +#define NV30_3D_TEX_WRAP_S_CLAMP 0x00000005 +#define NV40_3D_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE 0x00000006 +#define NV40_3D_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER 0x00000007 +#define NV40_3D_TEX_WRAP_S_MIRROR_CLAMP 0x00000008 +#define NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION__MASK 0x00000070 +#define NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION__SHIFT 4 +#define NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION_OFF 0x00000000 +#define NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION_QUALITY 0x00000020 +#define NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION_PERFORMANCE 0x00000030 +#define NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION_HIGH_PERFORMANCE 0x00000070 +#define NV30_3D_TEX_WRAP_T__MASK 0x00000f00 +#define NV30_3D_TEX_WRAP_T__SHIFT 8 +#define NV30_3D_TEX_WRAP_T_REPEAT 0x00000100 +#define NV30_3D_TEX_WRAP_T_MIRRORED_REPEAT 0x00000200 +#define NV30_3D_TEX_WRAP_T_CLAMP_TO_EDGE 0x00000300 +#define NV30_3D_TEX_WRAP_T_CLAMP_TO_BORDER 0x00000400 +#define NV30_3D_TEX_WRAP_T_CLAMP 0x00000500 +#define NV40_3D_TEX_WRAP_T_MIRROR_CLAMP_TO_EDGE 0x00000600 +#define NV40_3D_TEX_WRAP_T_MIRROR_CLAMP_TO_BORDER 0x00000700 +#define NV40_3D_TEX_WRAP_T_MIRROR_CLAMP 0x00000800 +#define NV30_3D_TEX_WRAP_EXPAND_NORMAL 0x0000f000 +#define NV30_3D_TEX_WRAP_R__MASK 0x000f0000 +#define NV30_3D_TEX_WRAP_R__SHIFT 16 +#define NV30_3D_TEX_WRAP_R_REPEAT 0x00010000 +#define NV30_3D_TEX_WRAP_R_MIRRORED_REPEAT 0x00020000 +#define NV30_3D_TEX_WRAP_R_CLAMP_TO_EDGE 0x00030000 +#define NV30_3D_TEX_WRAP_R_CLAMP_TO_BORDER 0x00040000 +#define NV30_3D_TEX_WRAP_R_CLAMP 0x00050000 +#define NV40_3D_TEX_WRAP_R_MIRROR_CLAMP_TO_EDGE 0x00060000 +#define NV40_3D_TEX_WRAP_R_MIRROR_CLAMP_TO_BORDER 0x00070000 +#define NV40_3D_TEX_WRAP_R_MIRROR_CLAMP 0x00080000 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER__MASK 0x00f00000 +#define 
NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER__SHIFT 20 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER_NONE 0x00000000 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER_RED 0x00100000 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER_GREEN 0x00200000 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER_BLUE 0x00400000 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER_ALL 0x00f00000 +#define NV30_3D_TEX_WRAP_RCOMP__MASK 0xf0000000 +#define NV30_3D_TEX_WRAP_RCOMP__SHIFT 28 +#define NV30_3D_TEX_WRAP_RCOMP_NEVER 0x00000000 +#define NV30_3D_TEX_WRAP_RCOMP_GREATER 0x10000000 +#define NV30_3D_TEX_WRAP_RCOMP_EQUAL 0x20000000 +#define NV30_3D_TEX_WRAP_RCOMP_GEQUAL 0x30000000 +#define NV30_3D_TEX_WRAP_RCOMP_LESS 0x40000000 +#define NV30_3D_TEX_WRAP_RCOMP_NOTEQUAL 0x50000000 +#define NV30_3D_TEX_WRAP_RCOMP_LEQUAL 0x60000000 +#define NV30_3D_TEX_WRAP_RCOMP_ALWAYS 0x70000000 + +#define NV30_3D_TEX_ENABLE(i0) (0x00001a0c + 0x20*(i0)) +#define NV30_3D_TEX_ENABLE__ESIZE 0x00000020 +#define NV30_3D_TEX_ENABLE__LEN 0x00000008 +#define NV30_3D_TEX_ENABLE_ANISO__MASK 0x00000030 +#define NV30_3D_TEX_ENABLE_ANISO__SHIFT 4 +#define NV30_3D_TEX_ENABLE_ANISO_NONE 0x00000000 +#define NV30_3D_TEX_ENABLE_ANISO_2X 0x00000010 +#define NV30_3D_TEX_ENABLE_ANISO_4X 0x00000020 +#define NV30_3D_TEX_ENABLE_ANISO_8X 0x00000030 +#define NV40_3D_TEX_ENABLE_ANISO__MASK 0x000000f0 +#define NV40_3D_TEX_ENABLE_ANISO__SHIFT 4 +#define NV40_3D_TEX_ENABLE_ANISO_NONE 0x00000000 +#define NV40_3D_TEX_ENABLE_ANISO_2X 0x00000010 +#define NV40_3D_TEX_ENABLE_ANISO_4X 0x00000020 +#define NV40_3D_TEX_ENABLE_ANISO_6X 0x00000030 +#define NV40_3D_TEX_ENABLE_ANISO_8X 0x00000040 +#define NV40_3D_TEX_ENABLE_ANISO_10X 0x00000050 +#define NV40_3D_TEX_ENABLE_ANISO_12X 0x00000060 +#define NV40_3D_TEX_ENABLE_ANISO_16X 0x00000070 +#define NV30_3D_TEX_ENABLE_MIPMAP_MAX_LOD__MASK 0x0003c000 +#define NV30_3D_TEX_ENABLE_MIPMAP_MAX_LOD__SHIFT 14 +#define NV40_3D_TEX_ENABLE_MIPMAP_MAX_LOD__MASK 0x00038000 +#define 
NV40_3D_TEX_ENABLE_MIPMAP_MAX_LOD__SHIFT 15 +#define NV30_3D_TEX_ENABLE_MIPMAP_MIN_LOD__MASK 0x3c000000 +#define NV30_3D_TEX_ENABLE_MIPMAP_MIN_LOD__SHIFT 26 +#define NV40_3D_TEX_ENABLE_MIPMAP_MIN_LOD__MASK 0x38000000 +#define NV40_3D_TEX_ENABLE_MIPMAP_MIN_LOD__SHIFT 27 +#define NV30_3D_TEX_ENABLE_ENABLE 0x40000000 +#define NV40_3D_TEX_ENABLE_ENABLE 0x80000000 + +#define NV30_3D_TEX_SWIZZLE(i0) (0x00001a10 + 0x20*(i0)) +#define NV30_3D_TEX_SWIZZLE__ESIZE 0x00000020 +#define NV30_3D_TEX_SWIZZLE__LEN 0x00000008 +#define NV30_3D_TEX_SWIZZLE_S1_W__MASK 0x00000003 +#define NV30_3D_TEX_SWIZZLE_S1_W__SHIFT 0 +#define NV30_3D_TEX_SWIZZLE_S1_W_W 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S1_W_Z 0x00000001 +#define NV30_3D_TEX_SWIZZLE_S1_W_Y 0x00000002 +#define NV30_3D_TEX_SWIZZLE_S1_W_X 0x00000003 +#define NV30_3D_TEX_SWIZZLE_S1_Z__MASK 0x0000000c +#define NV30_3D_TEX_SWIZZLE_S1_Z__SHIFT 2 +#define NV30_3D_TEX_SWIZZLE_S1_Z_W 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S1_Z_Z 0x00000004 +#define NV30_3D_TEX_SWIZZLE_S1_Z_Y 0x00000008 +#define NV30_3D_TEX_SWIZZLE_S1_Z_X 0x0000000c +#define NV30_3D_TEX_SWIZZLE_S1_Y__MASK 0x00000030 +#define NV30_3D_TEX_SWIZZLE_S1_Y__SHIFT 4 +#define NV30_3D_TEX_SWIZZLE_S1_Y_W 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S1_Y_Z 0x00000010 +#define NV30_3D_TEX_SWIZZLE_S1_Y_Y 0x00000020 +#define NV30_3D_TEX_SWIZZLE_S1_Y_X 0x00000030 +#define NV30_3D_TEX_SWIZZLE_S1_X__MASK 0x000000c0 +#define NV30_3D_TEX_SWIZZLE_S1_X__SHIFT 6 +#define NV30_3D_TEX_SWIZZLE_S1_X_W 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S1_X_Z 0x00000040 +#define NV30_3D_TEX_SWIZZLE_S1_X_Y 0x00000080 +#define NV30_3D_TEX_SWIZZLE_S1_X_X 0x000000c0 +#define NV30_3D_TEX_SWIZZLE_S0_W__MASK 0x00000300 +#define NV30_3D_TEX_SWIZZLE_S0_W__SHIFT 8 +#define NV30_3D_TEX_SWIZZLE_S0_W_ZERO 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S0_W_ONE 0x00000100 +#define NV30_3D_TEX_SWIZZLE_S0_W_S1 0x00000200 +#define NV30_3D_TEX_SWIZZLE_S0_Z__MASK 0x00000c00 +#define NV30_3D_TEX_SWIZZLE_S0_Z__SHIFT 10 +#define 
NV30_3D_TEX_SWIZZLE_S0_Z_ZERO 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S0_Z_ONE 0x00000400 +#define NV30_3D_TEX_SWIZZLE_S0_Z_S1 0x00000800 +#define NV30_3D_TEX_SWIZZLE_S0_Y__MASK 0x00003000 +#define NV30_3D_TEX_SWIZZLE_S0_Y__SHIFT 12 +#define NV30_3D_TEX_SWIZZLE_S0_Y_ZERO 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S0_Y_ONE 0x00001000 +#define NV30_3D_TEX_SWIZZLE_S0_Y_S1 0x00002000 +#define NV30_3D_TEX_SWIZZLE_S0_X__MASK 0x0000c000 +#define NV30_3D_TEX_SWIZZLE_S0_X__SHIFT 14 +#define NV30_3D_TEX_SWIZZLE_S0_X_ZERO 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S0_X_ONE 0x00004000 +#define NV30_3D_TEX_SWIZZLE_S0_X_S1 0x00008000 +#define NV30_3D_TEX_SWIZZLE_RECT_PITCH__MASK 0xffff0000 +#define NV30_3D_TEX_SWIZZLE_RECT_PITCH__SHIFT 16 + +#define NV30_3D_TEX_FILTER(i0) (0x00001a14 + 0x20*(i0)) +#define NV30_3D_TEX_FILTER__ESIZE 0x00000020 +#define NV30_3D_TEX_FILTER__LEN 0x00000008 +#define NV30_3D_TEX_FILTER_LOD_BIAS__MASK 0x00000f00 +#define NV30_3D_TEX_FILTER_LOD_BIAS__SHIFT 8 +#define NV30_3D_TEX_FILTER_MIN__MASK 0x000f0000 +#define NV30_3D_TEX_FILTER_MIN__SHIFT 16 +#define NV30_3D_TEX_FILTER_MIN_NEAREST 0x00010000 +#define NV30_3D_TEX_FILTER_MIN_LINEAR 0x00020000 +#define NV30_3D_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST 0x00030000 +#define NV30_3D_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST 0x00040000 +#define NV30_3D_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR 0x00050000 +#define NV30_3D_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR 0x00060000 +#define NV30_3D_TEX_FILTER_MAG__MASK 0x0f000000 +#define NV30_3D_TEX_FILTER_MAG__SHIFT 24 +#define NV30_3D_TEX_FILTER_MAG_NEAREST 0x01000000 +#define NV30_3D_TEX_FILTER_MAG_LINEAR 0x02000000 +#define NV30_3D_TEX_FILTER_SIGNED_BLUE 0x10000000 +#define NV30_3D_TEX_FILTER_SIGNED_GREEN 0x20000000 +#define NV30_3D_TEX_FILTER_SIGNED_RED 0x40000000 +#define NV30_3D_TEX_FILTER_SIGNED_ALPHA 0x80000000 + +#define NV30_3D_TEX_NPOT_SIZE(i0) (0x00001a18 + 0x20*(i0)) +#define NV30_3D_TEX_NPOT_SIZE__ESIZE 0x00000020 +#define NV30_3D_TEX_NPOT_SIZE__LEN 0x00000008 +#define 
NV30_3D_TEX_NPOT_SIZE_H__MASK 0x0000ffff +#define NV30_3D_TEX_NPOT_SIZE_H__SHIFT 0 +#define NV30_3D_TEX_NPOT_SIZE_W__MASK 0xffff0000 +#define NV30_3D_TEX_NPOT_SIZE_W__SHIFT 16 + +#define NV30_3D_TEX_BORDER_COLOR(i0) (0x00001a1c + 0x20*(i0)) +#define NV30_3D_TEX_BORDER_COLOR__ESIZE 0x00000020 +#define NV30_3D_TEX_BORDER_COLOR__LEN 0x00000008 +#define NV30_3D_TEX_BORDER_COLOR_B__MASK 0x000000ff +#define NV30_3D_TEX_BORDER_COLOR_B__SHIFT 0 +#define NV30_3D_TEX_BORDER_COLOR_G__MASK 0x0000ff00 +#define NV30_3D_TEX_BORDER_COLOR_G__SHIFT 8 +#define NV30_3D_TEX_BORDER_COLOR_R__MASK 0x00ff0000 +#define NV30_3D_TEX_BORDER_COLOR_R__SHIFT 16 +#define NV30_3D_TEX_BORDER_COLOR_A__MASK 0xff000000 +#define NV30_3D_TEX_BORDER_COLOR_A__SHIFT 24 + +#define NV30_3D_VTX_ATTR_4F(i0) (0x00001c00 + 0x10*(i0)) +#define NV30_3D_VTX_ATTR_4F__ESIZE 0x00000010 +#define NV30_3D_VTX_ATTR_4F__LEN 0x00000010 + + +#define NV30_3D_VTX_ATTR_4F_X(i0) (0x00001c00 + 0x10*(i0)) + +#define NV30_3D_VTX_ATTR_4F_Y(i0) (0x00001c04 + 0x10*(i0)) + +#define NV30_3D_VTX_ATTR_4F_Z(i0) (0x00001c08 + 0x10*(i0)) + +#define NV30_3D_VTX_ATTR_4F_W(i0) (0x00001c0c + 0x10*(i0)) + +#define NV30_3D_FP_CONTROL 0x00001d60 +#define NV30_3D_FP_CONTROL_USED_REGS_MINUS1_DIV2__MASK 0x0000000f +#define NV30_3D_FP_CONTROL_USED_REGS_MINUS1_DIV2__SHIFT 0 +#define NV30_3D_FP_CONTROL_USES_KIL 0x00000080 +#define NV40_3D_FP_CONTROL_KIL 0x00000080 +#define NV40_3D_FP_CONTROL_TEMP_COUNT__MASK 0xff000000 +#define NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT 24 + +#define NV30_3D_FENCE_OFFSET 0x00001d6c + +#define NV30_3D_FENCE_VALUE 0x00001d70 + +#define NV30_3D_DEPTH_CONTROL 0x00001d78 +#define NV30_3D_DEPTH_CONTROL_CLAMP 0x000000f0 + +#define NV30_3D_MULTISAMPLE_CONTROL 0x00001d7c +#define NV30_3D_MULTISAMPLE_CONTROL_ENABLE 0x00000001 +#define NV30_3D_MULTISAMPLE_CONTROL_SAMPLE_ALPHA_TO_COVERAGE 0x00000010 +#define NV30_3D_MULTISAMPLE_CONTROL_SAMPLE_ALPHA_TO_ONE 0x00000100 +#define NV30_3D_MULTISAMPLE_CONTROL_SAMPLE_COVERAGE__MASK 0xffff0000 
+#define NV30_3D_MULTISAMPLE_CONTROL_SAMPLE_COVERAGE__SHIFT 16 + +#define NV30_3D_COORD_CONVENTIONS 0x00001d88 +#define NV30_3D_COORD_CONVENTIONS_HEIGHT__MASK 0x00000fff +#define NV30_3D_COORD_CONVENTIONS_HEIGHT__SHIFT 0 +#define NV30_3D_COORD_CONVENTIONS_ORIGIN__MASK 0x00001000 +#define NV30_3D_COORD_CONVENTIONS_ORIGIN__SHIFT 12 +#define NV30_3D_COORD_CONVENTIONS_ORIGIN_NORMAL 0x00000000 +#define NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED 0x00001000 +#define NV30_3D_COORD_CONVENTIONS_CENTER__MASK 0x00010000 +#define NV30_3D_COORD_CONVENTIONS_CENTER__SHIFT 16 +#define NV30_3D_COORD_CONVENTIONS_CENTER_HALF_INTEGER 0x00000000 +#define NV30_3D_COORD_CONVENTIONS_CENTER_INTEGER 0x00010000 + +#define NV30_3D_CLEAR_DEPTH_VALUE 0x00001d8c + +#define NV30_3D_CLEAR_COLOR_VALUE 0x00001d90 +#define NV30_3D_CLEAR_COLOR_VALUE_B__MASK 0x000000ff +#define NV30_3D_CLEAR_COLOR_VALUE_B__SHIFT 0 +#define NV30_3D_CLEAR_COLOR_VALUE_G__MASK 0x0000ff00 +#define NV30_3D_CLEAR_COLOR_VALUE_G__SHIFT 8 +#define NV30_3D_CLEAR_COLOR_VALUE_R__MASK 0x00ff0000 +#define NV30_3D_CLEAR_COLOR_VALUE_R__SHIFT 16 +#define NV30_3D_CLEAR_COLOR_VALUE_A__MASK 0xff000000 +#define NV30_3D_CLEAR_COLOR_VALUE_A__SHIFT 24 + +#define NV30_3D_CLEAR_BUFFERS 0x00001d94 +#define NV30_3D_CLEAR_BUFFERS_DEPTH 0x00000001 +#define NV30_3D_CLEAR_BUFFERS_STENCIL 0x00000002 +#define NV30_3D_CLEAR_BUFFERS_COLOR_R 0x00000010 +#define NV30_3D_CLEAR_BUFFERS_COLOR_G 0x00000020 +#define NV30_3D_CLEAR_BUFFERS_COLOR_B 0x00000040 +#define NV30_3D_CLEAR_BUFFERS_COLOR_A 0x00000080 + +#define NV30_3D_PRIMITIVE_RESTART_ENABLE 0x00001dac + +#define NV30_3D_PRIMITIVE_RESTART_INDEX 0x00001db0 + +#define NV30_3D_LINE_STIPPLE_ENABLE 0x00001db4 + +#define NV30_3D_LINE_STIPPLE_PATTERN 0x00001db8 +#define NV30_3D_LINE_STIPPLE_PATTERN_FACTOR__MASK 0x0000ffff +#define NV30_3D_LINE_STIPPLE_PATTERN_FACTOR__SHIFT 0 +#define NV30_3D_LINE_STIPPLE_PATTERN_PATTERN__MASK 0xffff0000 +#define NV30_3D_LINE_STIPPLE_PATTERN_PATTERN__SHIFT 16 + +#define 
NV30_3D_BACK_MATERIAL_SHININESS(i0) (0x00001e20 + 0x4*(i0)) +#define NV30_3D_BACK_MATERIAL_SHININESS__ESIZE 0x00000004 +#define NV30_3D_BACK_MATERIAL_SHININESS__LEN 0x00000006 + +#define NV30_3D_VTX_ATTR_1F(i0) (0x00001e40 + 0x4*(i0)) +#define NV30_3D_VTX_ATTR_1F__ESIZE 0x00000004 +#define NV30_3D_VTX_ATTR_1F__LEN 0x00000010 + +#define NV30_3D_ENGINE 0x00001e94 +#define NV30_3D_ENGINE_FP 0x00000001 +#define NV30_3D_ENGINE_VP 0x00000002 +#define NV30_3D_ENGINE_FIXED 0x00000004 + +#define NV30_3D_VP_UPLOAD_FROM_ID 0x00001e9c + +#define NV30_3D_VP_START_FROM_ID 0x00001ea0 + +#define NV30_3D_POINT_PARAMETERS(i0) (0x00001ec0 + 0x4*(i0)) +#define NV30_3D_POINT_PARAMETERS__ESIZE 0x00000004 +#define NV30_3D_POINT_PARAMETERS__LEN 0x00000008 + +#define NV30_3D_POINT_SIZE 0x00001ee0 + +#define NV30_3D_POINT_PARAMETERS_ENABLE 0x00001ee4 + +#define NV30_3D_POINT_SPRITE 0x00001ee8 +#define NV30_3D_POINT_SPRITE_ENABLE 0x00000001 +#define NV30_3D_POINT_SPRITE_R_MODE__MASK 0x00000006 +#define NV30_3D_POINT_SPRITE_R_MODE__SHIFT 1 +#define NV30_3D_POINT_SPRITE_R_MODE_ZERO 0x00000000 +#define NV30_3D_POINT_SPRITE_R_MODE_R 0x00000002 +#define NV30_3D_POINT_SPRITE_R_MODE_S 0x00000004 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_0 0x00000100 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_1 0x00000200 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_2 0x00000400 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_3 0x00000800 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_4 0x00001000 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_5 0x00002000 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_6 0x00004000 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_7 0x00008000 + +#define NV30_3D_VP_UPLOAD_CONST_ID 0x00001efc + +#define NV30_3D_VP_UPLOAD_CONST(i0) (0x00001f00 + 0x10*(i0)) +#define NV30_3D_VP_UPLOAD_CONST__ESIZE 0x00000010 +#define NV30_3D_VP_UPLOAD_CONST__LEN 0x00000004 + + +#define NV30_3D_VP_UPLOAD_CONST_X(i0) (0x00001f00 + 0x10*(i0)) + +#define NV30_3D_VP_UPLOAD_CONST_Y(i0) (0x00001f04 + 0x10*(i0)) + +#define 
NV30_3D_VP_UPLOAD_CONST_Z(i0) (0x00001f08 + 0x10*(i0)) + +#define NV30_3D_VP_UPLOAD_CONST_W(i0) (0x00001f0c + 0x10*(i0)) + +#define NV30_3D_UNK1F80(i0) (0x00001f80 + 0x4*(i0)) +#define NV30_3D_UNK1F80__ESIZE 0x00000004 +#define NV30_3D_UNK1F80__LEN 0x00000010 + +#define NV40_3D_TEX_CACHE_CTL 0x00001fd8 + +#define NV40_3D_VP_ATTRIB_EN 0x00001ff0 + +#define NV40_3D_VP_RESULT_EN 0x00001ff4 + + +#endif /* _HOME_SKEGGSB_GIT_ENVYTOOLS_RNNDB_NV30_40_3D_XML */ diff --git a/src/gallium/drivers/nouveau/nv30/nv30_clear.c b/src/gallium/drivers/nouveau/nv30/nv30_clear.c new file mode 100644 index 00000000000..5317e892b25 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_clear.c @@ -0,0 +1,226 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#include "pipe/p_defines.h" +#include "util/u_pack_color.h" + +#include "nouveau_gldefs.h" +#include "nv_object.xml.h" +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_format.h" + +static INLINE uint32_t +pack_rgba(enum pipe_format format, const float *rgba) +{ + union util_color uc; + util_pack_color(rgba, format, &uc); + return uc.ui; +} + +static INLINE uint32_t +pack_zeta(enum pipe_format format, double depth, unsigned stencil) +{ + uint32_t zuint = (uint32_t)(depth * 4294967295.0); + if (format != PIPE_FORMAT_Z16_UNORM) + return (zuint & 0xffffff00) | (stencil & 0xff); + return zuint >> 16; +} + +static void +nv30_clear(struct pipe_context *pipe, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct pipe_framebuffer_state *fb = &nv30->framebuffer; + uint32_t colr = 0, zeta = 0, mode = 0; + + if (!nv30_state_validate(nv30, TRUE)) + return; + + if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { + colr = pack_rgba(fb->cbufs[0]->format, color->f); + mode |= NV30_3D_CLEAR_BUFFERS_COLOR_R | + NV30_3D_CLEAR_BUFFERS_COLOR_G | + NV30_3D_CLEAR_BUFFERS_COLOR_B | + NV30_3D_CLEAR_BUFFERS_COLOR_A; + } + + if (fb->zsbuf) { + zeta = pack_zeta(fb->zsbuf->format, depth, stencil); + if (buffers & PIPE_CLEAR_DEPTH) + mode |= NV30_3D_CLEAR_BUFFERS_DEPTH; + if (buffers & PIPE_CLEAR_STENCIL) + mode |= NV30_3D_CLEAR_BUFFERS_STENCIL; + } + + /*XXX: wtf? fixes clears sometimes not clearing on nv3x... 
*/ + if (nv30->screen->eng3d->oclass < NV40_3D_CLASS) { + BEGIN_NV04(push, NV30_3D(CLEAR_DEPTH_VALUE), 3); + PUSH_DATA (push, zeta); + PUSH_DATA (push, colr); + PUSH_DATA (push, mode); + } + + BEGIN_NV04(push, NV30_3D(CLEAR_DEPTH_VALUE), 3); + PUSH_DATA (push, zeta); + PUSH_DATA (push, colr); + PUSH_DATA (push, mode); + + nv30_state_release(nv30); +} + +static void +nv30_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps, + const union pipe_color_union *color, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_surface *sf = nv30_surface(ps); + struct nv30_miptree *mt = nv30_miptree(ps->texture); + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct nouveau_object *eng3d = nv30->screen->eng3d; + struct nouveau_pushbuf_refn refn; + uint32_t rt_format; + + rt_format = nv30_format(pipe->screen, ps->format)->hw; + if (util_format_get_blocksize(ps->format) == 4) + rt_format |= NV30_3D_RT_FORMAT_ZETA_Z24S8; + else + rt_format |= NV30_3D_RT_FORMAT_ZETA_Z16; + + if (nv30_miptree(ps->texture)->swizzled) { + rt_format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED; + rt_format |= util_logbase2(sf->width) << 16; + rt_format |= util_logbase2(sf->height) << 24; + } else { + rt_format |= NV30_3D_RT_FORMAT_TYPE_LINEAR; + } + + refn.bo = mt->base.bo; + refn.flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; + if (nouveau_pushbuf_space(push, 16, 1, 0) || + nouveau_pushbuf_refn (push, &refn, 1)) + return; + + BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1); + PUSH_DATA (push, NV30_3D_RT_ENABLE_COLOR0); + BEGIN_NV04(push, NV30_3D(RT_HORIZ), 3); + PUSH_DATA (push, sf->width << 16); + PUSH_DATA (push, sf->height << 16); + PUSH_DATA (push, rt_format); + BEGIN_NV04(push, NV30_3D(COLOR0_PITCH), 2); + if (eng3d->oclass < NV40_3D_CLASS) + PUSH_DATA (push, (sf->pitch << 16) | sf->pitch); + else + PUSH_DATA (push, sf->pitch); + PUSH_RELOC(push, mt->base.bo, sf->offset, NOUVEAU_BO_LOW, 0, 0); + BEGIN_NV04(push, 
NV30_3D(SCISSOR_HORIZ), 2); + PUSH_DATA (push, (w << 16) | x); + PUSH_DATA (push, (h << 16) | y); + + BEGIN_NV04(push, NV30_3D(CLEAR_COLOR_VALUE), 2); + PUSH_DATA (push, pack_rgba(ps->format, color->f)); + PUSH_DATA (push, NV30_3D_CLEAR_BUFFERS_COLOR_R | + NV30_3D_CLEAR_BUFFERS_COLOR_G | + NV30_3D_CLEAR_BUFFERS_COLOR_B | + NV30_3D_CLEAR_BUFFERS_COLOR_A); + + nv30->dirty |= NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR; +} + +static void +nv30_clear_depth_stencil(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned buffers, double depth, unsigned stencil, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_surface *sf = nv30_surface(ps); + struct nv30_miptree *mt = nv30_miptree(ps->texture); + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct nouveau_object *eng3d = nv30->screen->eng3d; + struct nouveau_pushbuf_refn refn; + uint32_t rt_format, mode = 0; + + rt_format = nv30_format(pipe->screen, ps->format)->hw; + if (util_format_get_blocksize(ps->format) == 4) + rt_format |= NV30_3D_RT_FORMAT_COLOR_A8R8G8B8; + else + rt_format |= NV30_3D_RT_FORMAT_COLOR_R5G6B5; + + if (nv30_miptree(ps->texture)->swizzled) { + rt_format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED; + rt_format |= util_logbase2(sf->width) << 16; + rt_format |= util_logbase2(sf->height) << 24; + } else { + rt_format |= NV30_3D_RT_FORMAT_TYPE_LINEAR; + } + + if (buffers & PIPE_CLEAR_DEPTH) + mode |= NV30_3D_CLEAR_BUFFERS_DEPTH; + if (buffers & PIPE_CLEAR_STENCIL) + mode |= NV30_3D_CLEAR_BUFFERS_STENCIL; + + refn.bo = mt->base.bo; + refn.flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; + if (nouveau_pushbuf_space(push, 32, 1, 0) || + nouveau_pushbuf_refn (push, &refn, 1)) + return; + + BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(RT_HORIZ), 3); + PUSH_DATA (push, sf->width << 16); + PUSH_DATA (push, sf->height << 16); + PUSH_DATA (push, rt_format); + if (eng3d->oclass < NV40_3D_CLASS) { + 
BEGIN_NV04(push, NV30_3D(COLOR0_PITCH), 1); + PUSH_DATA (push, (sf->pitch << 16) | sf->pitch); + } else { + BEGIN_NV04(push, NV40_3D(ZETA_PITCH), 1); + PUSH_DATA (push, sf->pitch); + } + BEGIN_NV04(push, NV30_3D(ZETA_OFFSET), 1); + PUSH_RELOC(push, mt->base.bo, sf->offset, NOUVEAU_BO_LOW, 0, 0); + BEGIN_NV04(push, NV30_3D(SCISSOR_HORIZ), 2); + PUSH_DATA (push, (w << 16) | x); + PUSH_DATA (push, (h << 16) | y); + + BEGIN_NV04(push, NV30_3D(CLEAR_DEPTH_VALUE), 1); + PUSH_DATA (push, pack_zeta(ps->format, depth, stencil)); + BEGIN_NV04(push, NV30_3D(CLEAR_BUFFERS), 1); + PUSH_DATA (push, mode); + + nv30->dirty |= NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR; +} + +void +nv30_clear_init(struct pipe_context *pipe) +{ + pipe->clear = nv30_clear; + pipe->clear_render_target = nv30_clear_render_target; + pipe->clear_depth_stencil = nv30_clear_depth_stencil; +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.c b/src/gallium/drivers/nouveau/nv30/nv30_context.c new file mode 100644 index 00000000000..2146d2726b4 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_context.c @@ -0,0 +1,263 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + * + */ + +#include "draw/draw_context.h" + +#include "nv_object.xml.h" +#include "nv30/nv30-40_3d.xml.h" + +#include "nouveau_fence.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_transfer.h" +#include "nv30/nv30_state.h" + +static void +nv30_context_kick_notify(struct nouveau_pushbuf *push) +{ + struct nouveau_screen *screen; + struct nv30_context *nv30; + + if (!push->user_priv) + return; + nv30 = container_of(push->user_priv, nv30, bufctx); + screen = &nv30->screen->base; + + nouveau_fence_next(screen); + nouveau_fence_update(screen, TRUE); + + if (push->bufctx) { + struct nouveau_bufref *bref; + LIST_FOR_EACH_ENTRY(bref, &push->bufctx->current, thead) { + struct nv04_resource *res = bref->priv; + if (res && res->mm) { + nouveau_fence_ref(screen->fence.current, &res->fence); + + if (bref->flags & NOUVEAU_BO_RD) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + if (bref->flags & NOUVEAU_BO_WR) { + nouveau_fence_ref(screen->fence.current, &res->fence_wr); + res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING | + NOUVEAU_BUFFER_STATUS_DIRTY; + } + } + } + } +} + +static void +nv30_context_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence, + unsigned flags) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_pushbuf *push = nv30->base.pushbuf; + + if (fence) + nouveau_fence_ref(nv30->screen->base.fence.current, + (struct nouveau_fence **)fence); + + PUSH_KICK(push); + + nouveau_context_update_frame_stats(&nv30->base); +} + +static int +nv30_invalidate_resource_storage(struct nouveau_context *nv, + struct pipe_resource *res, + int ref) +{ + struct nv30_context *nv30 = nv30_context(&nv->pipe); + unsigned i; + + if (res->bind 
& PIPE_BIND_RENDER_TARGET) { + for (i = 0; i < nv30->framebuffer.nr_cbufs; ++i) { + if (nv30->framebuffer.cbufs[i] && + nv30->framebuffer.cbufs[i]->texture == res) { + nv30->dirty |= NV30_NEW_FRAMEBUFFER; + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_FB); + if (!--ref) + return ref; + } + } + } + if (res->bind & PIPE_BIND_DEPTH_STENCIL) { + if (nv30->framebuffer.zsbuf && + nv30->framebuffer.zsbuf->texture == res) { + nv30->dirty |= NV30_NEW_FRAMEBUFFER; + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_FB); + if (!--ref) + return ref; + } + } + + if (res->bind & PIPE_BIND_VERTEX_BUFFER) { + for (i = 0; i < nv30->num_vtxbufs; ++i) { + if (nv30->vtxbuf[i].buffer == res) { + nv30->dirty |= NV30_NEW_ARRAYS; + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_VTXBUF); + if (!--ref) + return ref; + } + } + } + if (res->bind & PIPE_BIND_INDEX_BUFFER) { + if (nv30->idxbuf.buffer == res) { + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_IDXBUF); + if (!--ref) + return ref; + } + } + + if (res->bind & PIPE_BIND_SAMPLER_VIEW) { + for (i = 0; i < nv30->fragprog.num_textures; ++i) { + if (nv30->fragprog.textures[i] && + nv30->fragprog.textures[i]->texture == res) { + nv30->dirty |= NV30_NEW_FRAGTEX; + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_FRAGTEX(i)); + if (!--ref) + return ref; + } + } + for (i = 0; i < nv30->vertprog.num_textures; ++i) { + if (nv30->vertprog.textures[i] && + nv30->vertprog.textures[i]->texture == res) { + nv30->dirty |= NV30_NEW_VERTTEX; + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_VERTTEX(i)); + if (!--ref) + return ref; + } + } + } + + return ref; +} + +static void +nv30_context_destroy(struct pipe_context *pipe) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + if (nv30->blitter) + util_blitter_destroy(nv30->blitter); + + if (nv30->draw) + draw_destroy(nv30->draw); + + nouveau_bufctx_del(&nv30->bufctx); + + if (nv30->screen->cur_ctx == nv30) + nv30->screen->cur_ctx = NULL; + + nouveau_context_destroy(&nv30->base); +} + +#define FAIL_CONTEXT_INIT(str, err) \ + do { \ 
+ NOUVEAU_ERR(str, err); \ + nv30_context_destroy(pipe); \ + return NULL; \ + } while(0) + +struct pipe_context * +nv30_context_create(struct pipe_screen *pscreen, void *priv) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + struct nv30_context *nv30 = CALLOC_STRUCT(nv30_context); + struct nouveau_pushbuf *push; + struct pipe_context *pipe; + int ret; + + if (!nv30) + return NULL; + + nv30->screen = screen; + nv30->base.screen = &screen->base; + nv30->base.copy_data = nv30_transfer_copy_data; + + pipe = &nv30->base.pipe; + pipe->screen = pscreen; + pipe->priv = priv; + pipe->destroy = nv30_context_destroy; + pipe->flush = nv30_context_flush; + + /*XXX: *cough* per-context client */ + nv30->base.client = screen->base.client; + + /*XXX: *cough* per-context pushbufs */ + push = screen->base.pushbuf; + nv30->base.pushbuf = push; + nv30->base.pushbuf->user_priv = &nv30->bufctx; /* hack at validate time */ + nv30->base.pushbuf->rsvd_kick = 16; /* hack in screen before first space */ + nv30->base.pushbuf->kick_notify = nv30_context_kick_notify; + + nv30->base.invalidate_resource_storage = nv30_invalidate_resource_storage; + + ret = nouveau_bufctx_new(nv30->base.client, 64, &nv30->bufctx); + if (ret) { + nv30_context_destroy(pipe); + return NULL; + } + + /*XXX: make configurable with performance vs quality, these defaults + * match the binary driver's defaults + */ + if (screen->eng3d->oclass < NV40_3D_CLASS) + nv30->config.filter = 0x00000004; + else + nv30->config.filter = 0x00002dc4; + + nv30->config.aniso = NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION_OFF; + + if (debug_get_bool_option("NV30_SWTNL", FALSE)) + nv30->draw_flags |= NV30_NEW_SWTNL; + + /*XXX: nvfx... */ + nv30->is_nv4x = (screen->eng3d->oclass >= NV40_3D_CLASS) ? ~0 : 0; + nv30->use_nv4x = (screen->eng3d->oclass >= NV40_3D_CLASS) ? 
~0 : 0; + nv30->render_mode = HW; + + nv30->sample_mask = 0xffff; + nv30_vbo_init(pipe); + nv30_query_init(pipe); + nv30_state_init(pipe); + nv30_resource_init(pipe); + nv30_clear_init(pipe); + nv30_fragprog_init(pipe); + nv30_vertprog_init(pipe); + nv30_texture_init(pipe); + nv30_fragtex_init(pipe); + nv40_verttex_init(pipe); + nv30_draw_init(pipe); + + nv30->blitter = util_blitter_create(pipe); + if (!nv30->blitter) { + nv30_context_destroy(pipe); + return NULL; + } + + nouveau_context_init_vdec(&nv30->base); + + return pipe; +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.h b/src/gallium/drivers/nouveau/nv30/nv30_context.h new file mode 100644 index 00000000000..12ae0c71e4f --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_context.h @@ -0,0 +1,237 @@ +#ifndef __NV30_CONTEXT_H__ +#define __NV30_CONTEXT_H__ + +#include "pipe/p_format.h" +#include "util/u_blitter.h" + +#include "nv30/nv30_screen.h" +#include "nv30/nv30_state.h" + +#include "nouveau_context.h" + +#define BUFCTX_FB 0 +#define BUFCTX_VTXTMP 1 +#define BUFCTX_VTXBUF 2 +#define BUFCTX_IDXBUF 3 +#define BUFCTX_VERTTEX(n) (4 + (n)) +#define BUFCTX_FRAGPROG 8 +#define BUFCTX_FRAGTEX(n) (9 + (n)) + +#define NV30_NEW_BLEND (1 << 0) +#define NV30_NEW_RASTERIZER (1 << 1) +#define NV30_NEW_ZSA (1 << 2) +#define NV30_NEW_VERTPROG (1 << 3) +#define NV30_NEW_VERTCONST (1 << 4) +#define NV30_NEW_FRAGPROG (1 << 5) +#define NV30_NEW_FRAGCONST (1 << 6) +#define NV30_NEW_BLEND_COLOUR (1 << 7) +#define NV30_NEW_STENCIL_REF (1 << 8) +#define NV30_NEW_CLIP (1 << 9) +#define NV30_NEW_SAMPLE_MASK (1 << 10) +#define NV30_NEW_FRAMEBUFFER (1 << 11) +#define NV30_NEW_STIPPLE (1 << 12) +#define NV30_NEW_SCISSOR (1 << 13) +#define NV30_NEW_VIEWPORT (1 << 14) +#define NV30_NEW_ARRAYS (1 << 15) +#define NV30_NEW_VERTEX (1 << 16) +#define NV30_NEW_CONSTBUF (1 << 17) +#define NV30_NEW_FRAGTEX (1 << 18) +#define NV30_NEW_VERTTEX (1 << 19) +#define NV30_NEW_SWTNL (1 << 31) +#define NV30_NEW_ALL 0x000fffff + 
+struct nv30_context { + struct nouveau_context base; + struct nv30_screen *screen; + struct blitter_context *blitter; + + struct nouveau_bufctx *bufctx; + + struct { + unsigned rt_enable; + unsigned scissor_off; + unsigned num_vtxelts; + boolean prim_restart; + struct nv30_fragprog *fragprog; + } state; + + uint32_t dirty; + + struct draw_context *draw; + uint32_t draw_flags; + uint32_t draw_dirty; + + struct nv30_blend_stateobj *blend; + struct nv30_rasterizer_stateobj *rast; + struct nv30_zsa_stateobj *zsa; + struct nv30_vertex_stateobj *vertex; + + struct { + unsigned filter; + unsigned aniso; + } config; + + struct { + struct nv30_vertprog *program; + + struct pipe_resource *constbuf; + unsigned constbuf_nr; + + struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS]; + unsigned num_textures; + struct nv30_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + unsigned num_samplers; + unsigned dirty_samplers; + } vertprog; + + struct { + struct nv30_fragprog *program; + + struct pipe_resource *constbuf; + unsigned constbuf_nr; + + struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS]; + unsigned num_textures; + struct nv30_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + unsigned num_samplers; + unsigned dirty_samplers; + } fragprog; + + struct pipe_framebuffer_state framebuffer; + struct pipe_blend_color blend_colour; + struct pipe_stencil_ref stencil_ref; + struct pipe_poly_stipple stipple; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + struct pipe_clip_state clip; + + unsigned sample_mask; + + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned num_vtxbufs; + struct pipe_index_buffer idxbuf; + uint32_t vbo_fifo; + uint32_t vbo_user; + unsigned vbo_min_index; + unsigned vbo_max_index; + boolean vbo_push_hint; + + struct nouveau_heap *blit_vp; + struct pipe_resource *blit_fp; + + /*XXX: nvfx state, DO NOT USE EVER OUTSIDE "STOLEN" NVFX code */ + unsigned is_nv4x; + unsigned use_nv4x; + bool hw_pointsprite_control; + enum { + HW, + 
} render_mode; + + struct pipe_query *render_cond_query; + unsigned render_cond_mode; + boolean render_cond_cond; +}; + +static INLINE struct nv30_context * +nv30_context(struct pipe_context *pipe) +{ + return (struct nv30_context *)pipe; +} + +struct pipe_context * +nv30_context_create(struct pipe_screen *pscreen, void *priv); + +void +nv30_vbo_init(struct pipe_context *pipe); + +void +nv30_vbo_validate(struct nv30_context *nv30); + +void +nv30_query_init(struct pipe_context *pipe); + +void +nv30_state_init(struct pipe_context *pipe); + +void +nv30_clear_init(struct pipe_context *pipe); + +void +nv30_vertprog_init(struct pipe_context *pipe); + +void +nv30_vertprog_validate(struct nv30_context *nv30); + +void +nv30_fragprog_init(struct pipe_context *pipe); + +void +nv30_fragprog_validate(struct nv30_context *nv30); + +void +nv30_texture_init(struct pipe_context *pipe); + +void +nv30_texture_validate(struct nv30_context *nv30); + +void +nv30_fragtex_init(struct pipe_context *pipe); + +void +nv30_fragtex_validate(struct nv30_context *nv30); + +void +nv40_verttex_init(struct pipe_context *pipe); + +void +nv40_verttex_validate(struct nv30_context *nv30); + +void +nv30_push_vbo(struct nv30_context *nv30, const struct pipe_draw_info *info); + +void +nv30_draw_init(struct pipe_context *pipe); + +void +nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); + +boolean +nv30_state_validate(struct nv30_context *nv30, boolean hwtnl); + +void +nv30_state_release(struct nv30_context *nv30); + +//XXX: needed to make it build, clean this up! 
+void +_nvfx_fragprog_translate(struct nv30_context *nvfx, struct nv30_fragprog *fp, + boolean emulate_sprite_flipping); + +boolean +_nvfx_vertprog_translate(struct nv30_context *nv30, struct nv30_vertprog *vp); + +#ifdef NV30_3D_VERTEX_BEGIN_END +#define NV30_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NV30_3D_VERTEX_BEGIN_END_##n + +static INLINE unsigned +nv30_prim_gl(unsigned prim) +{ + switch (prim) { + NV30_PRIM_GL_CASE(POINTS); + NV30_PRIM_GL_CASE(LINES); + NV30_PRIM_GL_CASE(LINE_LOOP); + NV30_PRIM_GL_CASE(LINE_STRIP); + NV30_PRIM_GL_CASE(TRIANGLES); + NV30_PRIM_GL_CASE(TRIANGLE_STRIP); + NV30_PRIM_GL_CASE(TRIANGLE_FAN); + NV30_PRIM_GL_CASE(QUADS); + NV30_PRIM_GL_CASE(QUAD_STRIP); + NV30_PRIM_GL_CASE(POLYGON); + default: + return NV30_3D_VERTEX_BEGIN_END_POINTS; + break; + } +} +#endif + +#endif diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c new file mode 100644 index 00000000000..3575c3d29fa --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -0,0 +1,506 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + * + */ + +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pipe.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_private.h" + +#include "nv_object.xml.h" +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_format.h" + +struct nv30_render { + struct vbuf_render base; + struct nv30_context *nv30; + + struct pipe_transfer *transfer; + struct pipe_resource *buffer; + unsigned offset; + unsigned length; + + struct vertex_info vertex_info; + + struct nouveau_heap *vertprog; + uint32_t vtxprog[16][4]; + uint32_t vtxfmt[16]; + uint32_t vtxptr[16]; + uint32_t prim; +}; + +static INLINE struct nv30_render * +nv30_render(struct vbuf_render *render) +{ + return (struct nv30_render *)render; +} + +static const struct vertex_info * +nv30_render_get_vertex_info(struct vbuf_render *render) +{ + return &nv30_render(render)->vertex_info; +} + +static boolean +nv30_render_allocate_vertices(struct vbuf_render *render, + ushort vertex_size, ushort nr_vertices) +{ + struct nv30_render *r = nv30_render(render); + struct nv30_context *nv30 = r->nv30; + + r->length = vertex_size * nr_vertices; + + if (r->offset + r->length >= render->max_vertex_buffer_bytes) { + pipe_resource_reference(&r->buffer, NULL); + r->buffer = pipe_buffer_create(&nv30->screen->base.base, + PIPE_BIND_VERTEX_BUFFER, 0, + render->max_vertex_buffer_bytes); + if (!r->buffer) + return FALSE; + + r->offset = 0; + } + + return TRUE; +} + +static void * +nv30_render_map_vertices(struct vbuf_render *render) +{ + struct nv30_render *r = nv30_render(render); + char *map = pipe_buffer_map(&r->nv30->base.pipe, r->buffer, + PIPE_TRANSFER_WRITE | + 
PIPE_TRANSFER_UNSYNCHRONIZED, &r->transfer); + return map + r->offset; +} + +static void +nv30_render_unmap_vertices(struct vbuf_render *render, + ushort min_index, ushort max_index) +{ + struct nv30_render *r = nv30_render(render); + pipe_buffer_unmap(&r->nv30->base.pipe, r->transfer); +} + +static void +nv30_render_set_primitive(struct vbuf_render *render, unsigned prim) +{ + struct nv30_render *r = nv30_render(render); + + r->prim = nv30_prim_gl(prim); +} + +static void +nv30_render_draw_elements(struct vbuf_render *render, + const ushort *indices, uint count) +{ + struct nv30_render *r = nv30_render(render); + struct nv30_context *nv30 = r->nv30; + struct nouveau_pushbuf *push = nv30->screen->base.pushbuf; + unsigned i; + + BEGIN_NV04(push, NV30_3D(VTXBUF(0)), r->vertex_info.num_attribs); + for (i = 0; i < r->vertex_info.num_attribs; i++) { + PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP, + nv04_resource(r->buffer), r->offset + r->vtxptr[i], + NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); + } + + if (!nv30_state_validate(nv30, FALSE)) + return; + + BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); + PUSH_DATA (push, r->prim); + + if (count & 1) { + BEGIN_NV04(push, NV30_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, *indices++); + } + + count >>= 1; + while (count) { + unsigned npush = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); + count -= npush; + + BEGIN_NI04(push, NV30_3D(VB_ELEMENT_U16), npush); + while (npush--) { + PUSH_DATA(push, (indices[1] << 16) | indices[0]); + indices += 2; + } + } + + BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); + PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP); + PUSH_RESET(push, BUFCTX_VTXTMP); +} + +static void +nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr) +{ + struct nv30_render *r = nv30_render(render); + struct nv30_context *nv30 = r->nv30; + struct nouveau_pushbuf *push = nv30->base.pushbuf; + unsigned fn = nr >> 8, pn = nr & 0xff; + unsigned ps = fn + (pn ? 
1 : 0); + unsigned i; + + BEGIN_NV04(push, NV30_3D(VTXBUF(0)), r->vertex_info.num_attribs); + for (i = 0; i < r->vertex_info.num_attribs; i++) { + PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP, + nv04_resource(r->buffer), r->offset + r->vtxptr[i], + NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); + } + + if (!nv30_state_validate(nv30, FALSE)) + return; + + BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); + PUSH_DATA (push, r->prim); + + BEGIN_NI04(push, NV30_3D(VB_VERTEX_BATCH), ps); + while (fn--) { + PUSH_DATA (push, 0xff000000 | start); + start += 256; + } + + if (pn) + PUSH_DATA (push, ((pn - 1) << 24) | start); + + BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); + PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP); + PUSH_RESET(push, BUFCTX_VTXTMP); +} + +static void +nv30_render_release_vertices(struct vbuf_render *render) +{ + struct nv30_render *r = nv30_render(render); + r->offset += r->length; +} + +static const struct { + unsigned emit; + unsigned interp; + unsigned vp30; + unsigned vp40; + unsigned ow40; +} vroute [] = { + [TGSI_SEMANTIC_POSITION] = { EMIT_4F, INTERP_PERSPECTIVE, 0, 0, 0x00000000 }, + [TGSI_SEMANTIC_COLOR ] = { EMIT_4F, INTERP_LINEAR , 3, 1, 0x00000001 }, + [TGSI_SEMANTIC_BCOLOR ] = { EMIT_4F, INTERP_LINEAR , 1, 3, 0x00000004 }, + [TGSI_SEMANTIC_FOG ] = { EMIT_4F, INTERP_PERSPECTIVE, 5, 5, 0x00000010 }, + [TGSI_SEMANTIC_PSIZE ] = { EMIT_1F_PSIZE, INTERP_POS , 6, 6, 0x00000020 }, + [TGSI_SEMANTIC_GENERIC ] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 } +}; + +static boolean +vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx) +{ + struct pipe_screen *pscreen = &r->nv30->screen->base.base; + struct nv30_fragprog *fp = r->nv30->fragprog.program; + struct vertex_info *vinfo = &r->vertex_info; + enum pipe_format format; + uint emit = EMIT_OMIT; + uint result = *idx; + + if (sem == TGSI_SEMANTIC_GENERIC && result >= 8) { + for (result = 0; result < 8; result++) { + if (fp->texcoord[result] == *idx) { + emit = vroute[sem].emit; + 
break; + } + } + } else { + emit = vroute[sem].emit; + } + + if (emit == EMIT_OMIT) + return FALSE; + + draw_emit_vertex_attr(vinfo, emit, vroute[sem].interp, attrib); + format = draw_translate_vinfo_format(emit); + + r->vtxfmt[attrib] = nv30_vtxfmt(pscreen, format)->hw; + r->vtxptr[attrib] = vinfo->size | NV30_3D_VTXBUF_DMA1; + vinfo->size += draw_translate_vinfo_size(emit); + + if (nv30_screen(pscreen)->eng3d->oclass < NV40_3D_CLASS) { + r->vtxprog[attrib][0] = 0x001f38d8; + r->vtxprog[attrib][1] = 0x0080001b | (attrib << 9); + r->vtxprog[attrib][2] = 0x0836106c; + r->vtxprog[attrib][3] = 0x2000f800 | (result + vroute[sem].vp30) << 2; + } else { + r->vtxprog[attrib][0] = 0x401f9c6c; + r->vtxprog[attrib][1] = 0x0040000d | (attrib << 8); + r->vtxprog[attrib][2] = 0x8106c083; + r->vtxprog[attrib][3] = 0x6041ff80 | (result + vroute[sem].vp40) << 2; + } + + *idx = vroute[sem].ow40 << result; + return TRUE; +} + +static boolean +nv30_render_validate(struct nv30_context *nv30) +{ + struct nv30_render *r = nv30_render(nv30->draw->render); + struct nv30_rasterizer_stateobj *rast = nv30->rast; + struct pipe_screen *pscreen = &nv30->screen->base.base; + struct nouveau_pushbuf *push = nv30->screen->base.pushbuf; + struct nouveau_object *eng3d = nv30->screen->eng3d; + struct nv30_vertprog *vp = nv30->vertprog.program; + struct vertex_info *vinfo = &r->vertex_info; + unsigned vp_attribs = 0; + unsigned vp_results = 0; + unsigned attrib = 0; + unsigned pntc; + int i; + + if (!r->vertprog) { + struct nouveau_heap *heap = nv30_screen(pscreen)->vp_exec_heap; + if (nouveau_heap_alloc(heap, 16, &r->vertprog, &r->vertprog)) { + while (heap->next && heap->size < 16) { + struct nouveau_heap **evict = heap->next->priv; + nouveau_heap_free(evict); + } + + if (nouveau_heap_alloc(heap, 16, &r->vertprog, &r->vertprog)) + return FALSE; + } + } + + vinfo->num_attribs = 0; + vinfo->size = 0; + + /* setup routing for all necessary vp outputs */ + for (i = 0; i < vp->info.num_outputs && attrib < 
16; i++) { + uint semantic = vp->info.output_semantic_name[i]; + uint index = vp->info.output_semantic_index[i]; + if (vroute_add(r, attrib, semantic, &index)) { + vp_attribs |= (1 << attrib++); + vp_results |= index; + } + } + + /* setup routing for replaced point coords not written by vp */ + if (rast && rast->pipe.point_quad_rasterization) + pntc = rast->pipe.sprite_coord_enable & 0x000002ff; + else + pntc = 0; + + while (pntc && attrib < 16) { + uint index = ffs(pntc) - 1; pntc &= ~(1 << index); + if (vroute_add(r, attrib, TGSI_SEMANTIC_GENERIC, &index)) { + vp_attribs |= (1 << attrib++); + vp_results |= index; + } + } + + /* modify vertex format for correct stride, and stub out unused ones */ + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1); + PUSH_DATA (push, r->vertprog->start); + r->vtxprog[attrib - 1][3] |= 1; + for (i = 0; i < attrib; i++) { + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4); + PUSH_DATAp(push, r->vtxprog[i], 4); + r->vtxfmt[i] |= vinfo->size << 8; + } + for (; i < 16; i++) + r->vtxfmt[i] = NV30_3D_VTXFMT_TYPE_V32_FLOAT; + + BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 1.0); + BEGIN_NV04(push, NV30_3D(DEPTH_RANGE_NEAR), 2); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 1.0); + + BEGIN_NV04(push, NV30_3D(VTXFMT(0)), 16); + PUSH_DATAp(push, r->vtxfmt, 16); + + BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1); + PUSH_DATA (push, r->vertprog->start); + BEGIN_NV04(push, NV30_3D(ENGINE), 1); + PUSH_DATA (push, 0x00000103); + if (eng3d->oclass >= NV40_3D_CLASS) { + BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2); + PUSH_DATA (push, vp_attribs); + PUSH_DATA (push, vp_results); + } + + vinfo->size /= 4; + return TRUE; +} + +void +nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nv30_context *nv30 = nv30_context(pipe); 
+ struct draw_context *draw = nv30->draw; + struct pipe_transfer *transfer[PIPE_MAX_ATTRIBS] = {NULL}; + struct pipe_transfer *transferi = NULL; + int i; + + nv30_render_validate(nv30); + + if (nv30->draw_dirty & NV30_NEW_VIEWPORT) + draw_set_viewport_states(draw, 0, 1, &nv30->viewport); + if (nv30->draw_dirty & NV30_NEW_RASTERIZER) + draw_set_rasterizer_state(draw, &nv30->rast->pipe, NULL); + if (nv30->draw_dirty & NV30_NEW_CLIP) + draw_set_clip_state(draw, &nv30->clip); + if (nv30->draw_dirty & NV30_NEW_ARRAYS) { + draw_set_vertex_buffers(draw, 0, nv30->num_vtxbufs, nv30->vtxbuf); + draw_set_vertex_elements(draw, nv30->vertex->num_elements, nv30->vertex->pipe); + } + if (nv30->draw_dirty & NV30_NEW_FRAGPROG) { + struct nv30_fragprog *fp = nv30->fragprog.program; + if (!fp->draw) + fp->draw = draw_create_fragment_shader(draw, &fp->pipe); + draw_bind_fragment_shader(draw, fp->draw); + } + if (nv30->draw_dirty & NV30_NEW_VERTPROG) { + struct nv30_vertprog *vp = nv30->vertprog.program; + if (!vp->draw) + vp->draw = draw_create_vertex_shader(draw, &vp->pipe); + draw_bind_vertex_shader(draw, vp->draw); + } + if (nv30->draw_dirty & NV30_NEW_VERTCONST) { + if (nv30->vertprog.constbuf) { + void *map = nv04_resource(nv30->vertprog.constbuf)->data; + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, + map, nv30->vertprog.constbuf_nr); + } + } + + for (i = 0; i < nv30->num_vtxbufs; i++) { + const void *map = nv30->vtxbuf[i].user_buffer; + if (!map) { + if (!nv30->vtxbuf[i].buffer) { + continue; + } + map = pipe_buffer_map(pipe, nv30->vtxbuf[i].buffer, + PIPE_TRANSFER_UNSYNCHRONIZED | + PIPE_TRANSFER_READ, &transfer[i]); + } + draw_set_mapped_vertex_buffer(draw, i, map, ~0); + } + + if (info->indexed) { + const void *map = nv30->idxbuf.user_buffer; + if (!map) + pipe_buffer_map(pipe, nv30->idxbuf.buffer, + PIPE_TRANSFER_UNSYNCHRONIZED | + PIPE_TRANSFER_READ, &transferi); + draw_set_indexes(draw, + (ubyte *) map + nv30->idxbuf.offset, + nv30->idxbuf.index_size, ~0); 
+ } else { + draw_set_indexes(draw, NULL, 0, 0); + } + + draw_vbo(draw, info); + draw_flush(draw); + + if (info->indexed && transferi) + pipe_buffer_unmap(pipe, transferi); + for (i = 0; i < nv30->num_vtxbufs; i++) + if (transfer[i]) + pipe_buffer_unmap(pipe, transfer[i]); + + nv30->draw_dirty = 0; + nv30_state_release(nv30); +} + +static void +nv30_render_destroy(struct vbuf_render *render) +{ + FREE(render); +} + +static struct vbuf_render * +nv30_render_create(struct nv30_context *nv30) +{ + struct nv30_render *r = CALLOC_STRUCT(nv30_render); + if (!r) + return NULL; + + r->nv30 = nv30; + r->offset = 1 * 1024 * 1024; + + r->base.max_indices = 16 * 1024; + r->base.max_vertex_buffer_bytes = r->offset; + + r->base.get_vertex_info = nv30_render_get_vertex_info; + r->base.allocate_vertices = nv30_render_allocate_vertices; + r->base.map_vertices = nv30_render_map_vertices; + r->base.unmap_vertices = nv30_render_unmap_vertices; + r->base.set_primitive = nv30_render_set_primitive; + r->base.draw_elements = nv30_render_draw_elements; + r->base.draw_arrays = nv30_render_draw_arrays; + r->base.release_vertices = nv30_render_release_vertices; + r->base.destroy = nv30_render_destroy; + return &r->base; +} + +void +nv30_draw_init(struct pipe_context *pipe) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct vbuf_render *render; + struct draw_context *draw; + struct draw_stage *stage; + + draw = draw_create(pipe); + if (!draw) + return; + + render = nv30_render_create(nv30); + if (!render) { + draw_destroy(draw); + return; + } + + stage = draw_vbuf_stage(draw, render); + if (!stage) { + render->destroy(render); + draw_destroy(draw); + return; + } + + draw_set_render(draw, render); + draw_set_rasterize_stage(draw, stage); + draw_wide_line_threshold(draw, 10000000.f); + draw_wide_point_threshold(draw, 10000000.f); + draw_wide_point_sprites(draw, TRUE); + nv30->draw = draw; +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_format.c 
b/src/gallium/drivers/nouveau/nv30/nv30_format.c new file mode 100644 index 00000000000..67e0d5e9c56 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_format.c @@ -0,0 +1,259 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_format.h" + +#define NV30_3D_RT_FORMAT_COLOR_X1R5G5B5 2 + +#define NV30_3D_TEX_FORMAT_FORMAT_A16L16 NV30_3D_TEX_FORMAT_FORMAT_HILO16 +#define NV30_3D_TEX_FORMAT_FORMAT_A16L16_RECT NV30_3D_TEX_FORMAT_FORMAT_HILO16_RECT +#define NV30_3D_TEX_FORMAT_FORMAT_RGBA16F 0x00004a00 +#define NV30_3D_TEX_FORMAT_FORMAT_RGBA16F_RECT NV30_3D_TEX_FORMAT_FORMAT_RGBA16F +#define NV30_3D_TEX_FORMAT_FORMAT_RGBA32F 0x00004b00 +#define NV30_3D_TEX_FORMAT_FORMAT_RGBA32F_RECT NV30_3D_TEX_FORMAT_FORMAT_RGBA32F +#define NV30_3D_TEX_FORMAT_FORMAT_R32F 0x00004c00 +#define NV30_3D_TEX_FORMAT_FORMAT_R32F_RECT NV30_3D_TEX_FORMAT_FORMAT_R32F +#define NV30_3D_TEX_FORMAT_FORMAT_DXT1_RECT NV30_3D_TEX_FORMAT_FORMAT_DXT1 +#define NV30_3D_TEX_FORMAT_FORMAT_DXT3_RECT NV30_3D_TEX_FORMAT_FORMAT_DXT3 +#define NV30_3D_TEX_FORMAT_FORMAT_DXT5_RECT NV30_3D_TEX_FORMAT_FORMAT_DXT5 +#define NV30_3D_TEX_FORMAT_FORMAT_RG16F 0xdeadcafe +#define NV30_3D_TEX_FORMAT_FORMAT_RG16F_RECT 0xdeadcafe + +#define NV40_3D_TEX_FORMAT_FORMAT_R32F 0x00001c00 +#define NV40_3D_TEX_FORMAT_FORMAT_RG16F 0x00001f00 + +#define ____ 0 +#define S___ PIPE_BIND_SAMPLER_VIEW +#define _R__ PIPE_BIND_RENDER_TARGET +#define _B__ PIPE_BIND_RENDER_TARGET | PIPE_BIND_BLENDABLE +#define _Z__ PIPE_BIND_DEPTH_STENCIL +#define __V_ PIPE_BIND_VERTEX_BUFFER +#define SR__ (S___ | _R__) +#define SB__ (S___ | _B__) +#define SZ__ (S___ | _Z__) +#define S_V_ (S___ | __V_) +#define SRV_ (SR__ | __V_) +#define SBV_ (SB__ | __V_) + +#define _(a,b) [PIPE_FORMAT_##a] = { \ + .bindings = (b), \ +} +const struct nv30_format_info +nv30_format_info_table[PIPE_FORMAT_COUNT] = { + _(L8_UNORM , S___), + _(L8_SNORM , S___), + _(L8_SRGB , S___), + _(I8_UNORM , S___), + _(I8_SNORM , S___), + _(A8_UNORM , S___), + _(A8_SNORM , S___), + _(R8_UNORM , S_V_), + _(R8_SNORM , S___), + _(B5G5R5X1_UNORM , SB__), + _(B5G5R5A1_UNORM , S___), + _(B4G4R4X4_UNORM , 
S___), + _(B4G4R4A4_UNORM , S___), + _(B5G6R5_UNORM , SB__), + _(B8G8R8X8_UNORM , SB__), + _(B8G8R8X8_SRGB , S___), + _(B8G8R8A8_UNORM , SB__), + _(B8G8R8A8_SRGB , S___), + _(R8G8B8A8_UNORM , __V_), + _(R8G8B8A8_SNORM , S___), + _(DXT1_RGB , S___), + _(DXT1_SRGB , S___), + _(DXT1_RGBA , S___), + _(DXT1_SRGBA , S___), + _(DXT3_RGBA , S___), + _(DXT3_SRGBA , S___), + _(DXT5_RGBA , S___), + _(DXT5_SRGBA , S___), + _(L8A8_UNORM , S___), + _(L8A8_SRGB , S___), + _(R8G8_UNORM , S_V_), + _(R8G8_SNORM , S___), + _(R8G8B8_UNORM , __V_), + _(Z16_UNORM , SZ__), + _(X8Z24_UNORM , SZ__), + _(S8_UINT_Z24_UNORM , SZ__), + _(L16_UNORM , S___), + _(L16_SNORM , S___), + _(I16_UNORM , S___), + _(I16_SNORM , S___), + _(A16_UNORM , S___), + _(A16_SNORM , S___), + _(R16_UNORM , S___), + _(R16_SNORM , S_V_), + _(R16G16_SNORM , __V_), + _(R16G16B16_SNORM , __V_), + _(R16G16B16A16_SNORM , __V_), + _(R8G8B8A8_USCALED , __V_), + _(R16_FLOAT , __V_), + _(R16G16_FLOAT , __V_), //S_V_), + _(R16G16B16_FLOAT , __V_), + _(R16G16B16A16_FLOAT , __V_), //SBV_), + _(R16_SSCALED , __V_), + _(R16G16_SSCALED , __V_), + _(R16G16B16_SSCALED , __V_), + _(R16G16B16A16_SSCALED, __V_), + _(R32_FLOAT , __V_), //SRV_), + _(R32G32_FLOAT , __V_), + _(R32G32B32_FLOAT , __V_), + _(R32G32B32A32_FLOAT , __V_), //SRV_), +}; +#undef _ +#undef ____ + +#define R_(a,b) [PIPE_FORMAT_##a] = { \ + .hw = NV30_3D_RT_FORMAT_COLOR_##b, \ +} +#define Z_(a,b) [PIPE_FORMAT_##a] = { \ + .hw = NV30_3D_RT_FORMAT_ZETA_##b, \ +} +const struct nv30_format +nv30_format_table[PIPE_FORMAT_COUNT] = { + R_(B5G5R5X1_UNORM , X1R5G5B5 ), + R_(B5G6R5_UNORM , R5G6B5 ), + R_(B8G8R8X8_UNORM , X8R8G8B8 ), + R_(B8G8R8A8_UNORM , A8R8G8B8 ), + Z_(Z16_UNORM , Z16 ), + Z_(X8Z24_UNORM , Z24S8 ), + Z_(S8_UINT_Z24_UNORM , Z24S8 ), + R_(R16G16B16A16_FLOAT, A16B16G16R16_FLOAT), + R_(R32G32B32A32_FLOAT, A32B32G32R32_FLOAT), + R_(R32_FLOAT , R32_FLOAT ), +}; + +#define _(a,b,c) [PIPE_FORMAT_##a] = { \ + .hw = NV30_3D_VTXFMT_TYPE_##b | ((c) << 
NV30_3D_VTXFMT_SIZE__SHIFT) \ +} +const struct nv30_vtxfmt +nv30_vtxfmt_table[PIPE_FORMAT_COUNT] = { + _(R8_UNORM , U8_UNORM , 1), + _(R8G8_UNORM , U8_UNORM , 2), + _(R8G8B8_UNORM , U8_UNORM , 3), + _(R8G8B8A8_UNORM , U8_UNORM , 4), + _(R8G8B8A8_USCALED , U8_USCALED , 4), + _(R16_SNORM , V16_SNORM , 1), + _(R16G16_SNORM , V16_SNORM , 2), + _(R16G16B16_SNORM , V16_SNORM , 3), + _(R16G16B16A16_SNORM , V16_SNORM , 4), + _(R16_SSCALED , V16_SSCALED, 1), + _(R16G16_SSCALED , V16_SSCALED, 2), + _(R16G16B16_SSCALED , V16_SSCALED, 3), + _(R16G16B16A16_SSCALED, V16_SSCALED, 4), + _(R16_FLOAT , V16_FLOAT , 1), + _(R16G16_FLOAT , V16_FLOAT , 2), + _(R16G16B16_FLOAT , V16_FLOAT , 3), + _(R16G16B16A16_FLOAT , V16_FLOAT , 4), + _(R32_FLOAT , V32_FLOAT , 1), + _(R32G32_FLOAT , V32_FLOAT , 2), + _(R32G32B32_FLOAT , V32_FLOAT , 3), + _(R32G32B32A32_FLOAT , V32_FLOAT , 4), +}; +#undef _ + +#define SWZ_OUT_0 0 +#define SWZ_OUT_1 1 +#define SWZ_OUT_C 2 + +#define SWZ_SRC_0 3 +#define SWZ_SRC_1 2 +#define SWZ_SRC_2 1 +#define SWZ_SRC_3 0 +#define SWZ_SRC_x 0 + +#define NONE 0x00000000 +#define SRGB 0x00700000 + +#define ____ 0x00000000 +#define SSSS 0xf0000000 + +#define _(a,b,c,d,e,f,g,h,i,j,k,l,m) [PIPE_FORMAT_##a] = { \ + .nv30 = NV30_3D_TEX_FORMAT_FORMAT_##b, \ + .nv30_rect = NV30_3D_TEX_FORMAT_FORMAT_##b##_RECT, \ + .nv40 = NV40_3D_TEX_FORMAT_FORMAT_##b, \ + .swz[0] = { SWZ_OUT_##d, SWZ_SRC_##h }, \ + .swz[1] = { SWZ_OUT_##e, SWZ_SRC_##i }, \ + .swz[2] = { SWZ_OUT_##f, SWZ_SRC_##j }, \ + .swz[3] = { SWZ_OUT_##g, SWZ_SRC_##k }, \ + .swz[4] = { SWZ_OUT_0, SWZ_SRC_x }, \ + .swz[5] = { SWZ_OUT_1, SWZ_SRC_x }, \ + .swizzle = (c) * 0x00010000, \ + .wrap = (l), \ + .filter = (m), \ +} +const struct nv30_texfmt +nv30_texfmt_table[PIPE_FORMAT_COUNT] = { + _(L8_UNORM , L8 , 0, C, C, C, 1, 0, 0, 0, x, NONE, ____), + _(L8_SNORM , L8 , 0, C, C, C, 1, 0, 0, 0, x, NONE, SSSS), + _(L8_SRGB , L8 , 0, C, C, C, 1, 0, 0, 0, x, SRGB, ____), + _(I8_UNORM , L8 , 0, C, C, C, C, 0, 0, 0, 0, NONE, ____), + 
_(I8_SNORM , L8 , 0, C, C, C, C, 0, 0, 0, 0, NONE, SSSS), + _(A8_UNORM , L8 , 0, 0, 0, 0, C, x, x, x, 0, NONE, ____), + _(A8_SNORM , L8 , 0, 0, 0, 0, C, x, x, x, 0, NONE, SSSS), + _(R8_UNORM , L8 , 0, C, 0, 0, 1, 0, x, x, x, NONE, ____), + _(R8_SNORM , L8 , 0, C, 0, 0, 1, 0, x, x, x, NONE, SSSS), + _(B5G5R5X1_UNORM , A1R5G5B5, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____), + _(B5G5R5A1_UNORM , A1R5G5B5, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____), + _(B4G4R4X4_UNORM , A4R4G4B4, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____), + _(B4G4R4A4_UNORM , A4R4G4B4, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____), + _(B5G6R5_UNORM , R5G6B5 , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____), + _(B8G8R8X8_UNORM , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____), + _(B8G8R8X8_SRGB , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____), + _(B8G8R8A8_UNORM , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____), + _(B8G8R8A8_SRGB , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____), + _(R8G8B8A8_SNORM , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS), + _(DXT1_RGB , DXT1 , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____), + _(DXT1_SRGB , DXT1 , 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____), + _(DXT1_RGBA , DXT1 , 0, C, C, C, C, 2, 1, 0, 3, NONE, ____), + _(DXT1_SRGBA , DXT1 , 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____), + _(DXT3_RGBA , DXT3 , 0, C, C, C, C, 2, 1, 0, 3, NONE, ____), + _(DXT3_SRGBA , DXT3 , 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____), + _(DXT5_RGBA , DXT5 , 0, C, C, C, C, 2, 1, 0, 3, NONE, ____), + _(DXT5_SRGBA , DXT5 , 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____), + _(L8A8_UNORM , A8L8 , 0, C, C, C, C, 0, 0, 0, 3, NONE, ____), + _(L8A8_SRGB , A8L8 , 0, C, C, C, C, 0, 0, 0, 3, SRGB, ____), + _(R8G8_UNORM , A8L8 , 0, C, C, 0, 1, 0, 3, x, x, NONE, ____), + _(R8G8_SNORM , A8L8 , 0, C, C, 0, 1, 0, 3, x, x, NONE, SSSS), + _(Z16_UNORM , Z16 , 0, C, C, C, 1, 3, 3, 3, x, NONE, ____), + _(X8Z24_UNORM , Z24 , 0, C, C, C, 1, 3, 3, 3, x, NONE, ____), + _(S8_UINT_Z24_UNORM , Z24 , 0, C, C, C, 1, 3, 3, 3, x, NONE, ____), + _(L16_UNORM , A16 , 0, C, C, C, 1, 1, 
1, 1, 1, NONE, ____), + _(L16_SNORM , A16 , 0, C, C, C, 1, 1, 1, 1, 1, NONE, SSSS), + _(I16_UNORM , A16 , 0, C, C, C, C, 1, 1, 1, 1, NONE, ____), + _(I16_SNORM , A16 , 0, C, C, C, C, 1, 1, 1, 1, NONE, SSSS), + _(A16_UNORM , A16 , 0, 0, 0, 0, C, 1, 1, 1, 1, NONE, ____), + _(A16_SNORM , A16 , 0, 0, 0, 0, C, 1, 1, 1, 1, NONE, SSSS), + _(R16_UNORM , A16 , 0, C, 0, 0, 1, 1, 1, 1, 1, NONE, ____), + _(R16_SNORM , A16 , 0, C, 0, 0, 1, 1, 1, 1, 1, NONE, SSSS), + _(R16G16_FLOAT , RG16F , 0, C, C, 0, 1, 2, 1, 0, 3, NONE, ____), + _(R16G16B16A16_FLOAT, RGBA16F , 0, C, C, C, C, 2, 1, 0, 3, NONE, ____), + _(R32_FLOAT , R32F , 0, C, 0, 0, 1, 2, 1, 0, 3, NONE, ____), + _(R32G32B32A32_FLOAT, RGBA32F , 0, C, C, C, C, 2, 1, 0, 3, NONE, ____), +}; +#undef _ diff --git a/src/gallium/drivers/nouveau/nv30/nv30_format.h b/src/gallium/drivers/nouveau/nv30/nv30_format.h new file mode 100644 index 00000000000..8bf4a37299f --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_format.h @@ -0,0 +1,57 @@ +#ifndef __NV30_FORMAT_H__ +#define __NV30_FORMAT_H__ + +struct nv30_format_info { + unsigned bindings; +}; + +struct nv30_format { + unsigned hw; +}; + +struct nv30_vtxfmt { + unsigned hw; +}; + +struct nv30_texfmt { + unsigned nv30; + unsigned nv30_rect; + unsigned nv40; + struct { + unsigned src; + unsigned cmp; + } swz[6]; + unsigned swizzle; + unsigned filter; + unsigned wrap; +}; + +extern const struct nv30_format_info nv30_format_info_table[]; +static INLINE const struct nv30_format_info * +nv30_format_info(struct pipe_screen *pscreen, enum pipe_format format) +{ + return &nv30_format_info_table[format]; +} + +extern const struct nv30_format nv30_format_table[]; +static INLINE const struct nv30_format * +nv30_format(struct pipe_screen *pscreen, enum pipe_format format) +{ + return &nv30_format_table[format]; +} + +extern const struct nv30_vtxfmt nv30_vtxfmt_table[]; +static INLINE const struct nv30_vtxfmt * +nv30_vtxfmt(struct pipe_screen *pscreen, enum pipe_format format) +{ + return 
&nv30_vtxfmt_table[format]; +} + +extern const struct nv30_texfmt nv30_texfmt_table[]; +static INLINE const struct nv30_texfmt * +nv30_texfmt(struct pipe_screen *pscreen, enum pipe_format format) +{ + return &nv30_texfmt_table[format]; +} + +#endif diff --git a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c new file mode 100644 index 00000000000..e8acdfeaf75 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c @@ -0,0 +1,170 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#include "tgsi/tgsi_parse.h" + +#include "nv_object.xml.h" +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv30_context.h" +#include "nv30/nvfx_shader.h" + +static void +nv30_fragprog_upload(struct nv30_context *nv30) +{ + struct nouveau_context *nv = &nv30->base; + struct nv30_fragprog *fp = nv30->fragprog.program; + struct pipe_context *pipe = &nv30->base.pipe; + struct pipe_transfer *transfer; + uint32_t *map; + int i; (void)i; + + if (unlikely(!fp->buffer)) { + fp->buffer = pipe_buffer_create(pipe->screen, 0, 0, fp->insn_len * 4); + } + + map = pipe_buffer_map(pipe, fp->buffer, PIPE_TRANSFER_WRITE, &transfer); +#ifndef PIPE_ARCH_BIG_ENDIAN + memcpy(map, fp->insn, fp->insn_len * 4); +#else + for (i = 0; i < fp->insn_len; i++) + *map++ = (fp->insn[i] >> 16) | (fp->insn[i] << 16); +#endif + pipe_buffer_unmap(pipe, transfer); + + if (nv04_resource(fp->buffer)->domain != NOUVEAU_BO_VRAM) + nouveau_buffer_migrate(nv, nv04_resource(fp->buffer), NOUVEAU_BO_VRAM); +} + +void +nv30_fragprog_validate(struct nv30_context *nv30) +{ + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct nouveau_object *eng3d = nv30->screen->eng3d; + struct nv30_fragprog *fp = nv30->fragprog.program; + boolean upload = FALSE; + int i; + + if (!fp->translated) { + _nvfx_fragprog_translate(nv30, fp, FALSE); + if (!fp->translated) + return; + + upload = TRUE; + } + + /* update constants, also needs to be done on every fp switch as we + * have no idea whether the constbuf changed in the meantime + */ + if (nv30->fragprog.constbuf) { + struct pipe_resource *constbuf = nv30->fragprog.constbuf; + uint32_t *cbuf = (uint32_t *)nv04_resource(constbuf)->data; + + for (i = 0; i < fp->nr_consts; i++) { + unsigned off = fp->consts[i].offset; + unsigned idx = fp->consts[i].index * 4; + + if (!memcmp(&fp->insn[off], &cbuf[idx], 4 * 4)) + continue; + memcpy(&fp->insn[off], &cbuf[idx], 4 * 4); + upload = TRUE; + } + } + + if (upload) + 
nv30_fragprog_upload(nv30); + + /* FP_ACTIVE_PROGRAM needs to be done again even if only the consts + * were updated. TEX_CACHE_CTL magic is not enough to convince the + * GPU that it should re-read the fragprog from VRAM... sigh. + */ + if (nv30->state.fragprog != fp || upload) { + struct nv04_resource *r = nv04_resource(fp->buffer); + + if (!PUSH_SPACE(push, 8)) + return; + PUSH_RESET(push, BUFCTX_FRAGPROG); + + BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1); + PUSH_RESRC(push, NV30_3D(FP_ACTIVE_PROGRAM), BUFCTX_FRAGPROG, r, 0, + NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_OR, + NV30_3D_FP_ACTIVE_PROGRAM_DMA0, + NV30_3D_FP_ACTIVE_PROGRAM_DMA1); + BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1); + PUSH_DATA (push, fp->fp_control); + if (eng3d->oclass < NV40_3D_CLASS) { + BEGIN_NV04(push, NV30_3D(FP_REG_CONTROL), 1); + PUSH_DATA (push, 0x00010004); + BEGIN_NV04(push, NV30_3D(TEX_UNITS_ENABLE), 1); + PUSH_DATA (push, fp->texcoords); + } else { + BEGIN_NV04(push, SUBC_3D(0x0b40), 1); + PUSH_DATA (push, 0x00000000); + } + + nv30->state.fragprog = fp; + } +} + +static void * +nv30_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv30_fragprog *fp = CALLOC_STRUCT(nv30_fragprog); + if (!fp) + return NULL; + + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + tgsi_scan_shader(fp->pipe.tokens, &fp->info); + return fp; +} + +static void +nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_fragprog *fp = hwcso; + + pipe_resource_reference(&fp->buffer, NULL); + + FREE((void *)fp->pipe.tokens); + FREE(fp->insn); + FREE(fp); +} + +static void +nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->fragprog.program = hwcso; + nv30->dirty |= NV30_NEW_FRAGPROG; +} + +void +nv30_fragprog_init(struct pipe_context *pipe) +{ + pipe->create_fs_state = nv30_fp_state_create; + pipe->bind_fs_state = nv30_fp_state_bind; + pipe->delete_fs_state = 
nv30_fp_state_delete; +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_fragtex.c b/src/gallium/drivers/nouveau/nv30/nv30_fragtex.c new file mode 100644 index 00000000000..60ffb03c89f --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_fragtex.c @@ -0,0 +1,202 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#include "util/u_inlines.h" + +#include "nv_object.xml.h" +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_format.h" + +void +nv30_fragtex_validate(struct nv30_context *nv30) +{ + struct pipe_screen *pscreen = &nv30->screen->base.base; + struct nouveau_object *eng3d = nv30->screen->eng3d; + struct nouveau_pushbuf *push = nv30->base.pushbuf; + unsigned dirty = nv30->fragprog.dirty_samplers; + + while (dirty) { + unsigned unit = ffs(dirty) - 1; + struct nv30_sampler_view *sv = (void *)nv30->fragprog.textures[unit]; + struct nv30_sampler_state *ss = nv30->fragprog.samplers[unit]; + + PUSH_RESET(push, BUFCTX_FRAGTEX(unit)); + + if (ss && sv) { + const struct nv30_texfmt *fmt = nv30_texfmt(pscreen, sv->pipe.format); + struct pipe_resource *pt = sv->pipe.texture; + struct nv30_miptree *mt = nv30_miptree(pt); + unsigned min_lod, max_lod; + u32 filter = sv->filt | (ss->filt & sv->filt_mask); + u32 format = sv->fmt | ss->fmt; + u32 enable = ss->en; + + /* handle base_level when not using a mip filter, min/max level + * is unfortunately ignored by the hardware otherwise + */ + if (ss->pipe.min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + if (sv->base_lod) + filter += 0x00020000; /* N/L -> NMN/LMN */ + max_lod = sv->base_lod; + min_lod = sv->base_lod; + } else { + max_lod = MIN2(ss->max_lod + sv->base_lod, sv->high_lod); + min_lod = MIN2(ss->min_lod + sv->base_lod, max_lod); + } + + if (eng3d->oclass >= NV40_3D_CLASS) { + /* this is a tad stupid of the hardware, but there's no non-rcomp + * z16/z24 texture formats to be had, we have to suffer and lose + * some precision to handle this case. 
+ */ + if (ss->pipe.compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE) { + if (fmt->nv40 == NV40_3D_TEX_FORMAT_FORMAT_Z16) + format |= NV40_3D_TEX_FORMAT_FORMAT_A8L8; + else + if (fmt->nv40 == NV40_3D_TEX_FORMAT_FORMAT_Z24) + format |= NV40_3D_TEX_FORMAT_FORMAT_A16L16; + else + format |= fmt->nv40; + } else { + format |= fmt->nv40; + } + + enable |= (min_lod << 19) | (max_lod << 7); + enable |= NV40_3D_TEX_ENABLE_ENABLE; + + BEGIN_NV04(push, NV40_3D(TEX_SIZE1(unit)), 1); + PUSH_DATA (push, sv->npot_size1); + } else { + /* this is a tad stupid of the hardware, but there's no non-rcomp + * z16/z24 texture formats to be had, we have to suffer and lose + * some precision to handle this case. + */ + if (ss->pipe.compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE) { + if (fmt->nv30 == NV30_3D_TEX_FORMAT_FORMAT_Z16) { + if (ss->pipe.normalized_coords) + format |= NV30_3D_TEX_FORMAT_FORMAT_A8L8; + else + format |= NV30_3D_TEX_FORMAT_FORMAT_A8L8_RECT; + } else + if (fmt->nv30 == NV30_3D_TEX_FORMAT_FORMAT_Z24) { + if (ss->pipe.normalized_coords) + format |= NV30_3D_TEX_FORMAT_FORMAT_HILO16; + else + format |= NV30_3D_TEX_FORMAT_FORMAT_HILO16_RECT; + } else { + if (ss->pipe.normalized_coords) + format |= fmt->nv30; + else + format |= fmt->nv30_rect; + } + } else { + if (ss->pipe.normalized_coords) + format |= fmt->nv30; + else + format |= fmt->nv30_rect; + } + + enable |= NV30_3D_TEX_ENABLE_ENABLE; + enable |= (min_lod << 18) | (max_lod << 6); + } + + BEGIN_NV04(push, NV30_3D(TEX_OFFSET(unit)), 8); + PUSH_MTHDl(push, NV30_3D(TEX_OFFSET(unit)), BUFCTX_FRAGTEX(unit), + mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + PUSH_MTHDs(push, NV30_3D(TEX_FORMAT(unit)), BUFCTX_FRAGTEX(unit), + mt->base.bo, format, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD, + NV30_3D_TEX_FORMAT_DMA0, + NV30_3D_TEX_FORMAT_DMA1); + PUSH_DATA (push, sv->wrap | (ss->wrap & sv->wrap_mask)); + PUSH_DATA (push, enable); + PUSH_DATA (push, sv->swz); + PUSH_DATA (push, filter); + PUSH_DATA (push, sv->npot_size0); + PUSH_DATA 
(push, ss->bcol); + BEGIN_NV04(push, NV30_3D(TEX_FILTER_OPTIMIZATION(unit)), 1); + PUSH_DATA (push, nv30->config.filter); + } else { + BEGIN_NV04(push, NV30_3D(TEX_ENABLE(unit)), 1); + PUSH_DATA (push, 0); + } + + dirty &= ~(1 << unit); + } + + nv30->fragprog.dirty_samplers = 0; +} + +static void +nv30_fragtex_sampler_states_bind(struct pipe_context *pipe, + unsigned nr, void **hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + unsigned i; + + for (i = 0; i < nr; i++) { + nv30->fragprog.samplers[i] = hwcso[i]; + nv30->fragprog.dirty_samplers |= (1 << i); + } + + for (; i < nv30->fragprog.num_samplers; i++) { + nv30->fragprog.samplers[i] = NULL; + nv30->fragprog.dirty_samplers |= (1 << i); + } + + nv30->fragprog.num_samplers = nr; + nv30->dirty |= NV30_NEW_FRAGTEX; +} + + +static void +nv30_fragtex_set_sampler_views(struct pipe_context *pipe, unsigned nr, + struct pipe_sampler_view **views) +{ + struct nv30_context *nv30 = nv30_context(pipe); + unsigned i; + + for (i = 0; i < nr; i++) { + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_FRAGTEX(i)); + pipe_sampler_view_reference(&nv30->fragprog.textures[i], views[i]); + nv30->fragprog.dirty_samplers |= (1 << i); + } + + for (; i < nv30->fragprog.num_textures; i++) { + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_FRAGTEX(i)); + pipe_sampler_view_reference(&nv30->fragprog.textures[i], NULL); + nv30->fragprog.dirty_samplers |= (1 << i); + } + + nv30->fragprog.num_textures = nr; + nv30->dirty |= NV30_NEW_FRAGTEX; +} + +void +nv30_fragtex_init(struct pipe_context *pipe) +{ + pipe->bind_fragment_sampler_states = nv30_fragtex_sampler_states_bind; + pipe->set_fragment_sampler_views = nv30_fragtex_set_sampler_views; +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c new file mode 100644 index 00000000000..4c237f615cd --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c @@ -0,0 +1,490 @@ +/* + * Copyright 2012 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" + +#include "nv_m2mf.xml.h" +#include "nv30/nv30_screen.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_resource.h" +#include "nv30/nv30_transfer.h" + +static INLINE unsigned +layer_offset(struct pipe_resource *pt, unsigned level, unsigned layer) +{ + struct nv30_miptree *mt = nv30_miptree(pt); + struct nv30_miptree_level *lvl = &mt->level[level]; + + if (pt->target == PIPE_TEXTURE_CUBE) + return (layer * mt->layer_size) + lvl->offset; + + return lvl->offset + (layer * lvl->zslice_size); +} + +static boolean +nv30_miptree_get_handle(struct pipe_screen *pscreen, + struct pipe_resource *pt, + struct winsys_handle *handle) +{ + struct nv30_miptree *mt = nv30_miptree(pt); + unsigned stride; + + if (!mt || !mt->base.bo) + return FALSE; + + stride = mt->level[0].pitch; + + return nouveau_screen_bo_get_handle(pscreen, mt->base.bo, stride, handle); +} + +static void +nv30_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) +{ + struct nv30_miptree *mt = nv30_miptree(pt); + + nouveau_bo_ref(NULL, &mt->base.bo); + FREE(mt); +} + +struct nv30_transfer { + struct pipe_transfer base; + struct nv30_rect img; + struct nv30_rect tmp; + unsigned nblocksx; + unsigned nblocksy; +}; + +static INLINE struct nv30_transfer * +nv30_transfer(struct pipe_transfer *ptx) +{ + return (struct nv30_transfer *)ptx; +} + +static INLINE void +define_rect(struct pipe_resource *pt, unsigned level, unsigned z, + unsigned x, unsigned y, unsigned w, unsigned h, + struct nv30_rect *rect) +{ + struct nv30_miptree *mt = nv30_miptree(pt); + struct nv30_miptree_level *lvl = &mt->level[level]; + + rect->w = u_minify(pt->width0, level) << mt->ms_x; + rect->w = util_format_get_nblocksx(pt->format, rect->w); + rect->h = u_minify(pt->height0, level) << mt->ms_y; + rect->h = util_format_get_nblocksy(pt->format, rect->h); + rect->d = 1; + rect->z = 0; + if 
(mt->swizzled) { + if (pt->target == PIPE_TEXTURE_3D) { + rect->d = u_minify(pt->depth0, level); + rect->z = z; z = 0; + } + rect->pitch = 0; + } else { + rect->pitch = lvl->pitch; + } + + rect->bo = mt->base.bo; + rect->domain = NOUVEAU_BO_VRAM; + rect->offset = layer_offset(pt, level, z); + rect->cpp = util_format_get_blocksize(pt->format); + + rect->x0 = util_format_get_nblocksx(pt->format, x) << mt->ms_x; + rect->y0 = util_format_get_nblocksy(pt->format, y) << mt->ms_y; + rect->x1 = rect->x0 + (w << mt->ms_x); + rect->y1 = rect->y0 + (h << mt->ms_y); +} + +void +nv30_resource_copy_region(struct pipe_context *pipe, + struct pipe_resource *dstres, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *srcres, unsigned src_level, + const struct pipe_box *src_box) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_rect src, dst; + + if (dstres->target == PIPE_BUFFER && srcres->target == PIPE_BUFFER) { + nouveau_copy_buffer(&nv30->base, + nv04_resource(dstres), dstx, + nv04_resource(srcres), src_box->x, src_box->width); + return; + } + + define_rect(srcres, src_level, src_box->z, src_box->x, src_box->y, + src_box->width, src_box->height, &src); + define_rect(dstres, dst_level, dstz, dstx, dsty, + src_box->width, src_box->height, &dst); + + nv30_transfer_rect(nv30, NEAREST, &src, &dst); +} + +void +nv30_resource_resolve(struct pipe_context *pipe, + const struct pipe_resolve_info *info) +{ +#if 0 + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_rect src, dst; + + define_rect(info->src.res, 0, 0, info->src.x0, info->src.y0, + info->src.x1 - info->src.x0, info->src.y1 - info->src.y0, &src); + define_rect(info->dst.res, info->dst.level, 0, info->dst.x0, info->dst.y0, + info->dst.x1 - info->dst.x0, info->dst.y1 - info->dst.y0, &dst); + + nv30_transfer_rect(nv30, BILINEAR, &src, &dst); +#endif +} + +void +nv30_blit(struct pipe_context *pipe, + const struct pipe_blit_info *blit_info) +{ + struct nv30_context 
*nv30 = nv30_context(pipe); + struct pipe_blit_info info = *blit_info; + + if (info.src.resource->nr_samples > 1 && + info.dst.resource->nr_samples <= 1 && + !util_format_is_depth_or_stencil(info.src.resource->format) && + !util_format_is_pure_integer(info.src.resource->format)) { + debug_printf("nv30: color resolve unimplemented\n"); + return; + } + + if (util_try_blit_via_copy_region(pipe, &info)) { + return; /* done */ + } + + if (info.mask & PIPE_MASK_S) { + debug_printf("nv30: cannot blit stencil, skipping\n"); + info.mask &= ~PIPE_MASK_S; + } + + if (!util_blitter_is_blit_supported(nv30->blitter, &info)) { + debug_printf("nv30: blit unsupported %s -> %s\n", + util_format_short_name(info.src.resource->format), + util_format_short_name(info.dst.resource->format)); + return; + } + + /* XXX turn off occlusion queries */ + + util_blitter_save_vertex_buffer_slot(nv30->blitter, nv30->vtxbuf); + util_blitter_save_vertex_elements(nv30->blitter, nv30->vertex); + util_blitter_save_vertex_shader(nv30->blitter, nv30->vertprog.program); + util_blitter_save_rasterizer(nv30->blitter, nv30->rast); + util_blitter_save_viewport(nv30->blitter, &nv30->viewport); + util_blitter_save_scissor(nv30->blitter, &nv30->scissor); + util_blitter_save_fragment_shader(nv30->blitter, nv30->fragprog.program); + util_blitter_save_blend(nv30->blitter, nv30->blend); + util_blitter_save_depth_stencil_alpha(nv30->blitter, + nv30->zsa); + util_blitter_save_stencil_ref(nv30->blitter, &nv30->stencil_ref); + util_blitter_save_sample_mask(nv30->blitter, nv30->sample_mask); + util_blitter_save_framebuffer(nv30->blitter, &nv30->framebuffer); + util_blitter_save_fragment_sampler_states(nv30->blitter, + nv30->fragprog.num_samplers, + (void**)nv30->fragprog.samplers); + util_blitter_save_fragment_sampler_views(nv30->blitter, + nv30->fragprog.num_textures, nv30->fragprog.textures); + util_blitter_save_render_condition(nv30->blitter, nv30->render_cond_query, + nv30->render_cond_cond, nv30->render_cond_mode); + 
util_blitter_blit(nv30->blitter, &info); +} + +static void * +nv30_miptree_transfer_map(struct pipe_context *pipe, struct pipe_resource *pt, + unsigned level, unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_device *dev = nv30->screen->base.device; + struct nv30_transfer *tx; + unsigned access = 0; + int ret; + + tx = CALLOC_STRUCT(nv30_transfer); + if (!tx) + return NULL; + pipe_resource_reference(&tx->base.resource, pt); + tx->base.level = level; + tx->base.usage = usage; + tx->base.box = *box; + tx->base.stride = util_format_get_nblocksx(pt->format, box->width) * + util_format_get_blocksize(pt->format); + tx->base.layer_stride = util_format_get_nblocksy(pt->format, box->height) * + tx->base.stride; + + tx->nblocksx = util_format_get_nblocksx(pt->format, box->width); + tx->nblocksy = util_format_get_nblocksy(pt->format, box->height); + + define_rect(pt, level, box->z, box->x, box->y, + tx->nblocksx, tx->nblocksy, &tx->img); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, + tx->base.layer_stride, NULL, &tx->tmp.bo); + if (ret) { + pipe_resource_reference(&tx->base.resource, NULL); + FREE(tx); + return NULL; + } + + tx->tmp.domain = NOUVEAU_BO_GART; + tx->tmp.offset = 0; + tx->tmp.pitch = tx->base.stride; + tx->tmp.cpp = tx->img.cpp; + tx->tmp.w = tx->nblocksx; + tx->tmp.h = tx->nblocksy; + tx->tmp.d = 1; + tx->tmp.x0 = 0; + tx->tmp.y0 = 0; + tx->tmp.x1 = tx->tmp.w; + tx->tmp.y1 = tx->tmp.h; + tx->tmp.z = 0; + + if (usage & PIPE_TRANSFER_READ) + nv30_transfer_rect(nv30, NEAREST, &tx->img, &tx->tmp); + + if (tx->tmp.bo->map) { + *ptransfer = &tx->base; + return tx->tmp.bo->map; + } + + if (usage & PIPE_TRANSFER_READ) + access |= NOUVEAU_BO_RD; + if (usage & PIPE_TRANSFER_WRITE) + access |= NOUVEAU_BO_WR; + + ret = nouveau_bo_map(tx->tmp.bo, access, nv30->base.client); + if (ret) { + pipe_resource_reference(&tx->base.resource, NULL); + FREE(tx); + 
return NULL; + } + + *ptransfer = &tx->base; + return tx->tmp.bo->map; +} + +static void +nv30_miptree_transfer_unmap(struct pipe_context *pipe, + struct pipe_transfer *ptx) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_transfer *tx = nv30_transfer(ptx); + + if (ptx->usage & PIPE_TRANSFER_WRITE) + nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img); + + nouveau_bo_ref(NULL, &tx->tmp.bo); + pipe_resource_reference(&ptx->resource, NULL); + FREE(tx); +} + +const struct u_resource_vtbl nv30_miptree_vtbl = { + nv30_miptree_get_handle, + nv30_miptree_destroy, + nv30_miptree_transfer_map, + u_default_transfer_flush_region, + nv30_miptree_transfer_unmap, + u_default_transfer_inline_write +}; + +struct pipe_resource * +nv30_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl) +{ + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nv30_miptree *mt = CALLOC_STRUCT(nv30_miptree); + struct pipe_resource *pt = &mt->base.base; + unsigned blocksz, size; + unsigned w, h, d, l; + int ret; + + switch (tmpl->nr_samples) { + case 4: + mt->ms_mode = 0x00004000; + mt->ms_x = 1; + mt->ms_y = 1; + break; + case 2: + mt->ms_mode = 0x00003000; + mt->ms_x = 1; + mt->ms_y = 0; + break; + default: + mt->ms_mode = 0x00000000; + mt->ms_x = 0; + mt->ms_y = 0; + break; + } + + mt->base.vtbl = &nv30_miptree_vtbl; + *pt = *tmpl; + pipe_reference_init(&pt->reference, 1); + pt->screen = pscreen; + + w = pt->width0 << mt->ms_x; + h = pt->height0 << mt->ms_y; + d = (pt->target == PIPE_TEXTURE_3D) ? 
pt->depth0 : 1; + blocksz = util_format_get_blocksize(pt->format); + + if ((pt->target == PIPE_TEXTURE_RECT) || + !util_is_power_of_two(pt->width0) || + !util_is_power_of_two(pt->height0) || + !util_is_power_of_two(pt->depth0) || + util_format_is_compressed(pt->format) || + util_format_is_float(pt->format) || mt->ms_mode) { + mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz; + mt->uniform_pitch = align(mt->uniform_pitch, 64); + } + + if (!mt->uniform_pitch) + mt->swizzled = TRUE; + + size = 0; + for (l = 0; l <= pt->last_level; l++) { + struct nv30_miptree_level *lvl = &mt->level[l]; + unsigned nbx = util_format_get_nblocksx(pt->format, w); + unsigned nby = util_format_get_nblocksx(pt->format, h); + + lvl->offset = size; + lvl->pitch = mt->uniform_pitch; + if (!lvl->pitch) + lvl->pitch = nbx * blocksz; + + lvl->zslice_size = lvl->pitch * nby; + size += lvl->zslice_size * d; + + w = u_minify(w, 1); + h = u_minify(h, 1); + d = u_minify(d, 1); + } + + mt->layer_size = size; + if (pt->target == PIPE_TEXTURE_CUBE) { + if (!mt->uniform_pitch) + mt->layer_size = align(mt->layer_size, 128); + size = mt->layer_size * 6; + } + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 256, size, NULL, &mt->base.bo); + if (ret) { + FREE(mt); + return NULL; + } + + mt->base.domain = NOUVEAU_BO_VRAM; + return &mt->base.base; +} + +struct pipe_resource * +nv30_miptree_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl, + struct winsys_handle *handle) +{ + struct nv30_miptree *mt; + unsigned stride; + + /* only supports 2D, non-mipmapped textures for the moment */ + if ((tmpl->target != PIPE_TEXTURE_2D && + tmpl->target != PIPE_TEXTURE_RECT) || + tmpl->last_level != 0 || + tmpl->depth0 != 1 || + tmpl->array_size > 1) + return NULL; + + mt = CALLOC_STRUCT(nv30_miptree); + if (!mt) + return NULL; + + mt->base.bo = nouveau_screen_bo_from_handle(pscreen, handle, &stride); + if (mt->base.bo == NULL) { + FREE(mt); + return NULL; + } + + mt->base.base = 
*tmpl; + mt->base.vtbl = &nv30_miptree_vtbl; + pipe_reference_init(&mt->base.base.reference, 1); + mt->base.base.screen = pscreen; + mt->uniform_pitch = stride; + mt->level[0].pitch = mt->uniform_pitch; + mt->level[0].offset = 0; + + /* no need to adjust bo reference count */ + return &mt->base.base; +} + +struct pipe_surface * +nv30_miptree_surface_new(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *tmpl) +{ + struct nv30_miptree *mt = nv30_miptree(pt); /* guaranteed */ + struct nv30_surface *ns; + struct pipe_surface *ps; + struct nv30_miptree_level *lvl = &mt->level[tmpl->u.tex.level]; + + ns = CALLOC_STRUCT(nv30_surface); + if (!ns) + return NULL; + ps = &ns->base; + + pipe_reference_init(&ps->reference, 1); + pipe_resource_reference(&ps->texture, pt); + ps->context = pipe; + ps->format = tmpl->format; + ps->u.tex.level = tmpl->u.tex.level; + ps->u.tex.first_layer = tmpl->u.tex.first_layer; + ps->u.tex.last_layer = tmpl->u.tex.last_layer; + + ns->width = u_minify(pt->width0, ps->u.tex.level); + ns->height = u_minify(pt->height0, ps->u.tex.level); + ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1; + ns->offset = layer_offset(pt, ps->u.tex.level, ps->u.tex.first_layer); + if (mt->swizzled) + ns->pitch = 4096; /* random, just something the hw won't reject.. */ + else + ns->pitch = lvl->pitch; + + /* comment says there are going to be removed, but they're used by the st */ + ps->width = ns->width; + ps->height = ns->height; + return ps; +} + +void +nv30_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps) +{ + struct nv30_surface *ns = nv30_surface(ps); + + pipe_resource_reference(&ps->texture, NULL); + FREE(ns); +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_push.c b/src/gallium/drivers/nouveau/nv30/nv30_push.c new file mode 100644 index 00000000000..e0734fa70d3 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_push.c @@ -0,0 +1,290 @@ +/* + * Copyright 2012 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nv_object.xml.h" +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_resource.h" + +struct push_context { + struct nouveau_pushbuf *push; + + const void *idxbuf; + + float edgeflag; + int edgeflag_attr; + + uint32_t vertex_words; + uint32_t packet_vertex_limit; + + struct translate *translate; + + boolean primitive_restart; + uint32_t prim; + uint32_t restart_index; +}; + +static INLINE unsigned +prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static INLINE unsigned +prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static INLINE unsigned +prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static void +emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) +{ + uint8_t *elts = (uint8_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i08(elts, push, ctx->restart_index); + + size = ctx->vertex_words * nr; + + BEGIN_NI04(ctx->push, NV30_3D(VERTEX_DATA), size); + + ctx->translate->run_elts8(ctx->translate, elts, nr, 0, 0, ctx->push->cur); + + ctx->push->cur += size; + count -= nr; + elts += nr; + + if (nr != push) { + BEGIN_NV04(ctx->push, NV30_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (ctx->push, ctx->restart_index); + count--; + elts++; + } + } +} + +static void +emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned 
count) +{ + uint16_t *elts = (uint16_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i16(elts, push, ctx->restart_index); + + size = ctx->vertex_words * nr; + + BEGIN_NI04(ctx->push, NV30_3D(VERTEX_DATA), size); + + ctx->translate->run_elts16(ctx->translate, elts, nr, 0, 0, ctx->push->cur); + + ctx->push->cur += size; + count -= nr; + elts += nr; + + if (nr != push) { + BEGIN_NV04(ctx->push, NV30_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (ctx->push, ctx->restart_index); + count--; + elts++; + } + } +} + +static void +emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) +{ + uint32_t *elts = (uint32_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i32(elts, push, ctx->restart_index); + + size = ctx->vertex_words * nr; + + BEGIN_NI04(ctx->push, NV30_3D(VERTEX_DATA), size); + + ctx->translate->run_elts(ctx->translate, elts, nr, 0, 0, ctx->push->cur); + + ctx->push->cur += size; + count -= nr; + elts += nr; + + if (nr != push) { + BEGIN_NV04(ctx->push, NV30_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (ctx->push, ctx->restart_index); + count--; + elts++; + } + } +} + +static void +emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) +{ + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size = ctx->vertex_words * push; + + BEGIN_NI04(ctx->push, NV30_3D(VERTEX_DATA), size); + + ctx->translate->run(ctx->translate, start, push, 0, 0, ctx->push->cur); + ctx->push->cur += size; + count -= push; + start += push; + } +} + +void +nv30_push_vbo(struct nv30_context *nv30, const struct pipe_draw_info *info) +{ + struct push_context ctx; + unsigned i, index_size; + boolean apply_bias = info->indexed && info->index_bias; + + 
ctx.push = nv30->base.pushbuf; + ctx.translate = nv30->vertex->translate; + ctx.packet_vertex_limit = nv30->vertex->vtx_per_packet_max; + ctx.vertex_words = nv30->vertex->vtx_size; + + for (i = 0; i < nv30->num_vtxbufs; ++i) { + uint8_t *data; + struct pipe_vertex_buffer *vb = &nv30->vtxbuf[i]; + struct nv04_resource *res = nv04_resource(vb->buffer); + + if (!vb->buffer && !vb->user_buffer) { + continue; + } + + data = nouveau_resource_map_offset(&nv30->base, res, + vb->buffer_offset, NOUVEAU_BO_RD); + + if (apply_bias) + data += info->index_bias * vb->stride; + + ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); + } + + if (info->indexed) { + if (nv30->idxbuf.buffer) + ctx.idxbuf = nouveau_resource_map_offset(&nv30->base, + nv04_resource(nv30->idxbuf.buffer), nv30->idxbuf.offset, + NOUVEAU_BO_RD); + else + ctx.idxbuf = nv30->idxbuf.user_buffer; + if (!ctx.idxbuf) { + nv30_state_release(nv30); + return; + } + index_size = nv30->idxbuf.index_size; + ctx.primitive_restart = info->primitive_restart; + ctx.restart_index = info->restart_index; + } else { + ctx.idxbuf = NULL; + index_size = 0; + ctx.primitive_restart = FALSE; + ctx.restart_index = 0; + } + + if (nv30->screen->eng3d->oclass >= NV40_3D_CLASS) { + BEGIN_NV04(ctx.push, NV40_3D(PRIM_RESTART_ENABLE), 2); + PUSH_DATA (ctx.push, info->primitive_restart); + PUSH_DATA (ctx.push, info->restart_index); + nv30->state.prim_restart = info->primitive_restart; + } + + ctx.prim = nv30_prim_gl(info->mode); + + PUSH_RESET(ctx.push, BUFCTX_IDXBUF); + BEGIN_NV04(ctx.push, NV30_3D(VERTEX_BEGIN_END), 1); + PUSH_DATA (ctx.push, ctx.prim); + switch (index_size) { + case 0: + emit_vertices_seq(&ctx, info->start, info->count); + break; + case 1: + emit_vertices_i08(&ctx, info->start, info->count); + break; + case 2: + emit_vertices_i16(&ctx, info->start, info->count); + break; + case 4: + emit_vertices_i32(&ctx, info->start, info->count); + break; + default: + assert(0); + break; + } + BEGIN_NV04(ctx.push, 
NV30_3D(VERTEX_BEGIN_END), 1); + PUSH_DATA (ctx.push, NV30_3D_VERTEX_BEGIN_END_STOP); + + if (info->indexed) + nouveau_resource_unmap(nv04_resource(nv30->idxbuf.buffer)); + + for (i = 0; i < nv30->num_vtxbufs; ++i) { + if (nv30->vtxbuf[i].buffer) { + nouveau_resource_unmap(nv04_resource(nv30->vtxbuf[i].buffer)); + } + } + + nv30_state_release(nv30); +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_query.c b/src/gallium/drivers/nouveau/nv30/nv30_query.c new file mode 100644 index 00000000000..01b3817c45d --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_query.c @@ -0,0 +1,274 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#include "nv_object.xml.h" +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv30_screen.h" +#include "nv30/nv30_context.h" + +#define LIST_FIRST_ENTRY(__type, __item, __field) \ + LIST_ENTRY(__type, (__item)->next, __field) + +struct nv30_query_object { + struct list_head list; + struct nouveau_heap *hw; +}; + +static volatile void * +nv30_ntfy(struct nv30_screen *screen, struct nv30_query_object *qo) +{ + struct nv04_notify *query = screen->query->data; + struct nouveau_bo *notify = screen->notify; + volatile void *ntfy = NULL; + + if (qo && qo->hw) + ntfy = (char *)notify->map + query->offset + qo->hw->start; + + return ntfy; +} + +static void +nv30_query_object_del(struct nv30_screen *screen, struct nv30_query_object **po) +{ + struct nv30_query_object *qo = *po; *po = NULL; + if (qo) { + volatile uint32_t *ntfy = nv30_ntfy(screen, qo); + while (ntfy[3] & 0xff000000) { + } + nouveau_heap_free(&qo->hw); + LIST_DEL(&qo->list); + FREE(qo); + } +} + +static struct nv30_query_object * +nv30_query_object_new(struct nv30_screen *screen) +{ + struct nv30_query_object *oq, *qo = CALLOC_STRUCT(nv30_query_object); + volatile uint32_t *ntfy; + + if (!qo) + return NULL; + + /* allocate a new hw query object, if no hw objects left we need to + * spin waiting for one to become free + */ + while (nouveau_heap_alloc(screen->query_heap, 32, NULL, &qo->hw)) { + oq = LIST_FIRST_ENTRY(struct nv30_query_object, &screen->queries, list); + nv30_query_object_del(screen, &oq); + } + + LIST_ADDTAIL(&qo->list, &screen->queries); + + ntfy = nv30_ntfy(screen, qo); + ntfy[0] = 0x00000000; + ntfy[1] = 0x00000000; + ntfy[2] = 0x00000000; + ntfy[3] = 0x01000000; + return qo; +} + +struct nv30_query { + struct nv30_query_object *qo[2]; + unsigned type; + uint32_t report; + uint32_t enable; + uint64_t result; +}; + +static INLINE struct nv30_query * +nv30_query(struct pipe_query *pipe) +{ + return (struct nv30_query *)pipe; +} + +static struct pipe_query * 
+nv30_query_create(struct pipe_context *pipe, unsigned type) +{ + struct nv30_query *q = CALLOC_STRUCT(nv30_query); + if (!q) + return NULL; + + q->type = type; + + switch (q->type) { + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIME_ELAPSED: + q->enable = 0x0000; + q->report = 1; + break; + case PIPE_QUERY_OCCLUSION_COUNTER: + q->enable = NV30_3D_QUERY_ENABLE; + q->report = 1; + break; + case NV30_QUERY_ZCULL_0: + case NV30_QUERY_ZCULL_1: + case NV30_QUERY_ZCULL_2: + case NV30_QUERY_ZCULL_3: + q->enable = 0x1804; + q->report = 2 + (q->type - NV30_QUERY_ZCULL_0); + break; + default: + FREE(q); + return NULL; + } + + return (struct pipe_query *)q; +} + +static void +nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + FREE(pq); +} + +static void +nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + struct nouveau_pushbuf *push = nv30->base.pushbuf; + + switch (q->type) { + case PIPE_QUERY_TIME_ELAPSED: + q->qo[0] = nv30_query_object_new(nv30->screen); + if (q->qo[0]) { + BEGIN_NV04(push, NV30_3D(QUERY_GET), 1); + PUSH_DATA (push, (q->report << 24) | q->qo[0]->hw->start); + } + break; + case PIPE_QUERY_TIMESTAMP: + return; + default: + BEGIN_NV04(push, NV30_3D(QUERY_RESET), 1); + PUSH_DATA (push, q->report); + break; + } + + if (q->enable) { + BEGIN_NV04(push, SUBC_3D(q->enable), 1); + PUSH_DATA (push, 1); + } +} + +static void +nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_screen *screen = nv30->screen; + struct nv30_query *q = nv30_query(pq); + struct nouveau_pushbuf *push = nv30->base.pushbuf; + + q->qo[1] = nv30_query_object_new(screen); + if (q->qo[1]) { + BEGIN_NV04(push, NV30_3D(QUERY_GET), 1); + PUSH_DATA (push, (q->report << 24) | q->qo[1]->hw->start); + } + + if (q->enable) { + BEGIN_NV04(push, SUBC_3D(q->enable), 1); + PUSH_DATA (push, 0); + 
} + PUSH_KICK (push); +} + +static boolean +nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, union pipe_query_result *result) +{ + struct nv30_screen *screen = nv30_screen(pipe->screen); + struct nv30_query *q = nv30_query(pq); + volatile uint32_t *ntfy0 = nv30_ntfy(screen, q->qo[0]); + volatile uint32_t *ntfy1 = nv30_ntfy(screen, q->qo[1]); + uint64_t *res64 = &result->u64; + + if (ntfy1) { + while (ntfy1[3] & 0xff000000) { + if (!wait) + return FALSE; + } + + switch (q->type) { + case PIPE_QUERY_TIMESTAMP: + q->result = *(uint64_t *)&ntfy1[0]; + break; + case PIPE_QUERY_TIME_ELAPSED: + q->result = *(uint64_t *)&ntfy1[0] - *(uint64_t *)&ntfy0[0]; + break; + default: + q->result = ntfy1[2]; + break; + } + + nv30_query_object_del(screen, &q->qo[0]); + nv30_query_object_del(screen, &q->qo[1]); + } + + *res64 = q->result; + return TRUE; +} + +static void +nv40_query_render_condition(struct pipe_context *pipe, + struct pipe_query *pq, + boolean condition, uint mode) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + struct nouveau_pushbuf *push = nv30->base.pushbuf; + + nv30->render_cond_query = pq; + nv30->render_cond_mode = mode; + nv30->render_cond_cond = condition; + + if (!pq) { + BEGIN_NV04(push, SUBC_3D(0x1e98), 1); + PUSH_DATA (push, 0x01000000); + return; + } + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + BEGIN_NV04(push, SUBC_3D(0x0110), 1); + PUSH_DATA (push, 0); + } + + BEGIN_NV04(push, SUBC_3D(0x1e98), 1); + PUSH_DATA (push, 0x02000000 | q->qo[1]->hw->start); +} + +void +nv30_query_init(struct pipe_context *pipe) +{ + struct nouveau_object *eng3d = nv30_context(pipe)->screen->eng3d; + + pipe->create_query = nv30_query_create; + pipe->destroy_query = nv30_query_destroy; + pipe->begin_query = nv30_query_begin; + pipe->end_query = nv30_query_end; + pipe->get_query_result = nv30_query_result; + if (eng3d->oclass >= NV40_3D_CLASS) + 
pipe->render_condition = nv40_query_render_condition; +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_resource.c b/src/gallium/drivers/nouveau/nv30/nv30_resource.c new file mode 100644 index 00000000000..c99db1ce91b --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.c @@ -0,0 +1,77 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#include "util/u_format.h" +#include "util/u_inlines.h" + +#include "nv30/nv30_screen.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_resource.h" +#include "nv30/nv30_transfer.h" + +static struct pipe_resource * +nv30_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl) +{ + switch (tmpl->target) { + case PIPE_BUFFER: + return nouveau_buffer_create(pscreen, tmpl); + default: + return nv30_miptree_create(pscreen, tmpl); + } +} + +static struct pipe_resource * +nv30_resource_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl, + struct winsys_handle *handle) +{ + if (tmpl->target == PIPE_BUFFER) + return NULL; + else + return nv30_miptree_from_handle(pscreen, tmpl, handle); +} + +void +nv30_resource_screen_init(struct pipe_screen *pscreen) +{ + pscreen->resource_create = nv30_resource_create; + pscreen->resource_from_handle = nv30_resource_from_handle; + pscreen->resource_get_handle = u_resource_get_handle_vtbl; + pscreen->resource_destroy = u_resource_destroy_vtbl; +} + +void +nv30_resource_init(struct pipe_context *pipe) +{ + pipe->transfer_map = u_transfer_map_vtbl; + pipe->transfer_flush_region = u_transfer_flush_region_vtbl; + pipe->transfer_unmap = u_transfer_unmap_vtbl; + pipe->transfer_inline_write = u_transfer_inline_write_vtbl; + pipe->create_surface = nv30_miptree_surface_new; + pipe->surface_destroy = nv30_miptree_surface_del; + pipe->resource_copy_region = nv30_resource_copy_region; + pipe->blit = nv30_blit; +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_resource.h b/src/gallium/drivers/nouveau/nv30/nv30_resource.h new file mode 100644 index 00000000000..aff41966b3c --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.h @@ -0,0 +1,75 @@ +#ifndef __NV30_RESOURCE_H__ +#define __NV30_RESOURCE_H__ + +#include "nouveau_buffer.h" + +void nv30_resource_screen_init(struct pipe_screen *); +void nv30_resource_init(struct pipe_context *); + 
+struct nv30_surface { + struct pipe_surface base; + uint32_t offset; + uint32_t pitch; + uint32_t width; + uint16_t height; + uint16_t depth; +}; + +static INLINE struct nv30_surface * +nv30_surface(struct pipe_surface *ps) +{ + return (struct nv30_surface *)ps; +} + +struct nv30_miptree_level { + uint32_t offset; + uint32_t pitch; + uint32_t zslice_size; +}; + +struct nv30_miptree { + struct nv04_resource base; + struct nv30_miptree_level level[13]; + uint32_t uniform_pitch; + uint32_t layer_size; + boolean swizzled; + unsigned ms_mode; + unsigned ms_x:1; + unsigned ms_y:1; +}; + +static INLINE struct nv30_miptree * +nv30_miptree(struct pipe_resource *pt) +{ + return (struct nv30_miptree *)pt; +} + +struct pipe_resource * +nv30_miptree_create(struct pipe_screen *, const struct pipe_resource *); + +struct pipe_resource * +nv30_miptree_from_handle(struct pipe_screen *, const struct pipe_resource *, + struct winsys_handle *); + +struct pipe_surface * +nv30_miptree_surface_new(struct pipe_context *, struct pipe_resource *, + const struct pipe_surface *); + +void +nv30_miptree_surface_del(struct pipe_context *, struct pipe_surface *); + +void +nv30_resource_copy_region(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box); + +void +nv30_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *); + +void +nv30_blit(struct pipe_context *pipe, + const struct pipe_blit_info *blit_info); + +#endif diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c new file mode 100644 index 00000000000..50ddfec3b21 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -0,0 +1,588 @@ +/* + * Copyright 2012 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#include "util/u_format.h" +#include "util/u_format_s3tc.h" + +#include "nv_object.xml.h" +#include "nv_m2mf.xml.h" +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv01_2d.xml.h" + +#include "nouveau_fence.h" +#include "nv30/nv30_screen.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_resource.h" +#include "nv30/nv30_format.h" + +#define RANKINE_0397_CHIPSET 0x00000003 +#define RANKINE_0497_CHIPSET 0x000001e0 +#define RANKINE_0697_CHIPSET 0x00000010 +#define CURIE_4097_CHIPSET 0x00000baf +#define CURIE_4497_CHIPSET 0x00005450 +#define CURIE_4497_CHIPSET6X 0x00000088 + +static int +nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + struct nouveau_object *eng3d = screen->eng3d; + + switch (param) { + /* non-boolean capabilities */ + case PIPE_CAP_MAX_RENDER_TARGETS: + return (eng3d->oclass >= NV40_3D_CLASS) ? 4 : 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 16; + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 120; + /* supported capabilities */ + case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_SCALED_RESOLVE: + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_QUERY_TIME_ELAPSED: + case PIPE_CAP_QUERY_TIMESTAMP: + case PIPE_CAP_TEXTURE_SHADOW_MAP: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_TGSI_TEXCOORD: + case PIPE_CAP_USER_CONSTANT_BUFFERS: + case PIPE_CAP_USER_INDEX_BUFFERS: + return 1; + case PIPE_CAP_USER_VERTEX_BUFFERS: + return 0; + case 
PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 16; + /* nv4x capabilities */ + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_PRIMITIVE_RESTART: + return (eng3d->oclass >= NV40_3D_CLASS) ? 1 : 0; + /* unsupported */ + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + case PIPE_CAP_SM3: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: /* XXX: yes? */ + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_MIN_TEXEL_OFFSET: + case PIPE_CAP_MAX_TEXEL_OFFSET: + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + case PIPE_CAP_TEXTURE_BARRIER: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_CUBE_MAP_ARRAY: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_START_INSTANCE: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return 0; + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 1; + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + default: + debug_printf("unknown param %d\n", param); + return 0; + } +} + +static float 
+nv30_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + struct nouveau_object *eng3d = screen->eng3d; + + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return (eng3d->oclass >= NV40_3D_CLASS) ? 16.0 : 8.0; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 15.0; + default: + debug_printf("unknown paramf %d\n", param); + return 0; + } +} + +static int +nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, + enum pipe_shader_cap param) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + struct nouveau_object *eng3d = screen->eng3d; + + switch (shader) { + case PIPE_SHADER_VERTEX: + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + return (eng3d->oclass >= NV40_3D_CLASS) ? 512 : 256; + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return (eng3d->oclass >= NV40_3D_CLASS) ? 512 : 0; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 0; + case PIPE_SHADER_CAP_MAX_INPUTS: + return 16; + case PIPE_SHADER_CAP_MAX_CONSTS: + return (eng3d->oclass >= NV40_3D_CLASS) ? (468 - 6): (256 - 6); + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 1; + case PIPE_SHADER_CAP_MAX_TEMPS: + return (eng3d->oclass >= NV40_3D_CLASS) ? 
32 : 13; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + return 0; + case PIPE_SHADER_CAP_MAX_ADDRS: + return 2; + case PIPE_SHADER_CAP_MAX_PREDS: + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + case PIPE_SHADER_CAP_SUBROUTINES: + case PIPE_SHADER_CAP_INTEGERS: + return 0; + default: + debug_printf("unknown vertex shader param %d\n", param); + return 0; + } + break; + case PIPE_SHADER_FRAGMENT: + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 4096; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 0; + case PIPE_SHADER_CAP_MAX_INPUTS: + return (eng3d->oclass >= NV40_3D_CLASS) ? 12 : 10; + case PIPE_SHADER_CAP_MAX_CONSTS: + return (eng3d->oclass >= NV40_3D_CLASS) ? 224 : 32; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 1; + case PIPE_SHADER_CAP_MAX_TEMPS: + return 32; + case PIPE_SHADER_CAP_MAX_ADDRS: + return (eng3d->oclass >= NV40_3D_CLASS) ? 
1 : 0; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + return 16; + case PIPE_SHADER_CAP_MAX_PREDS: + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; + default: + debug_printf("unknown fragment shader param %d\n", param); + return 0; + } + break; + default: + return 0; + } +} + +static boolean +nv30_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bindings) +{ + if (sample_count > 4) + return FALSE; + if (!(0x00000017 & (1 << sample_count))) + return FALSE; + + if (!util_format_is_supported(format, bindings)) { + return FALSE; + } + + /* transfers & shared are always supported */ + bindings &= ~(PIPE_BIND_TRANSFER_READ | + PIPE_BIND_TRANSFER_WRITE | + PIPE_BIND_SHARED); + + return (nv30_format_info(pscreen, format)->bindings & bindings) == bindings; +} + +static void +nv30_screen_fence_emit(struct pipe_screen *pscreen, uint32_t *sequence) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + struct nouveau_pushbuf *push = screen->base.pushbuf; + + *sequence = ++screen->base.fence.sequence; + + BEGIN_NV04(push, NV30_3D(FENCE_OFFSET), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, *sequence); +} + +static uint32_t +nv30_screen_fence_update(struct pipe_screen *pscreen) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + struct nv04_notify *fence = screen->fence->data; + return *(uint32_t *)((char *)screen->notify->map + fence->offset); +} + +static void +nv30_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + + if (screen->base.fence.current && + screen->base.fence.current->state >= NOUVEAU_FENCE_STATE_EMITTED) { + 
nouveau_fence_wait(screen->base.fence.current); + nouveau_fence_ref (NULL, &screen->base.fence.current); + } + + nouveau_object_del(&screen->query); + nouveau_object_del(&screen->fence); + nouveau_object_del(&screen->ntfy); + + nouveau_object_del(&screen->sifm); + nouveau_object_del(&screen->swzsurf); + nouveau_object_del(&screen->surf2d); + nouveau_object_del(&screen->m2mf); + nouveau_object_del(&screen->eng3d); + nouveau_object_del(&screen->null); + + nouveau_screen_fini(&screen->base); + FREE(screen); +} + +#define FAIL_SCREEN_INIT(str, err) \ + do { \ + NOUVEAU_ERR(str, err); \ + nv30_screen_destroy(pscreen); \ + return NULL; \ + } while(0) + +struct pipe_screen * +nv30_screen_create(struct nouveau_device *dev) +{ + struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen); + struct pipe_screen *pscreen; + struct nouveau_pushbuf *push; + struct nv04_fifo *fifo; + unsigned oclass = 0; + int ret, i; + + if (!screen) + return NULL; + + switch (dev->chipset & 0xf0) { + case 0x30: + if (RANKINE_0397_CHIPSET & (1 << (dev->chipset & 0x0f))) + oclass = NV30_3D_CLASS; + else + if (RANKINE_0697_CHIPSET & (1 << (dev->chipset & 0x0f))) + oclass = NV34_3D_CLASS; + else + if (RANKINE_0497_CHIPSET & (1 << (dev->chipset & 0x0f))) + oclass = NV35_3D_CLASS; + break; + case 0x40: + if (CURIE_4097_CHIPSET & (1 << (dev->chipset & 0x0f))) + oclass = NV40_3D_CLASS; + else + if (CURIE_4497_CHIPSET & (1 << (dev->chipset & 0x0f))) + oclass = NV44_3D_CLASS; + break; + case 0x60: + if (CURIE_4497_CHIPSET6X & (1 << (dev->chipset & 0x0f))) + oclass = NV44_3D_CLASS; + break; + default: + break; + } + + if (!oclass) { + NOUVEAU_ERR("unknown 3d class for 0x%02x\n", dev->chipset); + FREE(screen); + return NULL; + } + + pscreen = &screen->base.base; + pscreen->destroy = nv30_screen_destroy; + pscreen->get_param = nv30_screen_get_param; + pscreen->get_paramf = nv30_screen_get_paramf; + pscreen->get_shader_param = nv30_screen_get_shader_param; + pscreen->context_create = nv30_context_create; + 
pscreen->is_format_supported = nv30_screen_is_format_supported; + nv30_resource_screen_init(pscreen); + nouveau_screen_init_vdec(&screen->base); + + screen->base.fence.emit = nv30_screen_fence_emit; + screen->base.fence.update = nv30_screen_fence_update; + + ret = nouveau_screen_init(&screen->base, dev); + if (ret) + FAIL_SCREEN_INIT("nv30_screen_init failed: %d\n", ret); + + screen->base.vidmem_bindings |= PIPE_BIND_VERTEX_BUFFER; + screen->base.sysmem_bindings |= PIPE_BIND_VERTEX_BUFFER; + if (oclass == NV40_3D_CLASS) { + screen->base.vidmem_bindings |= PIPE_BIND_INDEX_BUFFER; + screen->base.sysmem_bindings |= PIPE_BIND_INDEX_BUFFER; + } + + fifo = screen->base.channel->data; + push = screen->base.pushbuf; + push->rsvd_kick = 16; + + ret = nouveau_object_new(screen->base.channel, 0x00000000, NV01_NULL_CLASS, + NULL, 0, &screen->null); + if (ret) + FAIL_SCREEN_INIT("error allocating null object: %d\n", ret); + + /* DMA_FENCE refuses to accept DMA objects with "adjust" filled in, + * this means that the address pointed at by the DMA object must + * be 4KiB aligned, which means this object needs to be the first + * one allocated on the channel. 
+ */ + ret = nouveau_object_new(screen->base.channel, 0xbeef1e00, + NOUVEAU_NOTIFIER_CLASS, &(struct nv04_notify) { + .length = 32 }, sizeof(struct nv04_notify), + &screen->fence); + if (ret) + FAIL_SCREEN_INIT("error allocating fence notifier: %d\n", ret); + + /* DMA_NOTIFY object, we don't actually use this but M2MF fails without */ + ret = nouveau_object_new(screen->base.channel, 0xbeef0301, + NOUVEAU_NOTIFIER_CLASS, &(struct nv04_notify) { + .length = 32 }, sizeof(struct nv04_notify), + &screen->ntfy); + if (ret) + FAIL_SCREEN_INIT("error allocating sync notifier: %d\n", ret); + + /* DMA_QUERY, used to implement occlusion queries, we attempt to allocate + * the remainder of the "notifier block" assigned by the kernel for + * use as query objects + */ + ret = nouveau_object_new(screen->base.channel, 0xbeef0351, + NOUVEAU_NOTIFIER_CLASS, &(struct nv04_notify) { + .length = 4096 - 128 }, sizeof(struct nv04_notify), + &screen->query); + if (ret) + FAIL_SCREEN_INIT("error allocating query notifier: %d\n", ret); + + ret = nouveau_heap_init(&screen->query_heap, 0, 4096 - 128); + if (ret) + FAIL_SCREEN_INIT("error creating query heap: %d\n", ret); + + LIST_INITHEAD(&screen->queries); + + /* Vertex program resources (code/data), currently 6 of the constant + * slots are reserved to implement user clipping planes + */ + if (oclass < NV40_3D_CLASS) { + nouveau_heap_init(&screen->vp_exec_heap, 0, 256); + nouveau_heap_init(&screen->vp_data_heap, 6, 256 - 6); + } else { + nouveau_heap_init(&screen->vp_exec_heap, 0, 512); + nouveau_heap_init(&screen->vp_data_heap, 6, 468 - 6); + } + + ret = nouveau_bo_wrap(screen->base.device, fifo->notify, &screen->notify); + if (ret == 0) + nouveau_bo_map(screen->notify, 0, screen->base.client); + if (ret) + FAIL_SCREEN_INIT("error mapping notifier memory: %d\n", ret); + + ret = nouveau_object_new(screen->base.channel, 0xbeef3097, oclass, + NULL, 0, &screen->eng3d); + if (ret) + FAIL_SCREEN_INIT("error allocating 3d object: %d\n", ret); + + 
BEGIN_NV04(push, NV01_SUBC(3D, OBJECT), 1); + PUSH_DATA (push, screen->eng3d->handle); + BEGIN_NV04(push, NV30_3D(DMA_NOTIFY), 13); + PUSH_DATA (push, screen->ntfy->handle); + PUSH_DATA (push, fifo->vram); /* TEXTURE0 */ + PUSH_DATA (push, fifo->gart); /* TEXTURE1 */ + PUSH_DATA (push, fifo->vram); /* COLOR1 */ + PUSH_DATA (push, screen->null->handle); /* UNK190 */ + PUSH_DATA (push, fifo->vram); /* COLOR0 */ + PUSH_DATA (push, fifo->vram); /* ZETA */ + PUSH_DATA (push, fifo->vram); /* VTXBUF0 */ + PUSH_DATA (push, fifo->gart); /* VTXBUF1 */ + PUSH_DATA (push, screen->fence->handle); /* FENCE */ + PUSH_DATA (push, screen->query->handle); /* QUERY - intr 0x80 if nullobj */ + PUSH_DATA (push, screen->null->handle); /* UNK1AC */ + PUSH_DATA (push, screen->null->handle); /* UNK1B0 */ + if (screen->eng3d->oclass < NV40_3D_CLASS) { + BEGIN_NV04(push, SUBC_3D(0x03b0), 1); + PUSH_DATA (push, 0x00100000); + BEGIN_NV04(push, SUBC_3D(0x1d80), 1); + PUSH_DATA (push, 3); + + BEGIN_NV04(push, SUBC_3D(0x1e98), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, SUBC_3D(0x17e0), 3); + PUSH_DATA (push, fui(0.0)); + PUSH_DATA (push, fui(0.0)); + PUSH_DATA (push, fui(1.0)); + BEGIN_NV04(push, SUBC_3D(0x1f80), 16); + for (i = 0; i < 16; i++) + PUSH_DATA (push, (i == 8) ? 
0x0000ffff : 0); + + BEGIN_NV04(push, NV30_3D(RC_ENABLE), 1); + PUSH_DATA (push, 0); + } else { + BEGIN_NV04(push, NV40_3D(DMA_COLOR2), 2); + PUSH_DATA (push, fifo->vram); + PUSH_DATA (push, fifo->vram); /* COLOR3 */ + + BEGIN_NV04(push, SUBC_3D(0x1450), 1); + PUSH_DATA (push, 0x00000004); + + BEGIN_NV04(push, SUBC_3D(0x1ea4), 3); /* ZCULL */ + PUSH_DATA (push, 0x00000010); + PUSH_DATA (push, 0x01000100); + PUSH_DATA (push, 0xff800006); + + /* vtxprog output routing */ + BEGIN_NV04(push, SUBC_3D(0x1fc4), 1); + PUSH_DATA (push, 0x06144321); + BEGIN_NV04(push, SUBC_3D(0x1fc8), 2); + PUSH_DATA (push, 0xedcba987); + PUSH_DATA (push, 0x0000006f); + BEGIN_NV04(push, SUBC_3D(0x1fd0), 1); + PUSH_DATA (push, 0x00171615); + BEGIN_NV04(push, SUBC_3D(0x1fd4), 1); + PUSH_DATA (push, 0x001b1a19); + + BEGIN_NV04(push, SUBC_3D(0x1ef8), 1); + PUSH_DATA (push, 0x0020ffff); + BEGIN_NV04(push, SUBC_3D(0x1d64), 1); + PUSH_DATA (push, 0x01d300d4); + + BEGIN_NV04(push, NV40_3D(MIPMAP_ROUNDING), 1); + PUSH_DATA (push, NV40_3D_MIPMAP_ROUNDING_MODE_DOWN); + } + + ret = nouveau_object_new(screen->base.channel, 0xbeef3901, NV03_M2MF_CLASS, + NULL, 0, &screen->m2mf); + if (ret) + FAIL_SCREEN_INIT("error allocating m2mf object: %d\n", ret); + + BEGIN_NV04(push, NV01_SUBC(M2MF, OBJECT), 1); + PUSH_DATA (push, screen->m2mf->handle); + BEGIN_NV04(push, NV03_M2MF(DMA_NOTIFY), 1); + PUSH_DATA (push, screen->ntfy->handle); + + ret = nouveau_object_new(screen->base.channel, 0xbeef6201, + NV10_SURFACE_2D_CLASS, NULL, 0, &screen->surf2d); + if (ret) + FAIL_SCREEN_INIT("error allocating surf2d object: %d\n", ret); + + BEGIN_NV04(push, NV01_SUBC(SF2D, OBJECT), 1); + PUSH_DATA (push, screen->surf2d->handle); + BEGIN_NV04(push, NV04_SF2D(DMA_NOTIFY), 1); + PUSH_DATA (push, screen->ntfy->handle); + + if (dev->chipset < 0x40) + oclass = NV30_SURFACE_SWZ_CLASS; + else + oclass = NV40_SURFACE_SWZ_CLASS; + + ret = nouveau_object_new(screen->base.channel, 0xbeef5201, oclass, + NULL, 0, &screen->swzsurf); + if 
(ret) + FAIL_SCREEN_INIT("error allocating swizzled surface object: %d\n", ret); + + BEGIN_NV04(push, NV01_SUBC(SSWZ, OBJECT), 1); + PUSH_DATA (push, screen->swzsurf->handle); + BEGIN_NV04(push, NV04_SSWZ(DMA_NOTIFY), 1); + PUSH_DATA (push, screen->ntfy->handle); + + if (dev->chipset < 0x40) + oclass = NV30_SIFM_CLASS; + else + oclass = NV40_SIFM_CLASS; + + ret = nouveau_object_new(screen->base.channel, 0xbeef7701, oclass, + NULL, 0, &screen->sifm); + if (ret) + FAIL_SCREEN_INIT("error allocating scaled image object: %d\n", ret); + + BEGIN_NV04(push, NV01_SUBC(SIFM, OBJECT), 1); + PUSH_DATA (push, screen->sifm->handle); + BEGIN_NV04(push, NV03_SIFM(DMA_NOTIFY), 1); + PUSH_DATA (push, screen->ntfy->handle); + BEGIN_NV04(push, NV05_SIFM(COLOR_CONVERSION), 1); + PUSH_DATA (push, NV05_SIFM_COLOR_CONVERSION_TRUNCATE); + + nouveau_pushbuf_kick(push, push->channel); + + nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE); + return pscreen; +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.h b/src/gallium/drivers/nouveau/nv30/nv30_screen.h new file mode 100644 index 00000000000..c4c3aae52d9 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.h @@ -0,0 +1,51 @@ +#ifndef __NV30_SCREEN_H__ +#define __NV30_SCREEN_H__ + +#include <stdio.h> + +#define NOUVEAU_ERR(fmt, args...) 
\ + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args); + +#include "util/u_double_list.h" + +#include "nouveau_screen.h" +#include "nouveau_fence.h" +#include "nouveau_heap.h" +#include "nv30/nv30_winsys.h" +#include "nv30/nv30_resource.h" + +struct nv30_context; + +struct nv30_screen { + struct nouveau_screen base; + + struct nv30_context *cur_ctx; + + struct nouveau_bo *notify; + + struct nouveau_object *ntfy; + struct nouveau_object *fence; + + struct nouveau_object *query; + struct nouveau_heap *query_heap; + struct list_head queries; + + struct nouveau_object *null; + struct nouveau_object *eng3d; + struct nouveau_object *m2mf; + struct nouveau_object *surf2d; + struct nouveau_object *swzsurf; + struct nouveau_object *sifm; + + /*XXX: nvfx state */ + struct nouveau_heap *vp_exec_heap; + struct nouveau_heap *vp_data_heap; +}; + +static INLINE struct nv30_screen * +nv30_screen(struct pipe_screen *pscreen) +{ + return (struct nv30_screen *)pscreen; +} + +#endif diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state.c b/src/gallium/drivers/nouveau/nv30/nv30_state.c new file mode 100644 index 00000000000..64be1b78f79 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_state.c @@ -0,0 +1,458 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + * + */ + +#include "util/u_helpers.h" +#include "util/u_inlines.h" + +#include "nouveau_gldefs.h" +#include "nv_object.xml.h" +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_winsys.h" + +#define NV40_3D_MRT_BLEND_ENABLE 0x0000036c + +static void * +nv30_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nouveau_object *eng3d = nv30_context(pipe)->screen->eng3d; + struct nv30_blend_stateobj *so; + uint32_t blend[2], cmask[2]; + int i; + + so = CALLOC_STRUCT(nv30_blend_stateobj); + if (!so) + return NULL; + so->pipe = *cso; + + if (cso->logicop_enable) { + SB_MTHD30(so, COLOR_LOGIC_OP_ENABLE, 2); + SB_DATA (so, 1); + SB_DATA (so, nvgl_logicop_func(cso->logicop_func)); + } else { + SB_MTHD30(so, COLOR_LOGIC_OP_ENABLE, 1); + SB_DATA (so, 0); + } + + SB_MTHD30(so, DITHER_ENABLE, 1); + SB_DATA (so, cso->dither); + + blend[0] = cso->rt[0].blend_enable; + cmask[0] = !!(cso->rt[0].colormask & PIPE_MASK_A) << 24 | + !!(cso->rt[0].colormask & PIPE_MASK_R) << 16 | + !!(cso->rt[0].colormask & PIPE_MASK_G) << 8 | + !!(cso->rt[0].colormask & PIPE_MASK_B); + if (cso->independent_blend_enable) { + blend[1] = 0; + cmask[1] = 0; + for (i = 1; i < 4; i++) { + blend[1] |= cso->rt[i].blend_enable << i; + cmask[1] |= !!(cso->rt[i].colormask & PIPE_MASK_A) << (0 + (i * 4)) | + !!(cso->rt[i].colormask & PIPE_MASK_R) << (1 + (i * 4)) | + !!(cso->rt[i].colormask & PIPE_MASK_G) << (2 + (i * 4)) | 
+ !!(cso->rt[i].colormask & PIPE_MASK_B) << (3 + (i * 4)); + } + } else { + blend[1] = 0x0000000e * (blend[0] & 0x00000001); + cmask[1] = 0x00001110 * !!(cmask[0] & 0x01000000); + cmask[1] |= 0x00002220 * !!(cmask[0] & 0x00010000); + cmask[1] |= 0x00004440 * !!(cmask[0] & 0x00000100); + cmask[1] |= 0x00008880 * !!(cmask[0] & 0x00000001); + } + + if (eng3d->oclass >= NV40_3D_CLASS) { + SB_MTHD40(so, MRT_BLEND_ENABLE, 2); + SB_DATA (so, blend[1]); + SB_DATA (so, cmask[1]); + } + + if (blend[0] || blend[1]) { + SB_MTHD30(so, BLEND_FUNC_ENABLE, 3); + SB_DATA (so, blend[0]); + SB_DATA (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | + nvgl_blend_func(cso->rt[0].rgb_src_factor)); + SB_DATA (so, (nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16) | + nvgl_blend_func(cso->rt[0].rgb_dst_factor)); + if (eng3d->oclass < NV40_3D_CLASS) { + SB_MTHD30(so, BLEND_EQUATION, 1); + SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); + } else { + SB_MTHD40(so, BLEND_EQUATION, 1); + SB_DATA (so, (nvgl_blend_eqn(cso->rt[0].alpha_func) << 16) | + nvgl_blend_eqn(cso->rt[0].rgb_func)); + } + } else { + SB_MTHD30(so, BLEND_FUNC_ENABLE, 1); + SB_DATA (so, blend[0]); + } + + SB_MTHD30(so, COLOR_MASK, 1); + SB_DATA (so, cmask[0]); + return so; +} + +static void +nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->blend = hwcso; + nv30->dirty |= NV30_NEW_BLEND; +} + +static void +nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nv30_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv30_rasterizer_stateobj *so; + + so = CALLOC_STRUCT(nv30_rasterizer_stateobj); + if (!so) + return NULL; + so->pipe = *cso; + + SB_MTHD30(so, SHADE_MODEL, 1); + SB_DATA (so, cso->flatshade ? 
NV30_3D_SHADE_MODEL_FLAT : + NV30_3D_SHADE_MODEL_SMOOTH); + + SB_MTHD30(so, POLYGON_MODE_FRONT, 6); + SB_DATA (so, nvgl_polygon_mode(cso->fill_front)); + SB_DATA (so, nvgl_polygon_mode(cso->fill_back)); + if (cso->cull_face == PIPE_FACE_FRONT_AND_BACK) + SB_DATA (so, NV30_3D_CULL_FACE_FRONT_AND_BACK); + else + if (cso->cull_face == PIPE_FACE_FRONT) + SB_DATA (so, NV30_3D_CULL_FACE_FRONT); + else + SB_DATA (so, NV30_3D_CULL_FACE_BACK); + SB_DATA (so, cso->front_ccw ? NV30_3D_FRONT_FACE_CCW : + NV30_3D_FRONT_FACE_CW); + SB_DATA (so, cso->poly_smooth); + SB_DATA (so, cso->cull_face != PIPE_FACE_NONE); + + SB_MTHD30(so, POLYGON_OFFSET_POINT_ENABLE, 3); + SB_DATA (so, cso->offset_point); + SB_DATA (so, cso->offset_line); + SB_DATA (so, cso->offset_tri); + if (cso->offset_point || cso->offset_line || cso->offset_tri) { + SB_MTHD30(so, POLYGON_OFFSET_FACTOR, 2); + SB_DATA (so, fui(cso->offset_scale)); + SB_DATA (so, fui(cso->offset_units * 2.0)); + } + + SB_MTHD30(so, LINE_WIDTH, 2); + SB_DATA (so, (unsigned char)(cso->line_width * 8.0) & 0xff); + SB_DATA (so, cso->line_smooth); + SB_MTHD30(so, LINE_STIPPLE_ENABLE, 2); + SB_DATA (so, cso->line_stipple_enable); + SB_DATA (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); + + SB_MTHD30(so, VERTEX_TWO_SIDE_ENABLE, 1); + SB_DATA (so, cso->light_twoside); + SB_MTHD30(so, POLYGON_STIPPLE_ENABLE, 1); + SB_DATA (so, cso->poly_stipple_enable); + SB_MTHD30(so, POINT_SIZE, 1); + SB_DATA (so, fui(cso->point_size)); + SB_MTHD30(so, FLATSHADE_FIRST, 1); + SB_DATA (so, cso->flatshade_first); + + SB_MTHD30(so, DEPTH_CONTROL, 1); + SB_DATA (so, cso->depth_clip ? 
0x00000001 : 0x00000010); + return so; +} + +static void +nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->rast = hwcso; + nv30->dirty |= NV30_NEW_RASTERIZER; +} + +static void +nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nv30_zsa_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv30_zsa_stateobj *so; + + so = CALLOC_STRUCT(nv30_zsa_stateobj); + if (!so) + return NULL; + so->pipe = *cso; + + SB_MTHD30(so, DEPTH_FUNC, 3); + SB_DATA (so, nvgl_comparison_op(cso->depth.func)); + SB_DATA (so, cso->depth.writemask); + SB_DATA (so, cso->depth.enabled); + + if (cso->stencil[0].enabled) { + SB_MTHD30(so, STENCIL_ENABLE(0), 3); + SB_DATA (so, 1); + SB_DATA (so, cso->stencil[0].writemask); + SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func)); + SB_MTHD30(so, STENCIL_FUNC_MASK(0), 4); + SB_DATA (so, cso->stencil[0].valuemask); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + } else { + SB_MTHD30(so, STENCIL_ENABLE(0), 2); + SB_DATA (so, 0); + SB_DATA (so, 0x000000ff); + } + + if (cso->stencil[1].enabled) { + SB_MTHD30(so, STENCIL_ENABLE(1), 3); + SB_DATA (so, 1); + SB_DATA (so, cso->stencil[1].writemask); + SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func)); + SB_MTHD30(so, STENCIL_FUNC_MASK(1), 4); + SB_DATA (so, cso->stencil[1].valuemask); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + } else { + SB_MTHD30(so, STENCIL_ENABLE(1), 1); + SB_DATA (so, 0); + } + + SB_MTHD30(so, ALPHA_FUNC_ENABLE, 3); + SB_DATA (so, cso->alpha.enabled ? 
1 : 0); + SB_DATA (so, nvgl_comparison_op(cso->alpha.func)); + SB_DATA (so, float_to_ubyte(cso->alpha.ref_value)); + + return so; +} + +static void +nv30_zsa_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->zsa = hwcso; + nv30->dirty |= NV30_NEW_ZSA; +} + +static void +nv30_zsa_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nv30_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->blend_colour = *bcol; + nv30->dirty |= NV30_NEW_BLEND_COLOUR; +} + +static void +nv30_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *sr) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->stencil_ref = *sr; + nv30->dirty |= NV30_NEW_STENCIL_REF; +} + +static void +nv30_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + memcpy(nv30->clip.ucp, clip->ucp, sizeof(clip->ucp)); + + nv30->dirty |= NV30_NEW_CLIP; +} + +static void +nv30_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->sample_mask = sample_mask; + nv30->dirty |= NV30_NEW_SAMPLE_MASK; +} + +static void +nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + struct pipe_constant_buffer *cb) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct pipe_resource *buf = cb ? 
cb->buffer : NULL; + unsigned size; + + if (cb && cb->user_buffer) { + buf = nouveau_user_buffer_create(pipe->screen, (void*)cb->user_buffer, + cb->buffer_size, + PIPE_BIND_CONSTANT_BUFFER); + } + + size = 0; + if (buf) + size = buf->width0 / (4 * sizeof(float)); + + if (shader == PIPE_SHADER_VERTEX) { + pipe_resource_reference(&nv30->vertprog.constbuf, buf); + nv30->vertprog.constbuf_nr = size; + nv30->dirty |= NV30_NEW_VERTCONST; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + pipe_resource_reference(&nv30->fragprog.constbuf, buf); + nv30->fragprog.constbuf_nr = size; + nv30->dirty |= NV30_NEW_FRAGCONST; + } + + if (cb && cb->user_buffer) { + pipe_resource_reference(&buf, NULL); + } +} + +static void +nv30_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_FB); + + nv30->framebuffer = *fb; + nv30->dirty |= NV30_NEW_FRAMEBUFFER; +} + +static void +nv30_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->stipple = *stipple; + nv30->dirty |= NV30_NEW_STIPPLE; +} + +static void +nv30_set_scissor_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_scissor_state *scissor) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->scissor = *scissor; + nv30->dirty |= NV30_NEW_SCISSOR; +} + +static void +nv30_set_viewport_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *vpt) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->viewport = *vpt; + nv30->dirty |= NV30_NEW_VIEWPORT; +} + +static void +nv30_set_vertex_buffers(struct pipe_context *pipe, + unsigned start_slot, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + 
nouveau_bufctx_reset(nv30->bufctx, BUFCTX_VTXBUF); + + util_set_vertex_buffers_count(nv30->vtxbuf, &nv30->num_vtxbufs, + vb, start_slot, count); + + nv30->dirty |= NV30_NEW_ARRAYS; +} + +static void +nv30_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + if (ib) { + pipe_resource_reference(&nv30->idxbuf.buffer, ib->buffer); + nv30->idxbuf.index_size = ib->index_size; + nv30->idxbuf.offset = ib->offset; + nv30->idxbuf.user_buffer = ib->user_buffer; + } else { + pipe_resource_reference(&nv30->idxbuf.buffer, NULL); + nv30->idxbuf.user_buffer = NULL; + } +} + +void +nv30_state_init(struct pipe_context *pipe) +{ + pipe->create_blend_state = nv30_blend_state_create; + pipe->bind_blend_state = nv30_blend_state_bind; + pipe->delete_blend_state = nv30_blend_state_delete; + + pipe->create_rasterizer_state = nv30_rasterizer_state_create; + pipe->bind_rasterizer_state = nv30_rasterizer_state_bind; + pipe->delete_rasterizer_state = nv30_rasterizer_state_delete; + + pipe->create_depth_stencil_alpha_state = nv30_zsa_state_create; + pipe->bind_depth_stencil_alpha_state = nv30_zsa_state_bind; + pipe->delete_depth_stencil_alpha_state = nv30_zsa_state_delete; + + pipe->set_blend_color = nv30_set_blend_color; + pipe->set_stencil_ref = nv30_set_stencil_ref; + pipe->set_clip_state = nv30_set_clip_state; + pipe->set_sample_mask = nv30_set_sample_mask; + pipe->set_constant_buffer = nv30_set_constant_buffer; + pipe->set_framebuffer_state = nv30_set_framebuffer_state; + pipe->set_polygon_stipple = nv30_set_polygon_stipple; + pipe->set_scissor_states = nv30_set_scissor_states; + pipe->set_viewport_states = nv30_set_viewport_states; + + pipe->set_vertex_buffers = nv30_set_vertex_buffers; + pipe->set_index_buffer = nv30_set_index_buffer; +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state.h b/src/gallium/drivers/nouveau/nv30/nv30_state.h new file mode 100644 index 00000000000..e27e16fae82 --- 
/dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_state.h @@ -0,0 +1,144 @@ +#ifndef __NV30_STATE_H__ +#define __NV30_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" +#include "util/u_dynarray.h" + +#define NV30_QUERY_ZCULL_0 (PIPE_QUERY_TYPES + 0) +#define NV30_QUERY_ZCULL_1 (PIPE_QUERY_TYPES + 1) +#define NV30_QUERY_ZCULL_2 (PIPE_QUERY_TYPES + 2) +#define NV30_QUERY_ZCULL_3 (PIPE_QUERY_TYPES + 3) + +#define SB_DATA(so, u) (so)->data[(so)->size++] = (u) +#define SB_MTHD30(so, mthd, size) \ + SB_DATA((so), ((size) << 18) | (7 << 13) | NV30_3D_##mthd) +#define SB_MTHD40(so, mthd, size) \ + SB_DATA((so), ((size) << 18) | (7 << 13) | NV40_3D_##mthd) + +struct nv30_blend_stateobj { + struct pipe_blend_state pipe; + unsigned data[16]; + unsigned size; +}; + +struct nv30_rasterizer_stateobj { + struct pipe_rasterizer_state pipe; + unsigned data[32]; + unsigned size; +}; + +struct nv30_zsa_stateobj { + struct pipe_depth_stencil_alpha_state pipe; + unsigned data[32]; + unsigned size; +}; + +struct nv30_sampler_state { + struct pipe_sampler_state pipe; + unsigned fmt; + unsigned wrap; + unsigned en; + unsigned filt; + unsigned bcol; + /* 4.8 */ + unsigned min_lod; + unsigned max_lod; +}; + +struct nv30_sampler_view { + struct pipe_sampler_view pipe; + unsigned fmt; + unsigned swz; + unsigned filt; + unsigned filt_mask; + unsigned wrap; + unsigned wrap_mask; + unsigned npot_size0; + unsigned npot_size1; + /* 4.8 */ + unsigned base_lod; + unsigned high_lod; +}; + +struct nv30_shader_reloc { + unsigned location; + int target; +}; + +struct nv30_vertprog_exec { + uint32_t data[4]; +}; + +struct nv30_vertprog_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nv30_vertprog { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + struct draw_vertex_shader *draw; + boolean translated; + unsigned enabled_ucps; + uint16_t texcoord[10]; + + struct util_dynarray branch_relocs; + struct nv30_vertprog_exec *insns; + unsigned 
nr_insns; + + struct util_dynarray const_relocs; + struct nv30_vertprog_data *consts; + unsigned nr_consts; + + struct nouveau_heap *exec; + struct nouveau_heap *data; + uint32_t ir; + uint32_t or; + void *nvfx; +}; + +struct nv30_fragprog_data { + unsigned offset; + unsigned index; +}; + +struct nv30_fragprog { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + struct draw_fragment_shader *draw; + boolean translated; + + uint32_t *insn; + unsigned insn_len; + + uint16_t texcoord[10]; + struct nv30_fragprog_data *consts; + unsigned nr_consts; + + struct pipe_resource *buffer; + uint32_t vp_or; /* appended to VP_RESULT_EN */ + uint32_t fp_control; + uint32_t point_sprite_control; + uint32_t coord_conventions; + uint32_t texcoords; + uint32_t rt_enable; +}; + +struct nv30_vertex_element { + unsigned state; +}; + +struct nv30_vertex_stateobj { + struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS]; + struct translate *translate; + bool need_conversion; + unsigned num_elements; + unsigned vtx_size; + unsigned vtx_per_packet_max; + struct nv30_vertex_element element[]; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c new file mode 100644 index 00000000000..f22755983ba --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c @@ -0,0 +1,538 @@ +/* + * Copyright 2012 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_half.h" + +#include "nv_object.xml.h" +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_format.h" + +static void +nv30_validate_fb(struct nv30_context *nv30) +{ + struct pipe_screen *pscreen = &nv30->screen->base.base; + struct pipe_framebuffer_state *fb = &nv30->framebuffer; + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct nouveau_object *eng3d = nv30->screen->eng3d; + uint32_t rt_format; + int h = fb->height; + int w = fb->width; + int x = 0; + int y = 0; + + nv30->state.rt_enable = (NV30_3D_RT_ENABLE_COLOR0 << fb->nr_cbufs) - 1; + if (nv30->state.rt_enable > 1) + nv30->state.rt_enable |= NV30_3D_RT_ENABLE_MRT; + + rt_format = 0; + if (fb->nr_cbufs > 0) { + struct nv30_miptree *mt = nv30_miptree(fb->cbufs[0]->texture); + rt_format |= nv30_format(pscreen, fb->cbufs[0]->format)->hw; + rt_format |= mt->ms_mode; + if (mt->swizzled) + rt_format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED; + else + rt_format |= NV30_3D_RT_FORMAT_TYPE_LINEAR; + } else { + if (fb->zsbuf && util_format_get_blocksize(fb->zsbuf->format) > 2) + rt_format |= NV30_3D_RT_FORMAT_COLOR_A8R8G8B8; + else + rt_format |= NV30_3D_RT_FORMAT_COLOR_R5G6B5; + } + + if (fb->zsbuf) { + rt_format |= nv30_format(pscreen, fb->zsbuf->format)->hw; + if (nv30_miptree(fb->zsbuf->texture)->swizzled) + rt_format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED; + else + rt_format |= NV30_3D_RT_FORMAT_TYPE_LINEAR; + } else { + if (fb->nr_cbufs && util_format_get_blocksize(fb->cbufs[0]->format) > 2) + rt_format |= NV30_3D_RT_FORMAT_ZETA_Z24S8; + else + rt_format |= NV30_3D_RT_FORMAT_ZETA_Z16; + } + + /* hardware rounds down render target offset to 64 bytes, but surfaces + * with a size of 2x2 pixel (16bpp) or 1x1 pixel (32bpp) have an + * unaligned start address. 
For these two important square formats + * we can hack around this limitation by adjusting the viewport origin + */ + if (nv30->state.rt_enable) { + int off = nv30_surface(fb->cbufs[0])->offset & 63; + if (off) { + x += off / (util_format_get_blocksize(fb->cbufs[0]->format) * 2); + w = 16; + h = 2; + } + } + + if (rt_format & NV30_3D_RT_FORMAT_TYPE_SWIZZLED) { + rt_format |= util_logbase2(w) << 16; + rt_format |= util_logbase2(h) << 24; + } + + if (!PUSH_SPACE(push, 64)) + return; + PUSH_RESET(push, BUFCTX_FB); + + BEGIN_NV04(push, SUBC_3D(0x1da4), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(RT_HORIZ), 3); + PUSH_DATA (push, w << 16); + PUSH_DATA (push, h << 16); + PUSH_DATA (push, rt_format); + BEGIN_NV04(push, NV30_3D(VIEWPORT_HORIZ), 2); + PUSH_DATA (push, w << 16); + PUSH_DATA (push, h << 16); + BEGIN_NV04(push, NV30_3D(VIEWPORT_TX_ORIGIN), 4); + PUSH_DATA (push, (y << 16) | x); + PUSH_DATA (push, 0); + PUSH_DATA (push, ((w - 1) << 16) | 0); + PUSH_DATA (push, ((h - 1) << 16) | 0); + + if ((nv30->state.rt_enable & NV30_3D_RT_ENABLE_COLOR0) || fb->zsbuf) { + struct nv30_surface *rsf = nv30_surface(fb->cbufs[0]); + struct nv30_surface *zsf = nv30_surface(fb->zsbuf); + struct nouveau_bo *rbo, *zbo; + + if (!rsf) rsf = zsf; + else if (!zsf) zsf = rsf; + rbo = nv30_miptree(rsf->base.texture)->base.bo; + zbo = nv30_miptree(zsf->base.texture)->base.bo; + + if (eng3d->oclass >= NV40_3D_CLASS) { + BEGIN_NV04(push, NV40_3D(ZETA_PITCH), 1); + PUSH_DATA (push, zsf->pitch); + BEGIN_NV04(push, NV40_3D(COLOR0_PITCH), 3); + PUSH_DATA (push, rsf->pitch); + } else { + BEGIN_NV04(push, NV30_3D(COLOR0_PITCH), 3); + PUSH_DATA (push, (zsf->pitch << 16) | rsf->pitch); + } + PUSH_MTHDl(push, NV30_3D(COLOR0_OFFSET), BUFCTX_FB, rbo, rsf->offset & ~63, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + PUSH_MTHDl(push, NV30_3D(ZETA_OFFSET), BUFCTX_FB, zbo, zsf->offset & ~63, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + } + + if (nv30->state.rt_enable & NV30_3D_RT_ENABLE_COLOR1) { + struct 
nv30_surface *sf = nv30_surface(fb->cbufs[1]); + struct nouveau_bo *bo = nv30_miptree(sf->base.texture)->base.bo; + + BEGIN_NV04(push, NV30_3D(COLOR1_OFFSET), 2); + PUSH_MTHDl(push, NV30_3D(COLOR1_OFFSET), BUFCTX_FB, bo, sf->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + PUSH_DATA (push, sf->pitch); + } + + if (nv30->state.rt_enable & NV40_3D_RT_ENABLE_COLOR2) { + struct nv30_surface *sf = nv30_surface(fb->cbufs[2]); + struct nouveau_bo *bo = nv30_miptree(sf->base.texture)->base.bo; + + BEGIN_NV04(push, NV40_3D(COLOR2_OFFSET), 1); + PUSH_MTHDl(push, NV40_3D(COLOR2_OFFSET), BUFCTX_FB, bo, sf->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + BEGIN_NV04(push, NV40_3D(COLOR2_PITCH), 1); + PUSH_DATA (push, sf->pitch); + } + + if (nv30->state.rt_enable & NV40_3D_RT_ENABLE_COLOR3) { + struct nv30_surface *sf = nv30_surface(fb->cbufs[3]); + struct nouveau_bo *bo = nv30_miptree(sf->base.texture)->base.bo; + + BEGIN_NV04(push, NV40_3D(COLOR3_OFFSET), 1); + PUSH_MTHDl(push, NV40_3D(COLOR3_OFFSET), BUFCTX_FB, bo, sf->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + BEGIN_NV04(push, NV40_3D(COLOR3_PITCH), 1); + PUSH_DATA (push, sf->pitch); + } +} + +static void +nv30_validate_blend_colour(struct nv30_context *nv30) +{ + struct nouveau_pushbuf *push = nv30->base.pushbuf; + float *rgba = nv30->blend_colour.color; + + if (nv30->framebuffer.nr_cbufs) { + switch (nv30->framebuffer.cbufs[0]->format) { + case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + BEGIN_NV04(push, NV30_3D(BLEND_COLOR), 1); + PUSH_DATA (push, (util_float_to_half(rgba[0]) << 0) | + (util_float_to_half(rgba[1]) << 16)); + BEGIN_NV04(push, SUBC_3D(0x037c), 1); + PUSH_DATA (push, (util_float_to_half(rgba[2]) << 0) | + (util_float_to_half(rgba[3]) << 16)); + break; + default: + break; + } + } + + BEGIN_NV04(push, NV30_3D(BLEND_COLOR), 1); + PUSH_DATA (push, (float_to_ubyte(rgba[3]) << 24) | + (float_to_ubyte(rgba[0]) << 16) | + (float_to_ubyte(rgba[1]) << 8) | + (float_to_ubyte(rgba[2]) << 
0)); +} + +static void +nv30_validate_stencil_ref(struct nv30_context *nv30) +{ + struct nouveau_pushbuf *push = nv30->base.pushbuf; + + BEGIN_NV04(push, NV30_3D(STENCIL_FUNC_REF(0)), 1); + PUSH_DATA (push, nv30->stencil_ref.ref_value[0]); + BEGIN_NV04(push, NV30_3D(STENCIL_FUNC_REF(1)), 1); + PUSH_DATA (push, nv30->stencil_ref.ref_value[1]); +} + +static void +nv30_validate_stipple(struct nv30_context *nv30) +{ + struct nouveau_pushbuf *push = nv30->base.pushbuf; + + BEGIN_NV04(push, NV30_3D(POLYGON_STIPPLE_PATTERN(0)), 32); + PUSH_DATAp(push, nv30->stipple.stipple, 32); +} + +static void +nv30_validate_scissor(struct nv30_context *nv30) +{ + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct pipe_scissor_state *s = &nv30->scissor; + + if (!(nv30->dirty & NV30_NEW_SCISSOR) && + nv30->rast->pipe.scissor != nv30->state.scissor_off) + return; + nv30->state.scissor_off = !nv30->rast->pipe.scissor; + + BEGIN_NV04(push, NV30_3D(SCISSOR_HORIZ), 2); + if (nv30->rast->pipe.scissor) { + PUSH_DATA (push, ((s->maxx - s->minx) << 16) | s->minx); + PUSH_DATA (push, ((s->maxy - s->miny) << 16) | s->miny); + } else { + PUSH_DATA (push, 0x10000000); + PUSH_DATA (push, 0x10000000); + } +} + +static void +nv30_validate_viewport(struct nv30_context *nv30) +{ + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct pipe_viewport_state *vp = &nv30->viewport; + + BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8); + PUSH_DATAf(push, vp->translate[0]); + PUSH_DATAf(push, vp->translate[1]); + PUSH_DATAf(push, vp->translate[2]); + PUSH_DATAf(push, vp->translate[3]); + PUSH_DATAf(push, vp->scale[0]); + PUSH_DATAf(push, vp->scale[1]); + PUSH_DATAf(push, vp->scale[2]); + PUSH_DATAf(push, vp->scale[3]); + BEGIN_NV04(push, NV30_3D(DEPTH_RANGE_NEAR), 2); + PUSH_DATAf(push, vp->translate[2] - fabsf(vp->scale[2])); + PUSH_DATAf(push, vp->translate[2] + fabsf(vp->scale[2])); +} + +static void +nv30_validate_clip(struct nv30_context *nv30) +{ + struct nouveau_pushbuf *push = 
nv30->base.pushbuf; + unsigned i; + uint32_t clpd_enable = 0; + + for (i = 0; i < 6; i++) { + if (nv30->rast->pipe.clip_plane_enable & (1 << i)) { + if (nv30->dirty & NV30_NEW_CLIP) { + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5); + PUSH_DATA (push, i); + PUSH_DATAp(push, nv30->clip.ucp[i], 4); + } + + clpd_enable |= 1 << (1 + 4*i); + } + } + + BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1); + PUSH_DATA (push, clpd_enable); +} + +static void +nv30_validate_blend(struct nv30_context *nv30) +{ + struct nouveau_pushbuf *push = nv30->base.pushbuf; + + PUSH_SPACE(push, nv30->blend->size); + PUSH_DATAp(push, nv30->blend->data, nv30->blend->size); +} + +static void +nv30_validate_zsa(struct nv30_context *nv30) +{ + struct nouveau_pushbuf *push = nv30->base.pushbuf; + + PUSH_SPACE(push, nv30->zsa->size); + PUSH_DATAp(push, nv30->zsa->data, nv30->zsa->size); +} + +static void +nv30_validate_rasterizer(struct nv30_context *nv30) +{ + struct nouveau_pushbuf *push = nv30->base.pushbuf; + + PUSH_SPACE(push, nv30->rast->size); + PUSH_DATAp(push, nv30->rast->data, nv30->rast->size); +} + +static void +nv30_validate_multisample(struct nv30_context *nv30) +{ + struct pipe_rasterizer_state *rasterizer = &nv30->rast->pipe; + struct pipe_blend_state *blend = &nv30->blend->pipe; + struct nouveau_pushbuf *push = nv30->base.pushbuf; + uint32_t ctrl = nv30->sample_mask << 16; + + if (blend->alpha_to_one) + ctrl |= 0x00000100; + if (blend->alpha_to_coverage) + ctrl |= 0x00000010; + if (rasterizer->multisample) + ctrl |= 0x00000001; + + BEGIN_NV04(push, NV30_3D(MULTISAMPLE_CONTROL), 1); + PUSH_DATA (push, ctrl); +} + +static void +nv30_validate_fragment(struct nv30_context *nv30) +{ + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct nv30_fragprog *fp = nv30->fragprog.program; + + BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1); + PUSH_DATA (push, nv30->state.rt_enable & ~fp->rt_enable); + BEGIN_NV04(push, NV30_3D(COORD_CONVENTIONS), 1); + PUSH_DATA (push, fp->coord_conventions 
| nv30->framebuffer.height); +} + +static void +nv30_validate_point_coord(struct nv30_context *nv30) +{ + struct pipe_rasterizer_state *rasterizer = &nv30->rast->pipe; + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct nv30_fragprog *fp = nv30->fragprog.program; + uint32_t hw = 0x00000000; + + if (rasterizer) { + hw |= (nv30->rast->pipe.sprite_coord_enable & 0xff) << 8; + if (fp) + hw |= fp->point_sprite_control; + + if (rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) { + if (hw) + nv30->draw_flags |= NV30_NEW_RASTERIZER; + } else + if (rasterizer->point_quad_rasterization) { + hw |= NV30_3D_POINT_SPRITE_ENABLE; + } + } + + BEGIN_NV04(push, NV30_3D(POINT_SPRITE), 1); + PUSH_DATA (push, hw); +} + +struct state_validate { + void (*func)(struct nv30_context *); + uint32_t mask; +}; + +static struct state_validate hwtnl_validate_list[] = { + { nv30_validate_fb, NV30_NEW_FRAMEBUFFER }, + { nv30_validate_blend, NV30_NEW_BLEND }, + { nv30_validate_zsa, NV30_NEW_ZSA }, + { nv30_validate_rasterizer, NV30_NEW_RASTERIZER }, + { nv30_validate_multisample, NV30_NEW_SAMPLE_MASK | NV30_NEW_BLEND | + NV30_NEW_RASTERIZER }, + { nv30_validate_blend_colour, NV30_NEW_BLEND_COLOUR | + NV30_NEW_FRAMEBUFFER }, + { nv30_validate_stencil_ref, NV30_NEW_STENCIL_REF }, + { nv30_validate_stipple, NV30_NEW_STIPPLE }, + { nv30_validate_scissor, NV30_NEW_SCISSOR | NV30_NEW_RASTERIZER }, + { nv30_validate_viewport, NV30_NEW_VIEWPORT }, + { nv30_validate_clip, NV30_NEW_CLIP }, + { nv30_fragprog_validate, NV30_NEW_FRAGPROG | NV30_NEW_FRAGCONST }, + { nv30_vertprog_validate, NV30_NEW_VERTPROG | NV30_NEW_VERTCONST | + NV30_NEW_FRAGPROG | NV30_NEW_RASTERIZER }, + { nv30_validate_fragment, NV30_NEW_FRAMEBUFFER | NV30_NEW_FRAGPROG }, + { nv30_validate_point_coord, NV30_NEW_RASTERIZER | NV30_NEW_FRAGPROG }, + { nv30_fragtex_validate, NV30_NEW_FRAGTEX }, + { nv40_verttex_validate, NV30_NEW_VERTTEX }, + { nv30_vbo_validate, NV30_NEW_VERTEX | NV30_NEW_ARRAYS }, + {} +}; + +#define 
NV30_SWTNL_MASK (NV30_NEW_VIEWPORT | \ + NV30_NEW_CLIP | \ + NV30_NEW_VERTPROG | \ + NV30_NEW_VERTCONST | \ + NV30_NEW_VERTTEX | \ + NV30_NEW_VERTEX | \ + NV30_NEW_ARRAYS) + +static struct state_validate swtnl_validate_list[] = { + { nv30_validate_fb, NV30_NEW_FRAMEBUFFER }, + { nv30_validate_blend, NV30_NEW_BLEND }, + { nv30_validate_zsa, NV30_NEW_ZSA }, + { nv30_validate_rasterizer, NV30_NEW_RASTERIZER }, + { nv30_validate_multisample, NV30_NEW_SAMPLE_MASK | NV30_NEW_BLEND | + NV30_NEW_RASTERIZER }, + { nv30_validate_blend_colour, NV30_NEW_BLEND_COLOUR | + NV30_NEW_FRAMEBUFFER }, + { nv30_validate_stencil_ref, NV30_NEW_STENCIL_REF }, + { nv30_validate_stipple, NV30_NEW_STIPPLE }, + { nv30_validate_scissor, NV30_NEW_SCISSOR | NV30_NEW_RASTERIZER }, + { nv30_fragprog_validate, NV30_NEW_FRAGPROG | NV30_NEW_FRAGCONST }, + { nv30_validate_fragment, NV30_NEW_FRAMEBUFFER | NV30_NEW_FRAGPROG }, + { nv30_fragtex_validate, NV30_NEW_FRAGTEX }, + {} +}; + +static void +nv30_state_context_switch(struct nv30_context *nv30) +{ + struct nv30_context *prev = nv30->screen->cur_ctx; + + if (prev) + nv30->state = prev->state; + nv30->dirty = NV30_NEW_ALL; + + if (!nv30->vertex) + nv30->dirty &= ~(NV30_NEW_VERTEX | NV30_NEW_ARRAYS); + + if (!nv30->vertprog.program) + nv30->dirty &= ~NV30_NEW_VERTPROG; + if (!nv30->fragprog.program) + nv30->dirty &= ~NV30_NEW_FRAGPROG; + + if (!nv30->blend) + nv30->dirty &= ~NV30_NEW_BLEND; + if (!nv30->rast) + nv30->dirty &= ~NV30_NEW_RASTERIZER; + if (!nv30->zsa) + nv30->dirty &= ~NV30_NEW_ZSA; + + nv30->screen->cur_ctx = nv30; + nv30->base.pushbuf->user_priv = &nv30->bufctx; +} + +boolean +nv30_state_validate(struct nv30_context *nv30, boolean hwtnl) +{ + struct nouveau_screen *screen = &nv30->screen->base; + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct nouveau_bufctx *bctx = nv30->bufctx; + struct nouveau_bufref *bref; + struct state_validate *validate; + + if (nv30->screen->cur_ctx != nv30) + nv30_state_context_switch(nv30); + + if 
(hwtnl) { + nv30->draw_dirty |= nv30->dirty; + if (nv30->draw_flags) { + nv30->draw_flags &= ~nv30->dirty; + if (!nv30->draw_flags) + nv30->dirty |= NV30_SWTNL_MASK; + } + } + + if (!nv30->draw_flags) + validate = hwtnl_validate_list; + else + validate = swtnl_validate_list; + + if (nv30->dirty) { + while (validate->func) { + if (nv30->dirty & validate->mask) + validate->func(nv30); + validate++; + } + + nv30->dirty = 0; + } + + nouveau_pushbuf_bufctx(push, bctx); + if (nouveau_pushbuf_validate(push)) { + nouveau_pushbuf_bufctx(push, NULL); + return FALSE; + } + + /*XXX*/ + BEGIN_NV04(push, NV30_3D(VTX_CACHE_INVALIDATE_1710), 1); + PUSH_DATA (push, 0); + if (nv30->screen->eng3d->oclass >= NV40_3D_CLASS) { + BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, 2); + BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV30_3D(R1718), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(R1718), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(R1718), 1); + PUSH_DATA (push, 0); + } + + LIST_FOR_EACH_ENTRY(bref, &bctx->current, thead) { + struct nv04_resource *res = bref->priv; + if (res && res->mm) { + nouveau_fence_ref(screen->fence.current, &res->fence); + + if (bref->flags & NOUVEAU_BO_RD) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + if (bref->flags & NOUVEAU_BO_WR) { + nouveau_fence_ref(screen->fence.current, &res->fence_wr); + res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + } + } + } + + return TRUE; +} + +void +nv30_state_release(struct nv30_context *nv30) +{ + nouveau_pushbuf_bufctx(nv30->base.pushbuf, NULL); +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_texture.c b/src/gallium/drivers/nouveau/nv30/nv30_texture.c new file mode 100644 index 00000000000..d3cffcfb261 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_texture.c @@ -0,0 +1,306 @@ +/* + * Copyright 2012 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "nv_object.xml.h" +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_format.h" + +#define NV30_3D_TEX_WRAP_S_MIRROR_REPEAT NV30_3D_TEX_WRAP_S_MIRRORED_REPEAT +#define NV30_WRAP(n) \ + case PIPE_TEX_WRAP_##n: ret = NV30_3D_TEX_WRAP_S_##n; break +#define NV40_WRAP(n) \ + case PIPE_TEX_WRAP_##n: ret = NV40_3D_TEX_WRAP_S_##n; break + +static INLINE unsigned +wrap_mode(unsigned pipe) +{ + unsigned ret = NV30_3D_TEX_WRAP_S_REPEAT; + + switch (pipe) { + NV30_WRAP(REPEAT); + NV30_WRAP(MIRROR_REPEAT); + NV30_WRAP(CLAMP_TO_EDGE); + NV30_WRAP(CLAMP_TO_BORDER); + NV30_WRAP(CLAMP); + NV40_WRAP(MIRROR_CLAMP_TO_EDGE); + NV40_WRAP(MIRROR_CLAMP_TO_BORDER); + NV40_WRAP(MIRROR_CLAMP); + default: + break; + } + + return ret >> NV30_3D_TEX_WRAP_S__SHIFT; +} + +static INLINE unsigned +filter_mode(const struct pipe_sampler_state *cso) +{ + unsigned filter; + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter = NV30_3D_TEX_FILTER_MAG_LINEAR; + break; + default: + filter = NV30_3D_TEX_FILTER_MAG_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV30_3D_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV30_3D_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR; + break; + default: + filter |= NV30_3D_TEX_FILTER_MIN_LINEAR; + break; + } + break; + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV30_3D_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV30_3D_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR; + break; + default: + filter |= NV30_3D_TEX_FILTER_MIN_NEAREST; + break; + } + break; + } + + return filter; +} + +static INLINE unsigned +compare_mode(const struct pipe_sampler_state *cso) +{ + if 
(cso->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE) + return 0; + + switch (cso->compare_func) { + case PIPE_FUNC_NEVER : return NV30_3D_TEX_WRAP_RCOMP_NEVER; + case PIPE_FUNC_GREATER : return NV30_3D_TEX_WRAP_RCOMP_GREATER; + case PIPE_FUNC_EQUAL : return NV30_3D_TEX_WRAP_RCOMP_EQUAL; + case PIPE_FUNC_GEQUAL : return NV30_3D_TEX_WRAP_RCOMP_GEQUAL; + case PIPE_FUNC_LESS : return NV30_3D_TEX_WRAP_RCOMP_LESS; + case PIPE_FUNC_NOTEQUAL: return NV30_3D_TEX_WRAP_RCOMP_NOTEQUAL; + case PIPE_FUNC_LEQUAL : return NV30_3D_TEX_WRAP_RCOMP_LEQUAL; + case PIPE_FUNC_ALWAYS : return NV30_3D_TEX_WRAP_RCOMP_ALWAYS; + default: + return 0; + } +} + +static void * +nv30_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nouveau_object *eng3d = nv30_context(pipe)->screen->eng3d; + struct nv30_sampler_state *so; + const float max_lod = 15.0 + (255.0 / 256.0); + + so = MALLOC_STRUCT(nv30_sampler_state); + if (!so) + return NULL; + + so->pipe = *cso; + so->fmt = 0; + so->wrap = (wrap_mode(cso->wrap_s) << NV30_3D_TEX_WRAP_S__SHIFT) | + (wrap_mode(cso->wrap_t) << NV30_3D_TEX_WRAP_T__SHIFT) | + (wrap_mode(cso->wrap_r) << NV30_3D_TEX_WRAP_R__SHIFT); + so->en = 0; + so->wrap |= compare_mode(cso); + so->filt = filter_mode(cso) | 0x00002000; + so->bcol = (float_to_ubyte(cso->border_color.f[3]) << 24) | + (float_to_ubyte(cso->border_color.f[0]) << 16) | + (float_to_ubyte(cso->border_color.f[1]) << 8) | + (float_to_ubyte(cso->border_color.f[2]) << 0); + + if (eng3d->oclass >= NV40_3D_CLASS) { + unsigned aniso = cso->max_anisotropy; + + if (!cso->normalized_coords) + so->fmt |= NV40_3D_TEX_FORMAT_RECT; + + if (aniso > 1) { + if (aniso >= 16) so->en |= NV40_3D_TEX_ENABLE_ANISO_16X; + else if (aniso >= 12) so->en |= NV40_3D_TEX_ENABLE_ANISO_12X; + else if (aniso >= 10) so->en |= NV40_3D_TEX_ENABLE_ANISO_10X; + else if (aniso >= 8) so->en |= NV40_3D_TEX_ENABLE_ANISO_8X; + else if (aniso >= 6) so->en |= NV40_3D_TEX_ENABLE_ANISO_6X; + else if (aniso >= 
4) so->en |= NV40_3D_TEX_ENABLE_ANISO_4X; + else so->en |= NV40_3D_TEX_ENABLE_ANISO_2X; + + so->wrap |= nv30_context(pipe)->config.aniso; + } + } else { + so->en |= NV30_3D_TEX_ENABLE_ENABLE; + + if (cso->max_anisotropy >= 8) so->en |= NV30_3D_TEX_ENABLE_ANISO_8X; + else if (cso->max_anisotropy >= 4) so->en |= NV30_3D_TEX_ENABLE_ANISO_4X; + else if (cso->max_anisotropy >= 2) so->en |= NV30_3D_TEX_ENABLE_ANISO_2X; + } + + so->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + so->max_lod = (int)(CLAMP(cso->max_lod, 0.0, max_lod) * 256.0); + so->min_lod = (int)(CLAMP(cso->min_lod, 0.0, max_lod) * 256.0); + return so; +} + +static void +nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static INLINE uint32_t +swizzle(const struct nv30_texfmt *fmt, unsigned cmp, unsigned swz) +{ + uint32_t data = fmt->swz[swz].src << 8; + if (swz <= PIPE_SWIZZLE_ALPHA) + data |= fmt->swz[swz].cmp; + else + data |= fmt->swz[cmp].cmp; + return data; +} + +static struct pipe_sampler_view * +nv30_sampler_view_create(struct pipe_context *pipe, struct pipe_resource *pt, + const struct pipe_sampler_view *tmpl) +{ + const struct nv30_texfmt *fmt = nv30_texfmt(pipe->screen, tmpl->format); + struct nouveau_object *eng3d = nv30_context(pipe)->screen->eng3d; + struct nv30_miptree *mt = nv30_miptree(pt); + struct nv30_sampler_view *so; + + so = MALLOC_STRUCT(nv30_sampler_view); + if (!so) + return NULL; + so->pipe = *tmpl; + so->pipe.reference.count = 1; + so->pipe.texture = NULL; + so->pipe.context = pipe; + pipe_resource_reference(&so->pipe.texture, pt); + + so->fmt = NV30_3D_TEX_FORMAT_NO_BORDER; + switch (pt->target) { + case PIPE_TEXTURE_1D: + so->fmt |= NV30_3D_TEX_FORMAT_DIMS_1D; + break; + case PIPE_TEXTURE_CUBE: + so->fmt |= NV30_3D_TEX_FORMAT_CUBIC; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + so->fmt |= NV30_3D_TEX_FORMAT_DIMS_2D; + break; + case PIPE_TEXTURE_3D: + so->fmt |= NV30_3D_TEX_FORMAT_DIMS_3D; + break; + default: + assert(0); + 
so->fmt |= NV30_3D_TEX_FORMAT_DIMS_1D; + break; + } + + so->filt = fmt->filter; + so->wrap = fmt->wrap; + so->swz = fmt->swizzle; + so->swz |= swizzle(fmt, 3, tmpl->swizzle_a); + so->swz |= swizzle(fmt, 0, tmpl->swizzle_r) << 2; + so->swz |= swizzle(fmt, 1, tmpl->swizzle_g) << 4; + so->swz |= swizzle(fmt, 2, tmpl->swizzle_b) << 6; + + /* apparently, we need to ignore the t coordinate for 1D textures to + * fix piglit tex1d-2dborder + */ + so->wrap_mask = ~0; + if (pt->target == PIPE_TEXTURE_1D) { + so->wrap_mask &= ~NV30_3D_TEX_WRAP_T__MASK; + so->wrap |= NV30_3D_TEX_WRAP_T_REPEAT; + } + + /* yet more hardware suckage, can't filter 32-bit float formats */ + switch (tmpl->format) { + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + so->filt_mask = ~(NV30_3D_TEX_FILTER_MIN__MASK | + NV30_3D_TEX_FILTER_MAG__MASK); + so->filt |= NV30_3D_TEX_FILTER_MIN_NEAREST | + NV30_3D_TEX_FILTER_MAG_NEAREST; + break; + default: + so->filt_mask = ~0; + break; + } + + so->npot_size0 = (pt->width0 << 16) | pt->height0; + if (eng3d->oclass >= NV40_3D_CLASS) { + so->npot_size1 = (pt->depth0 << 20) | mt->uniform_pitch; + if (!mt->swizzled) + so->fmt |= NV40_3D_TEX_FORMAT_LINEAR; + so->fmt |= 0x00008000; + so->fmt |= (pt->last_level + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT; + } else { + so->swz |= mt->uniform_pitch << NV30_3D_TEX_SWIZZLE_RECT_PITCH__SHIFT; + if (pt->last_level) + so->fmt |= NV30_3D_TEX_FORMAT_MIPMAP; + so->fmt |= util_logbase2(pt->width0) << 20; + so->fmt |= util_logbase2(pt->height0) << 24; + so->fmt |= util_logbase2(pt->depth0) << 28; + so->fmt |= 0x00010000; + } + + so->base_lod = so->pipe.u.tex.first_level << 8; + so->high_lod = MIN2(pt->last_level, so->pipe.u.tex.last_level) << 8; + return &so->pipe; +} + +static void +nv30_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + FREE(view); +} + +void +nv30_texture_init(struct pipe_context *pipe) +{ + 
pipe->create_sampler_state = nv30_sampler_state_create; + pipe->delete_sampler_state = nv30_sampler_state_delete; + pipe->create_sampler_view = nv30_sampler_view_create; + pipe->sampler_view_destroy = nv30_sampler_view_destroy; +} diff --git a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c new file mode 100644 index 00000000000..99bc0994ac2 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c @@ -0,0 +1,754 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#define XFER_ARGS \ + struct nv30_context *nv30, enum nv30_transfer_filter filter, \ + struct nv30_rect *src, struct nv30_rect *dst + +#include "util/u_math.h" + +#include "nv_object.xml.h" +#include "nv_m2mf.xml.h" +#include "nv30/nv01_2d.xml.h" +#include "nv30/nv30-40_3d.xml.h" + +#include "nv30/nv30_context.h" +#include "nv30/nv30_transfer.h" + +/* Various helper functions to transfer different types of data in a number + * of different ways. + */ + +static INLINE boolean +nv30_transfer_scaled(struct nv30_rect *src, struct nv30_rect *dst) +{ + if (src->x1 - src->x0 != dst->x1 - dst->x0) + return TRUE; + if (src->y1 - src->y0 != dst->y1 - dst->y0) + return TRUE; + return FALSE; +} + +static INLINE boolean +nv30_transfer_blit(XFER_ARGS) +{ + if (nv30->screen->eng3d->oclass < NV40_3D_CLASS) + return FALSE; + if (dst->offset & 63 || dst->pitch & 63 || dst->d > 1) + return FALSE; + if (dst->w < 2 || dst->h < 2) + return FALSE; + if (dst->cpp > 4 || (dst->cpp == 1 && !dst->pitch)) + return FALSE; + if (src->cpp > 4) + return FALSE; + return TRUE; +} + +static INLINE struct nouveau_heap * +nv30_transfer_rect_vertprog(struct nv30_context *nv30) +{ + struct nouveau_heap *heap = nv30->screen->vp_exec_heap; + struct nouveau_heap *vp; + + vp = nv30->blit_vp; + if (!vp) { + if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp)) { + while (heap->next && heap->size < 2) { + struct nouveau_heap **evict = heap->next->priv; + nouveau_heap_free(evict); + } + + if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp)) + return NULL; + } + + vp = nv30->blit_vp; + if (vp) { + struct nouveau_pushbuf *push = nv30->base.pushbuf; + + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1); + PUSH_DATA (push, vp->start); + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4); + PUSH_DATA (push, 0x401f9c6c); /* mov o[hpos], a[0]; */ + PUSH_DATA (push, 0x0040000d); + PUSH_DATA (push, 0x8106c083); + PUSH_DATA (push, 0x6041ff80); + BEGIN_NV04(push, 
NV30_3D(VP_UPLOAD_INST(0)), 4); + PUSH_DATA (push, 0x401f9c6c); /* mov o[tex0], a[8]; end; */ + PUSH_DATA (push, 0x0040080d); + PUSH_DATA (push, 0x8106c083); + PUSH_DATA (push, 0x6041ff9d); + } + } + + return vp; +} + + +static INLINE struct nv04_resource * +nv30_transfer_rect_fragprog(struct nv30_context *nv30) +{ + struct nv04_resource *fp = nv04_resource(nv30->blit_fp); + struct pipe_context *pipe = &nv30->base.pipe; + + if (!fp) { + nv30->blit_fp = pipe_buffer_create(pipe->screen, 0, 0, 12 * 4); + if (nv30->blit_fp) { + struct pipe_transfer *transfer; + u32 *map = pipe_buffer_map(pipe, nv30->blit_fp, + PIPE_TRANSFER_WRITE, &transfer); + if (map) { + map[0] = 0x17009e00; /* texr r0, i[tex0], texture[0]; end; */ + map[1] = 0x1c9dc801; + map[2] = 0x0001c800; + map[3] = 0x3fe1c800; + map[4] = 0x01401e81; /* end; */ + map[5] = 0x1c9dc800; + map[6] = 0x0001c800; + map[7] = 0x0001c800; + pipe_buffer_unmap(pipe, transfer); + } + + fp = nv04_resource(nv30->blit_fp); + nouveau_buffer_migrate(&nv30->base, fp, NOUVEAU_BO_VRAM); + } + } + + return fp; +} + +static void +nv30_transfer_rect_blit(XFER_ARGS) +{ + struct nv04_resource *fp = nv30_transfer_rect_fragprog(nv30); + struct nouveau_heap *vp = nv30_transfer_rect_vertprog(nv30); + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct nouveau_pushbuf_refn refs[] = { + { fp->bo, fp->domain | NOUVEAU_BO_RD }, + { src->bo, src->domain | NOUVEAU_BO_RD }, + { dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR }, + }; + u32 texfmt, texswz; + u32 format, stride; + + if (nouveau_pushbuf_space(push, 512, 8, 0) || + nouveau_pushbuf_refn (push, refs, sizeof(refs) / sizeof(refs[0]))) + return; + + /* various switches depending on cpp of the transfer */ + switch (dst->cpp) { + case 4: + format = NV30_3D_RT_FORMAT_COLOR_A8R8G8B8 | + NV30_3D_RT_FORMAT_ZETA_Z24S8; + texfmt = NV40_3D_TEX_FORMAT_FORMAT_A8R8G8B8; + texswz = 0x0000aae4; + break; + case 2: + format = NV30_3D_RT_FORMAT_COLOR_R5G6B5 | + NV30_3D_RT_FORMAT_ZETA_Z16; + texfmt = 
NV40_3D_TEX_FORMAT_FORMAT_R5G6B5; + texswz = 0x0000a9e4; + break; + case 1: + format = NV30_3D_RT_FORMAT_COLOR_B8 | + NV30_3D_RT_FORMAT_ZETA_Z16; + texfmt = NV40_3D_TEX_FORMAT_FORMAT_L8; + texswz = 0x0000aaff; + break; + default: + assert(0); + return; + } + + /* render target */ + if (!dst->pitch) { + format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED; + format |= util_logbase2(dst->w) << 16; + format |= util_logbase2(dst->h) << 24; + stride = 64; + } else { + format |= NV30_3D_RT_FORMAT_TYPE_LINEAR; + stride = dst->pitch; + } + + BEGIN_NV04(push, NV30_3D(VIEWPORT_HORIZ), 2); + PUSH_DATA (push, dst->w << 16); + PUSH_DATA (push, dst->h << 16); + BEGIN_NV04(push, NV30_3D(RT_HORIZ), 5); + PUSH_DATA (push, dst->w << 16); + PUSH_DATA (push, dst->h << 16); + PUSH_DATA (push, format); + PUSH_DATA (push, stride); + PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0); + BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1); + PUSH_DATA (push, NV30_3D_RT_ENABLE_COLOR0); + + nv30->dirty |= NV30_NEW_FRAMEBUFFER; + + /* viewport state */ + BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 1.0); + BEGIN_NV04(push, NV30_3D(DEPTH_RANGE_NEAR), 2); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 1.0); + + nv30->dirty |= NV30_NEW_VIEWPORT; + + /* blend state */ + BEGIN_NV04(push, NV30_3D(COLOR_LOGIC_OP_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(DITHER_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(BLEND_FUNC_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(COLOR_MASK), 1); + PUSH_DATA (push, 0x01010101); + + nv30->dirty |= NV30_NEW_BLEND; + + /* depth-stencil-alpha state */ + BEGIN_NV04(push, NV30_3D(DEPTH_WRITE_ENABLE), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(0)), 1); + PUSH_DATA (push, 0); + 
BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(1)), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(ALPHA_FUNC_ENABLE), 1); + PUSH_DATA (push, 0); + + nv30->dirty |= NV30_NEW_ZSA; + + /* rasterizer state */ + BEGIN_NV04(push, NV30_3D(SHADE_MODEL), 1); + PUSH_DATA (push, NV30_3D_SHADE_MODEL_FLAT); + BEGIN_NV04(push, NV30_3D(CULL_FACE_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(POLYGON_MODE_FRONT), 2); + PUSH_DATA (push, NV30_3D_POLYGON_MODE_FRONT_FILL); + PUSH_DATA (push, NV30_3D_POLYGON_MODE_BACK_FILL); + BEGIN_NV04(push, NV30_3D(POLYGON_OFFSET_FILL_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(POLYGON_STIPPLE_ENABLE), 1); + PUSH_DATA (push, 0); + + nv30->state.scissor_off = 0; + nv30->dirty |= NV30_NEW_RASTERIZER; + + /* vertex program */ + BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1); + PUSH_DATA (push, vp->start); + BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2); + PUSH_DATA (push, 0x00000101); /* attrib: 0, 8 */ + PUSH_DATA (push, 0x00004000); /* result: hpos, tex0 */ + BEGIN_NV04(push, NV30_3D(ENGINE), 1); + PUSH_DATA (push, 0x00000103); + BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1); + PUSH_DATA (push, 0x00000000); + + nv30->dirty |= NV30_NEW_VERTPROG; + nv30->dirty |= NV30_NEW_CLIP; + + /* fragment program */ + BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1); + PUSH_RELOC(push, fp->bo, fp->offset, fp->domain | + NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + NV30_3D_FP_ACTIVE_PROGRAM_DMA0, + NV30_3D_FP_ACTIVE_PROGRAM_DMA1); + BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1); + PUSH_DATA (push, 0x02000000); + + nv30->state.fragprog = NULL; + nv30->dirty |= NV30_NEW_FRAGPROG; + + /* texture */ + texfmt |= 1 << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT; + texfmt |= NV30_3D_TEX_FORMAT_NO_BORDER; + texfmt |= NV40_3D_TEX_FORMAT_RECT; + texfmt |= 0x00008000; + if (src->d < 2) + texfmt |= NV30_3D_TEX_FORMAT_DIMS_2D; + else + texfmt |= NV30_3D_TEX_FORMAT_DIMS_3D; + if (src->pitch) + texfmt |= NV40_3D_TEX_FORMAT_LINEAR; + + BEGIN_NV04(push, 
NV30_3D(TEX_OFFSET(0)), 8); + PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0); + PUSH_RELOC(push, src->bo, texfmt, NOUVEAU_BO_OR, + NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1); + PUSH_DATA (push, NV30_3D_TEX_WRAP_S_CLAMP_TO_EDGE | + NV30_3D_TEX_WRAP_T_CLAMP_TO_EDGE | + NV30_3D_TEX_WRAP_R_CLAMP_TO_EDGE); + PUSH_DATA (push, NV40_3D_TEX_ENABLE_ENABLE); + PUSH_DATA (push, texswz); + switch (filter) { + case BILINEAR: + PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_LINEAR | + NV30_3D_TEX_FILTER_MAG_LINEAR | 0x00002000); + break; + default: + PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_NEAREST | + NV30_3D_TEX_FILTER_MAG_NEAREST | 0x00002000); + break; + } + PUSH_DATA (push, (src->w << 16) | src->h); + PUSH_DATA (push, 0x00000000); + BEGIN_NV04(push, NV40_3D(TEX_SIZE1(0)), 1); + PUSH_DATA (push, 0x00100000 | src->pitch); + BEGIN_NV04(push, SUBC_3D(0x0b40), 1); + PUSH_DATA (push, src->d < 2 ? 0x00000001 : 0x00000000); + BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, 1); + + nv30->fragprog.dirty_samplers |= 1; + nv30->dirty |= NV30_NEW_FRAGTEX; + + /* blit! 
/* Check whether the scaled-image-from-memory (SIFM) object can service this
 * transfer: linear 2D source only, size/alignment limits on both surfaces.
 */
static boolean
nv30_transfer_sifm(XFER_ARGS)
{
   /* Source must be linear; the (w | h) > 1024 test conservatively rejects
    * any source where either dimension exceeds the SIFM input limits.
    */
   if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
      return FALSE;

   /* 2D transfers only */
   if (src->d > 1 || dst->d > 1)
      return FALSE;

   if (dst->offset & 63)
      return FALSE;

   if (!dst->pitch) {
      /* swizzled destination: size limits apply */
      if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
         return FALSE;
   } else {
      /* linear destination must be in VRAM with 64-byte-aligned pitch */
      if (dst->domain != NOUVEAU_BO_VRAM)
         return FALSE;
      if (dst->pitch & 63)
         return FALSE;
   }

   return TRUE;
}

/* Execute a (possibly scaled/filtered) blit through the SIFM object,
 * rendering either to a linear surface (surf2d) or a swizzled surface
 * (swzsurf) depending on whether the destination has a pitch.
 */
static void
nv30_transfer_rect_sifm(XFER_ARGS)

{
   struct nouveau_pushbuf *push = nv30->base.pushbuf;
   struct nouveau_pushbuf_refn refs[] = {
      { src->bo, src->domain | NOUVEAU_BO_RD },
      { dst->bo, dst->domain | NOUVEAU_BO_WR },
   };
   struct nv04_fifo *fifo = push->channel->data;
   unsigned si_fmt, si_arg;
   unsigned ss_fmt;

   /* destination surface format, from bytes-per-pixel */
   switch (dst->cpp) {
   case 4: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_A8R8G8B8; break;
   case 2: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_R5G6B5; break;
   default:
      ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_Y8;
      break;
   }

   /* source image format, from bytes-per-pixel */
   switch (src->cpp) {
   case 4: si_fmt = NV03_SIFM_COLOR_FORMAT_A8R8G8B8; break;
   case 2: si_fmt = NV03_SIFM_COLOR_FORMAT_R5G6B5; break;
   default:
      si_fmt = NV03_SIFM_COLOR_FORMAT_AY8;
      break;
   }

   /* origin/filter selection follows the requested filter mode */
   if (filter == NEAREST) {
      si_arg = NV03_SIFM_FORMAT_ORIGIN_CENTER;
      si_arg |= NV03_SIFM_FORMAT_FILTER_POINT_SAMPLE;
   } else {
      si_arg = NV03_SIFM_FORMAT_ORIGIN_CORNER;
      si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR;
   }

   /* reserve pushbuf space and reference both BOs up front; bail quietly
    * on failure (nothing has been emitted yet)
    */
   if (nouveau_pushbuf_space(push, 32, 6, 0) ||
       nouveau_pushbuf_refn (push, refs, 2))
      return;

   if (dst->pitch) {
      /* linear destination: render through the 2D surface object */
      BEGIN_NV04(push, NV04_SF2D(DMA_IMAGE_SOURCE), 2);
      PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
      PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
      BEGIN_NV04(push, NV04_SF2D(FORMAT), 4);
      PUSH_DATA (push, ss_fmt);
      PUSH_DATA (push, dst->pitch << 16 | dst->pitch);
      PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
      PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
      BEGIN_NV04(push, NV05_SIFM(SURFACE), 1);
      PUSH_DATA (push, nv30->screen->surf2d->handle);
   } else {
      /* swizzled destination: log2 width/height encoded in the format word */
      BEGIN_NV04(push, NV04_SSWZ(DMA_IMAGE), 1);
      PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
      BEGIN_NV04(push, NV04_SSWZ(FORMAT), 2);
      PUSH_DATA (push, ss_fmt | (util_logbase2(dst->w) << 16) |
                               (util_logbase2(dst->h) << 24));
      PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
      BEGIN_NV04(push, NV05_SIFM(SURFACE), 1);
      PUSH_DATA (push, nv30->screen->swzsurf->handle);
   }

   BEGIN_NV04(push, NV03_SIFM(DMA_IMAGE), 1);
   PUSH_RELOC(push, src->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
   BEGIN_NV04(push, NV03_SIFM(COLOR_FORMAT), 8);
   PUSH_DATA (push, si_fmt);
   PUSH_DATA (push, NV03_SIFM_OPERATION_SRCCOPY);
   PUSH_DATA (push, ( dst->y0 << 16) | dst->x0);
   PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0));
   PUSH_DATA (push, ( dst->y0 << 16) | dst->x0);
   PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0));
   /* 12.20 fixed-point horizontal/vertical scale ratios (src/dst) */
   PUSH_DATA (push, ((src->x1 - src->x0) << 20) / (dst->x1 - dst->x0));
   PUSH_DATA (push, ((src->y1 - src->y0) << 20) / (dst->y1 - dst->y0));
   BEGIN_NV04(push, NV03_SIFM(SIZE), 4);
   /* source extents rounded up to even, per the w/h >= 2 requirement above */
   PUSH_DATA (push, align(src->h, 2) << 16 | align(src->w, 2));
   PUSH_DATA (push, src->pitch | si_arg);
   PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0);
   PUSH_DATA (push, (src->y0 << 20) | src->x0 << 4);
}
/* The NOP+OFFSET_OUT stuff after each M2MF transfer *is* actually required
 * to prevent some odd things from happening, easily reproducible by
 * attempting to do conditional rendering that has a M2MF transfer done
 * some time before it. 0x1e98 will fail with a DMA_W_PROTECTION (assuming
 * that name is still accurate on nv4x) error.
 */

/* M2MF can copy between any two linear surfaces, but cannot scale. */
static boolean
nv30_transfer_m2mf(XFER_ARGS)
{
   if (!src->pitch || !dst->pitch)
      return FALSE;
   if (nv30_transfer_scaled(src, dst))
      return FALSE;
   return TRUE;
}

/* Copy a rectangle with the M2MF object, splitting into chunks of at most
 * 2047 lines per submission (hardware line-count limit per transfer).
 */
static void
nv30_transfer_rect_m2mf(XFER_ARGS)
{
   struct nouveau_pushbuf *push = nv30->base.pushbuf;
   struct nouveau_pushbuf_refn refs[] = {
      { src->bo, src->domain | NOUVEAU_BO_RD },
      { dst->bo, dst->domain | NOUVEAU_BO_WR },
   };
   struct nv04_fifo *fifo = push->channel->data;
   unsigned src_offset = src->offset;
   unsigned dst_offset = dst->offset;
   unsigned w = dst->x1 - dst->x0;
   unsigned h = dst->y1 - dst->y0;

   /* advance both offsets to the top-left pixel of each rectangle */
   src_offset += (src->y0 * src->pitch) + (src->x0 * src->cpp);
   dst_offset += (dst->y0 * dst->pitch) + (dst->x0 * dst->cpp);

   /* select VRAM or GART DMA objects to match each surface's placement */
   BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2);
   PUSH_DATA (push, (src->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
   PUSH_DATA (push, (dst->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);

   while (h) {
      unsigned lines = (h > 2047) ? 2047 : h;

      if (nouveau_pushbuf_space(push, 13, 2, 0) ||
          nouveau_pushbuf_refn (push, refs, 2))
         return;

      BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
      PUSH_RELOC(push, src->bo, src_offset, NOUVEAU_BO_LOW, 0, 0);
      PUSH_RELOC(push, dst->bo, dst_offset, NOUVEAU_BO_LOW, 0, 0);
      PUSH_DATA (push, src->pitch);
      PUSH_DATA (push, dst->pitch);
      PUSH_DATA (push, w * src->cpp);
      PUSH_DATA (push, lines);
      PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
                       NV03_M2MF_FORMAT_OUTPUT_INC_1);
      PUSH_DATA (push, 0x00000000);
      /* see comment above: required NOP + OFFSET_OUT after each transfer */
      BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
      PUSH_DATA (push, 0x00000000);
      BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
      PUSH_DATA (push, 0x00000000);

      h -= lines;
      src_offset += src->pitch * lines;
      dst_offset += dst->pitch * lines;
   }
}

/* CPU fallback handles everything except scaling. */
static boolean
nv30_transfer_cpu(XFER_ARGS)
{
   if (nv30_transfer_scaled(src, dst))
      return FALSE;
   return TRUE;
}

/* Address of pixel (x,y) in a linear (pitched) surface. */
static char *
linear_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
{
   return base + (y * rect->pitch) + (x * rect->cpp);
}

/* Spread the low bits of v onto even bit positions, then shift by s;
 * combining s=0 for x and s=1 for y interleaves the two coordinates
 * (Z-order / swizzled addressing, see swizzle2d_ptr).
 */
static INLINE unsigned
swizzle2d(unsigned v, unsigned s)
{
   v = (v | (v << 8)) & 0x00ff00ff;
   v = (v | (v << 4)) & 0x0f0f0f0f;
   v = (v | (v << 2)) & 0x33333333;
   v = (v | (v << 1)) & 0x55555555;
   return v << s;
}

/* Address of pixel (x,y) in a 2D swizzled surface: the surface is treated
 * as a row of 2^k x 2^k swizzled tiles (k = log2 of the smaller dimension),
 * with bit-interleaved addressing inside each tile.
 */
static char *
swizzle2d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
{
   unsigned k = util_logbase2(MIN2(rect->w, rect->h));
   unsigned km = (1 << k) - 1;
   unsigned nx = rect->w >> k;
   unsigned tx = x >> k;
   unsigned ty = y >> k;
   unsigned m;

   m = swizzle2d(x & km, 0);
   m |= swizzle2d(y & km, 1);
   m += ((ty * nx) + tx) << k << k;   /* tile index * tile size (2^k * 2^k) */

   return base + (m * rect->cpp);
}
1) << i++; + x >>= 1; + w >>= 1; + } + if (h) { + v |= (y & 1) << i++; + y >>= 1; + h >>= 1; + } + if (d) { + v |= (z & 1) << i++; + z >>= 1; + d >>= 1; + } + } while(o != i); + + return base + (v * rect->cpp); +} + +typedef char *(*get_ptr_t)(struct nv30_rect *, char *, int, int, int); + +static INLINE get_ptr_t +get_ptr(struct nv30_rect *rect) +{ + if (rect->pitch) + return linear_ptr; + + if (rect->d <= 1) + return swizzle2d_ptr; + + return swizzle3d_ptr; +} + +static void +nv30_transfer_rect_cpu(XFER_ARGS) +{ + get_ptr_t sp = get_ptr(src); + get_ptr_t dp = get_ptr(dst); + char *srcmap, *dstmap; + int x, y; + + nouveau_bo_map(src->bo, NOUVEAU_BO_RD, nv30->base.client); + nouveau_bo_map(dst->bo, NOUVEAU_BO_WR, nv30->base.client); + srcmap = src->bo->map + src->offset; + dstmap = dst->bo->map + dst->offset; + + for (y = 0; y < (dst->y1 - dst->y0); y++) { + for (x = 0; x < (dst->x1 - dst->x0); x++) { + memcpy(dp(dst, dstmap, dst->x0 + x, dst->y0 + y, dst->z), + sp(src, srcmap, src->x0 + x, src->y0 + y, src->z), dst->cpp); + } + } +} + +void +nv30_transfer_rect(struct nv30_context *nv30, enum nv30_transfer_filter filter, + struct nv30_rect *src, struct nv30_rect *dst) +{ + static const struct { + char *name; + boolean (*possible)(XFER_ARGS); + void (*execute)(XFER_ARGS); + } *method, methods[] = { + { "m2mf", nv30_transfer_m2mf, nv30_transfer_rect_m2mf }, + { "sifm", nv30_transfer_sifm, nv30_transfer_rect_sifm }, + { "blit", nv30_transfer_blit, nv30_transfer_rect_blit }, + { "rect", nv30_transfer_cpu, nv30_transfer_rect_cpu }, + {} + }; + + method = methods - 1; + while ((++method)->possible) { + if (method->possible(nv30, filter, src, dst)) { + method->execute(nv30, filter, src, dst); + return; + } + } + + assert(0); +} + +void +nv30_transfer_push_data(struct nouveau_context *nv, + struct nouveau_bo *bo, unsigned offset, unsigned domain, + unsigned size, void *data) +{ + /* use ifc, or scratch + copy_data? 
/* Linear BO-to-BO copy via M2MF: the bulk is moved as 4096-byte "lines"
 * (up to 2047 per submission), with any sub-page remainder copied in a
 * final single-line transfer.
 */
void
nv30_transfer_copy_data(struct nouveau_context *nv,
                        struct nouveau_bo *dst, unsigned d_off, unsigned d_dom,
                        struct nouveau_bo *src, unsigned s_off, unsigned s_dom,
                        unsigned size)
{
   struct nv04_fifo *fifo = nv->screen->channel->data;
   struct nouveau_pushbuf_refn refs[] = {
      { src, s_dom | NOUVEAU_BO_RD },
      { dst, d_dom | NOUVEAU_BO_WR },
   };
   struct nouveau_pushbuf *push = nv->pushbuf;
   unsigned pages, lines;

   /* split into whole 4KiB pages plus a byte remainder */
   pages = size >> 12;
   size -= (pages << 12);

   /* select VRAM or GART DMA objects to match each BO's placement */
   BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2);
   PUSH_DATA (push, (s_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
   PUSH_DATA (push, (d_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);

   while (pages) {
      lines = (pages > 2047) ? 2047 : pages;   /* hw line-count limit */
      pages -= lines;

      if (nouveau_pushbuf_space(push, 13, 2, 0) ||
          nouveau_pushbuf_refn (push, refs, 2))
         return;

      BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
      PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0);
      PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0);
      PUSH_DATA (push, 4096);    /* src pitch  */
      PUSH_DATA (push, 4096);    /* dst pitch  */
      PUSH_DATA (push, 4096);    /* line length */
      PUSH_DATA (push, lines);
      PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
                       NV03_M2MF_FORMAT_OUTPUT_INC_1);
      PUSH_DATA (push, 0x00000000);
      /* required NOP + OFFSET_OUT after each M2MF transfer, see the
       * comment above nv30_transfer_m2mf() */
      BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
      PUSH_DATA (push, 0x00000000);
      BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
      PUSH_DATA (push, 0x00000000);

      s_off += (lines << 12);
      d_off += (lines << 12);
   }

   /* remaining partial page, if any, as one line of 'size' bytes */
   if (size) {
      if (nouveau_pushbuf_space(push, 13, 2, 0) ||
          nouveau_pushbuf_refn (push, refs, 2))
         return;

      BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
      PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0);
      PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0);
      PUSH_DATA (push, size);
      PUSH_DATA (push, size);
      PUSH_DATA (push, size);
      PUSH_DATA (push, 1);
      PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
                       NV03_M2MF_FORMAT_OUTPUT_INC_1);
      PUSH_DATA (push, 0x00000000);
      BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
      PUSH_DATA (push, 0x00000000);
      BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
      PUSH_DATA (push, 0x00000000);
   }
}
#ifndef __NV30_TRANSFER_H__
#define __NV30_TRANSFER_H__

/* Description of one side (source or destination) of a transfer. */
struct nv30_rect {
   struct nouveau_bo *bo;   /* backing buffer object */
   unsigned offset;         /* byte offset of the surface within bo */
   unsigned domain;         /* NOUVEAU_BO_VRAM or NOUVEAU_BO_GART */
   unsigned pitch;          /* bytes per row; 0 means swizzled layout */
   unsigned cpp;            /* bytes per pixel */
   unsigned w;              /* surface width in pixels */
   unsigned h;              /* surface height in pixels */
   unsigned d;              /* surface depth (1 for 2D) */
   unsigned z;              /* z slice addressed by this transfer */
   unsigned x0;             /* transfer rectangle, [x0,x1) x [y0,y1) */
   unsigned x1;
   unsigned y0;
   unsigned y1;
};

/* Filtering applied when a transfer scales (src rect size != dst rect size). */
enum nv30_transfer_filter {
   NEAREST = 0,
   BILINEAR
};

void
nv30_transfer_rect(struct nv30_context *, enum nv30_transfer_filter filter,
                   struct nv30_rect *, struct nv30_rect *);

void
nv30_transfer_push_data(struct nouveau_context *,
                        struct nouveau_bo *, unsigned offset, unsigned domain,
                        unsigned size, void *data);

void
nv30_transfer_copy_data(struct nouveau_context *,
                        struct nouveau_bo *, unsigned dstoff, unsigned dstdom,
                        struct nouveau_bo *, unsigned srcoff, unsigned srcdom,
                        unsigned size);

#endif
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
/* Unpack one vertex from a buffer and emit it as a constant (per-draw)
 * vertex attribute, used for zero-stride vertex buffers.
 */
static void
nv30_emit_vtxattr(struct nv30_context *nv30, struct pipe_vertex_buffer *vb,
                  struct pipe_vertex_element *ve, unsigned attr)
{
   const unsigned nc = util_format_get_nr_components(ve->src_format);
   struct nouveau_pushbuf *push = nv30->base.pushbuf;
   struct nv04_resource *res = nv04_resource(vb->buffer);
   const struct util_format_description *desc =
      util_format_description(ve->src_format);
   const void *data;
   float v[4];

   data = nouveau_resource_map_offset(&nv30->base, res, vb->buffer_offset +
                                      ve->src_offset, NOUVEAU_BO_RD);

   /* convert whatever the source format is into floats */
   desc->unpack_rgba_float(v, 0, data, 0, 1, 1);

   switch (nc) {
   case 4:
      BEGIN_NV04(push, NV30_3D(VTX_ATTR_4F(attr)), 4);
      PUSH_DATAf(push, v[0]);
      PUSH_DATAf(push, v[1]);
      PUSH_DATAf(push, v[2]);
      PUSH_DATAf(push, v[3]);
      break;
   case 3:
      BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(attr)), 3);
      PUSH_DATAf(push, v[0]);
      PUSH_DATAf(push, v[1]);
      PUSH_DATAf(push, v[2]);
      break;
   case 2:
      BEGIN_NV04(push, NV30_3D(VTX_ATTR_2F(attr)), 2);
      PUSH_DATAf(push, v[0]);
      PUSH_DATAf(push, v[1]);
      break;
   case 1:
      BEGIN_NV04(push, NV30_3D(VTX_ATTR_1F(attr)), 1);
      PUSH_DATAf(push, v[0]);
      break;
   default:
      assert(0);
      break;
   }
}

/* Compute the byte range of vertex buffer vbi actually referenced by the
 * current draw's [vbo_min_index, vbo_max_index] interval.
 */
static INLINE void
nv30_vbuf_range(struct nv30_context *nv30, int vbi,
                uint32_t *base, uint32_t *size)
{
   assert(nv30->vbo_max_index != ~0);
   *base = nv30->vbo_min_index * nv30->vtxbuf[vbi].stride;
   *size = (nv30->vbo_max_index -
            nv30->vbo_min_index + 1) * nv30->vtxbuf[vbi].stride;
}

/* Decide, per vertex buffer, whether to push vertices through the FIFO,
 * upload user data to a temporary, or migrate the buffer to GART, setting
 * vbo_fifo/vbo_user masks accordingly.
 */
static void
nv30_prevalidate_vbufs(struct nv30_context *nv30)
{
   struct pipe_vertex_buffer *vb;
   struct nv04_resource *buf;
   int i;
   uint32_t base, size;

   nv30->vbo_fifo = nv30->vbo_user = 0;

   for (i = 0; i < nv30->num_vtxbufs; i++) {
      vb = &nv30->vtxbuf[i];
      if (!vb->stride || !vb->buffer) /* NOTE: user_buffer not implemented */
         continue;
      buf = nv04_resource(vb->buffer);

      /* NOTE: user buffers with temporary storage count as mapped by GPU */
      if (!nouveau_resource_mapped_by_gpu(vb->buffer)) {
         if (nv30->vbo_push_hint) {
            /* small draw from user memory: push everything inline */
            nv30->vbo_fifo = ~0;
            continue;
         } else {
            if (buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) {
               nv30->vbo_user |= 1 << i;
               /* NOTE(review): comparing stride against buffer_offset looks
                * odd here — verify this assertion's intent */
               assert(vb->stride > vb->buffer_offset);
               nv30_vbuf_range(nv30, i, &base, &size);
               nouveau_user_buffer_upload(&nv30->base, buf, base, size);
            } else {
               nouveau_buffer_migrate(&nv30->base, buf, NOUVEAU_BO_GART);
            }
            nv30->base.vbo_dirty = TRUE;
         }
      }
   }
}

/* Re-upload the referenced ranges of user vertex buffers and re-point the
 * hardware vertex buffer slots at the temporary copies.
 */
static void
nv30_update_user_vbufs(struct nv30_context *nv30)
{
   struct nouveau_pushbuf *push = nv30->base.pushbuf;
   uint32_t base, offset, size;
   int i;
   uint32_t written = 0;   /* upload each buffer only once */

   for (i = 0; i < nv30->vertex->num_elements; i++) {
      struct pipe_vertex_element *ve = &nv30->vertex->pipe[i];
      const int b = ve->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &nv30->vtxbuf[b];
      struct nv04_resource *buf = nv04_resource(vb->buffer);

      if (!(nv30->vbo_user & (1 << b)))
         continue;

      if (!vb->stride) {
         /* zero-stride: emit as a constant attribute instead */
         nv30_emit_vtxattr(nv30, vb, ve, i);
         continue;
      }
      nv30_vbuf_range(nv30, b, &base, &size);

      if (!(written & (1 << b))) {
         written |= 1 << b;
         nouveau_user_buffer_upload(&nv30->base, buf, base, size);
      }

      offset = vb->buffer_offset + ve->src_offset;

      BEGIN_NV04(push, NV30_3D(VTXBUF(i)), 1);
      PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP, buf, offset,
                 NOUVEAU_BO_LOW | NOUVEAU_BO_RD,
                 0, NV30_3D_VTXBUF_DMA1);
   }
   nv30->base.vbo_dirty = TRUE;
}

/* Drop the temporary GPU storage backing user vertex buffers after a draw. */
static INLINE void
nv30_release_user_vbufs(struct nv30_context *nv30)
{
   uint32_t vbo_user = nv30->vbo_user;

   while (vbo_user) {
      int i = ffs(vbo_user) - 1;
      vbo_user &= ~(1 << i);

      nouveau_buffer_release_gpu_storage(nv04_resource(nv30->vtxbuf[i].buffer));
   }

   nouveau_bufctx_reset(nv30->bufctx, BUFCTX_VTXTMP);
}

/* Validate vertex state: program the VTXFMT slots for every element (and
 * reset any slots left over from the previous state), then bind the vertex
 * buffers or emit constant attributes as appropriate.
 */
void
nv30_vbo_validate(struct nv30_context *nv30)
{
   struct nouveau_pushbuf *push = nv30->base.pushbuf;
   struct nv30_vertex_stateobj *vertex = nv30->vertex;
   struct pipe_vertex_element *ve;
   struct pipe_vertex_buffer *vb;
   unsigned i, redefine;

   nouveau_bufctx_reset(nv30->bufctx, BUFCTX_VTXBUF);
   if (!nv30->vertex || nv30->draw_flags)
      return;

   if (unlikely(vertex->need_conversion)) {
      /* formats the hw can't fetch: push converted vertices inline */
      nv30->vbo_fifo = ~0;
      nv30->vbo_user = 0;
   } else {
      nv30_prevalidate_vbufs(nv30);
   }

   if (!PUSH_SPACE(push, 128))
      return;

   /* rewrite all slots used by either the old or the new state */
   redefine = MAX2(vertex->num_elements, nv30->state.num_vtxelts);
   BEGIN_NV04(push, NV30_3D(VTXFMT(0)), redefine);

   for (i = 0; i < vertex->num_elements; i++) {
      ve = &vertex->pipe[i];
      vb = &nv30->vtxbuf[ve->vertex_buffer_index];

      if (likely(vb->stride) || nv30->vbo_fifo)
         PUSH_DATA (push, (vb->stride << 8) | vertex->element[i].state);
      else
         PUSH_DATA (push, NV30_3D_VTXFMT_TYPE_V32_FLOAT);
   }

   /* clear stale slots from the previous vertex state */
   for (; i < nv30->state.num_vtxelts; i++) {
      PUSH_DATA (push, NV30_3D_VTXFMT_TYPE_V32_FLOAT);
   }

   for (i = 0; i < vertex->num_elements; i++) {
      struct nv04_resource *res;
      unsigned offset;
      boolean user;

      ve = &vertex->pipe[i];
      vb = &nv30->vtxbuf[ve->vertex_buffer_index];
      user = (nv30->vbo_user & (1 << ve->vertex_buffer_index));

      res = nv04_resource(vb->buffer);

      if (nv30->vbo_fifo || unlikely(vb->stride == 0)) {
         /* zero-stride attributes become constants; fifo mode binds nothing */
         if (!nv30->vbo_fifo)
            nv30_emit_vtxattr(nv30, vb, ve, i);
         continue;
      }

      offset = ve->src_offset + vb->buffer_offset;

      BEGIN_NV04(push, NV30_3D(VTXBUF(i)), 1);
      PUSH_RESRC(push, NV30_3D(VTXBUF(i)), user ? BUFCTX_VTXTMP : BUFCTX_VTXBUF,
                 res, offset, NOUVEAU_BO_LOW | NOUVEAU_BO_RD,
                 0, NV30_3D_VTXBUF_DMA1);
   }

   nv30->state.num_vtxelts = vertex->num_elements;
}
BUFCTX_VTXTMP : BUFCTX_VTXBUF, + res, offset, NOUVEAU_BO_LOW | NOUVEAU_BO_RD, + 0, NV30_3D_VTXBUF_DMA1); + } + + nv30->state.num_vtxelts = vertex->num_elements; +} + +static void * +nv30_vertex_state_create(struct pipe_context *pipe, unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nv30_vertex_stateobj *so; + struct translate_key transkey; + unsigned i; + + assert(num_elements); + + so = MALLOC(sizeof(*so) + sizeof(*so->element) * num_elements); + if (!so) + return NULL; + memcpy(so->pipe, elements, sizeof(*elements) * num_elements); + so->num_elements = num_elements; + so->need_conversion = FALSE; + + transkey.nr_elements = 0; + transkey.output_stride = 0; + + for (i = 0; i < num_elements; i++) { + const struct pipe_vertex_element *ve = &elements[i]; + const unsigned vbi = ve->vertex_buffer_index; + enum pipe_format fmt = ve->src_format; + + so->element[i].state = nv30_vtxfmt(pipe->screen, fmt)->hw; + if (!so->element[i].state) { + switch (util_format_get_nr_components(fmt)) { + case 1: fmt = PIPE_FORMAT_R32_FLOAT; break; + case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break; + case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break; + case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break; + default: + assert(0); + FREE(so); + return NULL; + } + so->element[i].state = nv30_vtxfmt(pipe->screen, fmt)->hw; + so->need_conversion = TRUE; + } + + if (1) { + unsigned j = transkey.nr_elements++; + + transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL; + transkey.element[j].input_format = ve->src_format; + transkey.element[j].input_buffer = vbi; + transkey.element[j].input_offset = ve->src_offset; + transkey.element[j].instance_divisor = ve->instance_divisor; + + transkey.element[j].output_format = fmt; + transkey.element[j].output_offset = transkey.output_stride; + transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3; + } + } + + so->translate = translate_create(&transkey); + so->vtx_size = transkey.output_stride / 4; + 
static void
nv30_vertex_state_delete(struct pipe_context *pipe, void *hwcso)
{
   struct nv30_vertex_stateobj *so = hwcso;

   if (so->translate)
      so->translate->release(so->translate);
   FREE(hwcso);
}

static void
nv30_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
{
   struct nv30_context *nv30 = nv30_context(pipe);

   nv30->vertex = hwcso;
   nv30->dirty |= NV30_NEW_VERTEX;
}

/* Emit a non-indexed draw as VB_VERTEX_BATCH commands: each data word
 * encodes a start vertex plus a count-1 in the top byte (max 256 vertices
 * per word, at most 2047*256 vertices per method submission).
 */
static void
nv30_draw_arrays(struct nv30_context *nv30,
                 unsigned mode, unsigned start, unsigned count,
                 unsigned instance_count)
{
   struct nouveau_pushbuf *push = nv30->base.pushbuf;
   unsigned prim;

   prim = nv30_prim_gl(mode);

   BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
   PUSH_DATA (push, prim);
   while (count) {
      const unsigned mpush = 2047 * 256;
      unsigned npush = (count > mpush) ? mpush : count;
      unsigned wpush = ((npush + 255) & ~255) >> 8;   /* words needed */

      count -= npush;

      BEGIN_NI04(push, NV30_3D(VB_VERTEX_BATCH), wpush);
      while (npush >= 256) {
         PUSH_DATA (push, 0xff000000 | start);   /* 256 verts from 'start' */
         start += 256;
         npush -= 256;
      }

      if (npush)
         PUSH_DATA (push, ((npush - 1) << 24) | start);
   }
   BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
   PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
}

/* Push 8-bit indices inline: an odd leading index goes out via the 32-bit
 * method, the rest are packed two-per-word through VB_ELEMENT_U16.
 */
static void
nv30_draw_elements_inline_u08(struct nouveau_pushbuf *push, const uint8_t *map,
                              unsigned start, unsigned count)
{
   map += start;

   if (count & 1) {
      BEGIN_NV04(push, NV30_3D(VB_ELEMENT_U32), 1);
      PUSH_DATA (push, *map++);
   }

   count >>= 1;
   while (count) {
      unsigned npush = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
      count -= npush;

      BEGIN_NI04(push, NV30_3D(VB_ELEMENT_U16), npush);
      while (npush--) {
         PUSH_DATA (push, (map[1] << 16) | map[0]);
         map += 2;
      }
   }
}
BEGIN_NV04(push, NV30_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, *map++); + } + + count >>= 1; + while (count) { + unsigned npush = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); + count -= npush; + + BEGIN_NI04(push, NV30_3D(VB_ELEMENT_U16), npush); + while (npush--) { + PUSH_DATA (push, (map[1] << 16) | map[0]); + map += 2; + } + } +} + +static void +nv30_draw_elements_inline_u32(struct nouveau_pushbuf *push, const uint32_t *map, + unsigned start, unsigned count) +{ + map += start; + + while (count) { + const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); + + BEGIN_NI04(push, NV30_3D(VB_ELEMENT_U32), nr); + PUSH_DATAp(push, map, nr); + + map += nr; + count -= nr; + } +} + +static void +nv30_draw_elements_inline_u32_short(struct nouveau_pushbuf *push, + const uint32_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + BEGIN_NV04(push, NV30_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, *map++); + } + + count >>= 1; + while (count) { + unsigned npush = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);; + count -= npush; + + BEGIN_NI04(push, NV30_3D(VB_ELEMENT_U16), npush); + while (npush--) { + PUSH_DATA (push, (map[1] << 16) | map[0]); + map += 2; + } + } +} + +static void +nv30_draw_elements(struct nv30_context *nv30, boolean shorten, + unsigned mode, unsigned start, unsigned count, + unsigned instance_count, int32_t index_bias) +{ + const unsigned index_size = nv30->idxbuf.index_size; + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct nouveau_object *eng3d = nv30->screen->eng3d; + unsigned prim = nv30_prim_gl(mode); + +#if 0 /*XXX*/ + if (index_bias != nv30->state.index_bias) { + BEGIN_NV04(push, NV30_3D(VB_ELEMENT_BASE), 1); + PUSH_DATA (push, index_bias); + nv30->state.index_bias = index_bias; + } +#endif + + if (eng3d->oclass == NV40_3D_CLASS && index_size > 1 && + nv30->idxbuf.buffer) { + struct nv04_resource *res = nv04_resource(nv30->idxbuf.buffer); + unsigned offset = nv30->idxbuf.offset; + + 
/* Emit an indexed draw.  On nv40-class hardware with a GPU-resident index
 * buffer the indices are fetched by the GPU via IDXBUF/VB_INDEX_BATCH;
 * otherwise the indices are mapped and pushed inline through the FIFO.
 */
static void
nv30_draw_elements(struct nv30_context *nv30, boolean shorten,
                   unsigned mode, unsigned start, unsigned count,
                   unsigned instance_count, int32_t index_bias)
{
   const unsigned index_size = nv30->idxbuf.index_size;
   struct nouveau_pushbuf *push = nv30->base.pushbuf;
   struct nouveau_object *eng3d = nv30->screen->eng3d;
   unsigned prim = nv30_prim_gl(mode);

#if 0 /*XXX*/
   if (index_bias != nv30->state.index_bias) {
      BEGIN_NV04(push, NV30_3D(VB_ELEMENT_BASE), 1);
      PUSH_DATA (push, index_bias);
      nv30->state.index_bias = index_bias;
   }
#endif

   if (eng3d->oclass == NV40_3D_CLASS && index_size > 1 &&
       nv30->idxbuf.buffer) {
      /* hardware index buffer path (nv40 only, 16/32-bit indices) */
      struct nv04_resource *res = nv04_resource(nv30->idxbuf.buffer);
      unsigned offset = nv30->idxbuf.offset;

      assert(nouveau_resource_mapped_by_gpu(&res->base));

      BEGIN_NV04(push, NV30_3D(IDXBUF_OFFSET), 2);
      PUSH_RESRC(push, NV30_3D(IDXBUF_OFFSET), BUFCTX_IDXBUF, res, offset,
                 NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0);
      PUSH_MTHD (push, NV30_3D(IDXBUF_FORMAT), BUFCTX_IDXBUF, res->bo,
                 (index_size == 2) ? 0x00000010 : 0x00000000,
                 res->domain | NOUVEAU_BO_RD,
                 0, NV30_3D_IDXBUF_FORMAT_DMA1);
      BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
      PUSH_DATA (push, prim);
      /* batch words: start index + (count-1) in top byte, as in
       * nv30_draw_arrays() */
      while (count) {
         const unsigned mpush = 2047 * 256;
         unsigned npush = (count > mpush) ? mpush : count;
         unsigned wpush = ((npush + 255) & ~255) >> 8;

         count -= npush;

         BEGIN_NI04(push, NV30_3D(VB_INDEX_BATCH), wpush);
         while (npush >= 256) {
            PUSH_DATA (push, 0xff000000 | start);
            start += 256;
            npush -= 256;
         }

         if (npush)
            PUSH_DATA (push, ((npush - 1) << 24) | start);
      }
      BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
      PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
      PUSH_RESET(push, BUFCTX_IDXBUF);
   } else {
      /* inline path: map the index data and push it through the FIFO */
      const void *data;
      if (nv30->idxbuf.buffer)
         data = nouveau_resource_map_offset(&nv30->base,
                                            nv04_resource(nv30->idxbuf.buffer),
                                            nv30->idxbuf.offset, NOUVEAU_BO_RD);
      else
         data = nv30->idxbuf.user_buffer;
      if (!data)
         return;

      BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
      PUSH_DATA (push, prim);
      switch (index_size) {
      case 1:
         nv30_draw_elements_inline_u08(push, data, start, count);
         break;
      case 2:
         nv30_draw_elements_inline_u16(push, data, start, count);
         break;
      case 4:
         /* 'shorten' means all indices fit in 16 bits, allowing the packed
          * two-per-word encoding */
         if (shorten)
            nv30_draw_elements_inline_u32_short(push, data, start, count);
         else
            nv30_draw_elements_inline_u32(push, data, start, count);
         break;
      default:
         assert(0);
         return;
      }
      BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
      PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
   }
}

/* pipe_context::draw_vbo entry point: pick between the draw module,
 * push-vertices and hardware vertex buffer paths, handle primitive-restart
 * state, then dispatch to the arrays/elements emitters.
 */
static void
nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
   struct nv30_context *nv30 = nv30_context(pipe);
   struct nouveau_pushbuf *push = nv30->base.pushbuf;

   /* For picking only a few vertices from a large user buffer, push is better,
    * if index count is larger and we expect repeated vertices, suggest upload.
    */
   nv30->vbo_push_hint = /* the 64 is heuristic */
      !(info->indexed &&
        ((info->max_index - info->min_index + 64) < info->count));

   nv30->vbo_min_index = info->min_index;
   nv30->vbo_max_index = info->max_index;

   /* switching between push and pull paths requires revalidating arrays */
   if (nv30->vbo_push_hint != !!nv30->vbo_fifo)
      nv30->dirty |= NV30_NEW_ARRAYS;

   push->user_priv = &nv30->bufctx;
   if (nv30->vbo_user && !(nv30->dirty & (NV30_NEW_VERTEX | NV30_NEW_ARRAYS)))
      nv30_update_user_vbufs(nv30);

   nv30_state_validate(nv30, TRUE);
   if (nv30->draw_flags) {
      /* unsupported state: route through the software draw module */
      nv30_render_vbo(pipe, info);
      return;
   } else
   if (nv30->vbo_fifo) {
      /* push translated vertices inline */
      nv30_push_vbo(nv30, info);
      return;
   }

   if (nv30->base.vbo_dirty) {
      BEGIN_NV04(push, NV30_3D(VTX_CACHE_INVALIDATE_1710), 1);
      PUSH_DATA (push, 0);
      nv30->base.vbo_dirty = FALSE;
   }

   if (!info->indexed) {
      nv30_draw_arrays(nv30,
                       info->mode, info->start, info->count,
                       info->instance_count);
   } else {
      boolean shorten = info->max_index <= 65535;

      if (info->primitive_restart != nv30->state.prim_restart) {
         if (info->primitive_restart) {
            BEGIN_NV04(push, NV40_3D(PRIM_RESTART_ENABLE), 2);
            PUSH_DATA (push, 1);
            PUSH_DATA (push, info->restart_index);

            /* a wide restart index forces the 32-bit inline path */
            if (info->restart_index > 65535)
               shorten = FALSE;
         } else {
            BEGIN_NV04(push, NV40_3D(PRIM_RESTART_ENABLE), 1);
            PUSH_DATA (push, 0);
         }
         nv30->state.prim_restart = info->primitive_restart;
      } else
      if (info->primitive_restart) {
         BEGIN_NV04(push, NV40_3D(PRIM_RESTART_INDEX), 1);
         PUSH_DATA (push, info->restart_index);

         if (info->restart_index > 65535)
            shorten = FALSE;
      }

      nv30_draw_elements(nv30, shorten,
                         info->mode, info->start, info->count,
                         info->instance_count, info->index_bias);
   }

   nv30_state_release(nv30);
   nv30_release_user_vbufs(nv30);
}

/* Hook up the vertex/draw entry points on context creation. */
void
nv30_vbo_init(struct pipe_context *pipe)
{
   pipe->create_vertex_elements_state = nv30_vertex_state_create;
   pipe->delete_vertex_elements_state = nv30_vertex_state_delete;
   pipe->bind_vertex_elements_state = nv30_vertex_state_bind;
   pipe->draw_vbo = nv30_draw_vbo;
}
/* Free all compiled state for a vertex program and mark it untranslated so
 * the next validate retranslates it.
 */
static void
nv30_vertprog_destroy(struct nv30_vertprog *vp)
{
   util_dynarray_fini(&vp->branch_relocs);
   nouveau_heap_free(&vp->exec);
   FREE(vp->insns);
   vp->insns = NULL;
   vp->nr_insns = 0;

   util_dynarray_fini(&vp->const_relocs);
   nouveau_heap_free(&vp->data);
   FREE(vp->consts);
   vp->consts = NULL;
   vp->nr_consts = 0;

   vp->translated = FALSE;
}

/* Validate the bound vertex program: (re)translate if needed, allocate
 * instruction and constant space on the hardware heaps (evicting other
 * programs if necessary), patch branch/constant relocations for the
 * allocated offsets, upload dirty constants and code, and point the
 * hardware at the program.
 */
void
nv30_vertprog_validate(struct nv30_context *nv30)
{
   struct nouveau_pushbuf *push = nv30->base.pushbuf;
   struct nouveau_object *eng3d = nv30->screen->eng3d;
   struct nv30_vertprog *vp = nv30->vertprog.program;
   struct nv30_fragprog *fp = nv30->fragprog.program;
   boolean upload_code = FALSE;
   boolean upload_data = FALSE;
   unsigned i;

   if (nv30->dirty & NV30_NEW_FRAGPROG) {
      /* the VP's texcoord outputs must match the FP's inputs; retranslate
       * when they differ */
      if (memcmp(vp->texcoord, fp->texcoord, sizeof(vp->texcoord))) {
         if (vp->translated)
            nv30_vertprog_destroy(vp);
         memcpy(vp->texcoord, fp->texcoord, sizeof(vp->texcoord));
      }
   }

   /* clip-plane enables are baked into the translated program */
   if (nv30->rast && nv30->rast->pipe.clip_plane_enable != vp->enabled_ucps) {
      vp->enabled_ucps = nv30->rast->pipe.clip_plane_enable;
      if (vp->translated)
         nv30_vertprog_destroy(vp);
   }

   if (!vp->translated) {
      vp->translated = _nvfx_vertprog_translate(nv30, vp);
      if (!vp->translated) {
         /* translation failed: fall back to the draw module */
         nv30->draw_flags |= NV30_NEW_VERTPROG;
         return;
      }
      nv30->dirty |= NV30_NEW_VERTPROG;
   }

   if (!vp->exec) {
      struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
      struct nv30_shader_reloc *reloc = vp->branch_relocs.data;
      unsigned nr_reloc = vp->branch_relocs.size / sizeof(*reloc);
      uint32_t *inst, target;

      /* evict other programs from the exec heap until this one fits */
      if (nouveau_heap_alloc(heap, vp->nr_insns, &vp->exec, &vp->exec)) {
         while (heap->next && heap->size < vp->nr_insns) {
            struct nouveau_heap **evict = heap->next->priv;
            nouveau_heap_free(evict);
         }

         if (nouveau_heap_alloc(heap, vp->nr_insns, &vp->exec, &vp->exec)) {
            nv30->draw_flags |= NV30_NEW_VERTPROG;
            return;
         }
      }

      /* patch branch targets for the allocated base address; the bitfield
       * layout differs between nv30 and nv40 class hardware */
      if (eng3d->oclass < NV40_3D_CLASS) {
         while (nr_reloc--) {
            inst = vp->insns[reloc->location].data;
            target = vp->exec->start + reloc->target;

            inst[2] &= ~0x000007fc;
            inst[2] |= target << 2;
            reloc++;
         }
      } else {
         while (nr_reloc--) {
            inst = vp->insns[reloc->location].data;
            target = vp->exec->start + reloc->target;

            inst[2] &= ~0x0000003f;
            inst[2] |= target >> 3;
            inst[3] &= ~0xe0000000;
            inst[3] |= target << 29;
            reloc++;
         }
      }

      upload_code = TRUE;
   }

   if (vp->nr_consts && !vp->data) {
      struct nouveau_heap *heap = nv30->screen->vp_data_heap;
      struct nv30_shader_reloc *reloc = vp->const_relocs.data;
      unsigned nr_reloc = vp->const_relocs.size / sizeof(*reloc);
      uint32_t *inst, target;

      /* same eviction dance for the constant heap */
      if (nouveau_heap_alloc(heap, vp->nr_consts, vp, &vp->data)) {
         while (heap->next && heap->size < vp->nr_consts) {
            struct nv30_vertprog *evp = heap->next->priv;
            nouveau_heap_free(&evp->data);
         }

         if (nouveau_heap_alloc(heap, vp->nr_consts, vp, &vp->data)) {
            nv30->draw_flags |= NV30_NEW_VERTPROG;
            return;
         }
      }

      /* patch constant-slot references for the allocated base */
      if (eng3d->oclass < NV40_3D_CLASS) {
         while (nr_reloc--) {
            inst = vp->insns[reloc->location].data;
            target = vp->data->start + reloc->target;

            inst[1] &= ~0x0007fc000;
            inst[1] |= (target & 0x1ff) << 14;
            reloc++;
         }
      } else {
         while (nr_reloc--) {
            inst = vp->insns[reloc->location].data;
            target = vp->data->start + reloc->target;

            inst[1] &= ~0x0001ff000;
            inst[1] |= (target & 0x1ff) << 12;
            reloc++;
         }
      }

      upload_code = TRUE;
      upload_data = TRUE;
   }

   if (vp->nr_consts) {
      struct nv04_resource *res = nv04_resource(nv30->vertprog.constbuf);

      for (i = 0; i < vp->nr_consts; i++) {
         struct nv30_vertprog_data *data = &vp->consts[i];

         if (data->index < 0) {
            /* immediate constant: only needs (re)upload after realloc */
            if (!upload_data)
               continue;
         } else {
            /* user constant: upload only when changed (or after realloc) */
            float *constbuf = (float *)res->data;
            if (!upload_data &&
                !memcmp(data->value, &constbuf[data->index * 4], 16))
               continue;
            memcpy(data->value, &constbuf[data->index * 4], 16);
         }

         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5);
         PUSH_DATA (push, vp->data->start + i);
         PUSH_DATAp(push, data->value, 4);
      }
   }

   if (upload_code) {
      BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
      PUSH_DATA (push, vp->exec->start);
      for (i = 0; i < vp->nr_insns; i++) {
         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
         PUSH_DATAp(push, vp->insns[i].data, 4);
      }
   }

   if (nv30->dirty & (NV30_NEW_VERTPROG | NV30_NEW_FRAGPROG)) {
      BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1);
      PUSH_DATA (push, vp->exec->start);
      if (eng3d->oclass < NV40_3D_CLASS) {
         BEGIN_NV04(push, NV30_3D(ENGINE), 1);
         PUSH_DATA (push, 0x00000013); /* vp instead of ff, somehow */
      } else {
         BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2);
         PUSH_DATA (push, vp->ir);
         PUSH_DATA (push, vp->or | fp->vp_or);
         BEGIN_NV04(push, NV30_3D(ENGINE), 1);
         PUSH_DATA (push, 0x00000011);
      }
   }
}

/* Create a vertex shader CSO: duplicate the TGSI tokens and scan them;
 * actual translation is deferred to validate time.
 */
static void *
nv30_vp_state_create(struct pipe_context *pipe,
                     const struct pipe_shader_state *cso)
{
   struct nv30_vertprog *vp = CALLOC_STRUCT(nv30_vertprog);
   if (!vp)
      return NULL;

   vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
   tgsi_scan_shader(vp->pipe.tokens, &vp->info);
   return vp;
}

static void
nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso)
{
   struct nv30_vertprog *vp = hwcso;

   if (vp->translated)
      nv30_vertprog_destroy(vp);
   FREE((void *)vp->pipe.tokens);
   FREE(vp);
}

static void
nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso)
{
   struct nv30_context *nv30 = nv30_context(pipe);

   nv30->vertprog.program = hwcso;
   nv30->dirty |= NV30_NEW_VERTPROG;
}

/* Hook up the vertex shader entry points on context creation. */
void
nv30_vertprog_init(struct pipe_context *pipe)
{
   pipe->create_vs_state = nv30_vp_state_create;
   pipe->bind_vs_state = nv30_vp_state_bind;
   pipe->delete_vs_state = nv30_vp_state_delete;
}
a/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h new file mode 100644 index 00000000000..5556e0c77bd --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h @@ -0,0 +1,176 @@ +#ifndef __NV30_SHADER_H__ +#define __NV30_SHADER_H__ + +/* Vertex programs instruction set + * + * 128bit opcodes, split into 4 32-bit ones for ease of use. + * + * Non-native instructions + * ABS - MOV + NV40_VP_INST0_DEST_ABS + * POW - EX2 + MUL + LG2 + * SUB - ADD, second source negated + * SWZ - MOV + * XPD - + * + * Register access + * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) + * - Only one CONST can be accessed per-instruction (move extras into TEMPs) + * + * Relative Addressing + * According to the value returned for + * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB + * + * there are only two address registers available. The destination in the + * ARL instruction is set to TEMP <n> (The temp isn't actually written). + * + * When using vanilla ARB_v_p, the proprietary driver will squish both the + * available ADDRESS regs into the first hardware reg in the X and Y + * components. + * + * To use an address reg as an index into consts, the CONST_SRC is set to + * (const_base + offset) and INDEX_CONST is set. + * + * To access the second address reg use ADDR_REG_SELECT_1. A particular + * component of the address regs is selected with ADDR_SWZ. + * + * Only one address register can be accessed per instruction. + * + * Conditional execution (see NV_vertex_program{2,3} for details) Conditional + * execution of an instruction is enabled by setting COND_TEST_ENABLE, and + * selecting the condition which will allow the test to pass with + * COND_{FL,LT,...}. It is possible to swizzle the values in the condition + * register, which allows for testing against an individual component. + * + * Branching: + * + * The BRA/CAL instructions seem to follow a slightly different opcode + * layout. 
The destination instruction ID (IADDR) overlaps a source field. + * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO + * command, and is incremented automatically on each UPLOAD_INST FIFO + * command. + * + * Conditional branching is achieved by using the condition tests described + * above. There doesn't appear to be dedicated looping instructions, but + * this can be done using a temp reg + conditional branching. + * + * Subroutines may be uploaded before the main program itself, but the first + * executed instruction is determined by the PROGRAM_START_ID FIFO command. + * + */ + +/* DWORD 0 */ + +/* guess that this is the same as nv40 */ +#define NV30_VP_INST_INDEX_INPUT (1 << 27) + +#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ +#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ +#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */ +#define NV30_VP_INST_VEC_RESULT (1 << 20) +#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 +#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) +#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) +#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 16) +#define NV30_VP_INST_COND_TEST_ENABLE (1<<14) +#define NV30_VP_INST_COND_SHIFT 11 +#define NV30_VP_INST_COND_MASK (0x07 << 11) +#define NV30_VP_INST_COND_SWZ_X_SHIFT 9 +#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) +#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 +#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7) +#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5 +#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5) +#define NV30_VP_INST_COND_SWZ_W_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3) +#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3) +#define NV30_VP_INST_ADDR_SWZ_SHIFT 1 +#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1) +#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0 +#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0) + +/* DWORD 1 */ +#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 
+#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) +#define NV30_VP_INST_VEC_OPCODE_SHIFT 23 +#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) +#define NV30_VP_INST_CONST_SRC_SHIFT 14 +#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) +#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ +#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ +#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ + +/* Please note: the IADDR fields overlap other fields because they are used + * only for branch instructions. See Branching: label above + * + * DWORD 2 + */ +#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/ +#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */ +#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/ +#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/ +#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/ +#define NV30_VP_INST_IADDR_SHIFT 2 +#define NV30_VP_INST_IADDR_MASK (0x1FF << 2) /* NV30_VP_SRC2_LOW_MASK << 28 */ + +/* DWORD 3 */ +#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ +#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/ +#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24 +#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24) +#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20 +#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20) +#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16 +#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16) +#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ +#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ +#define NV30_VP_INST_DEST_SHIFT 2 +#define NV30_VP_INST_DEST_MASK (0x1F << 2) +# define NV30_VP_INST_DEST_POS 0 +# define NV30_VP_INST_DEST_BFC0 1 +# define NV30_VP_INST_DEST_BFC1 2 +# define NV30_VP_INST_DEST_COL0 3 +# define NV30_VP_INST_DEST_COL1 4 +# define NV30_VP_INST_DEST_FOGC 5 +# define NV30_VP_INST_DEST_PSZ 6 +# define NV30_VP_INST_DEST_TC(n) (8+(n)) +# define 
NV30_VP_INST_DEST_CLP(n) (17 + (n)) + +/* guess that this is the same as nv40 */ +#define NV30_VP_INST_INDEX_CONST (1 << 1) + +/* Useful to split the source selection regs into their pieces */ +#define NV30_VP_SRC0_HIGH_SHIFT 6 +#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 +#define NV30_VP_SRC0_LOW_MASK 0x0000003F +#define NV30_VP_SRC2_HIGH_SHIFT 4 +#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0 +#define NV30_VP_SRC2_LOW_MASK 0x0000000F + + +/* Source-register definition - matches NV20 exactly */ +#define NV30_VP_SRC_NEGATE (1<<14) +#define NV30_VP_SRC_SWZ_X_SHIFT 12 +#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) +#define NV30_VP_SRC_SWZ_Y_SHIFT 10 +#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) +#define NV30_VP_SRC_SWZ_Z_SHIFT 8 +#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) +#define NV30_VP_SRC_SWZ_W_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) +#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) +#define NV30_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) +#define NV30_VP_SRC_REG_TYPE_SHIFT 0 +#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0) +#define NV30_VP_SRC_REG_TYPE_TEMP 1 +#define NV30_VP_SRC_REG_TYPE_INPUT 2 +#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ + +#include "nv30/nvfx_shader.h" + +#endif diff --git a/src/gallium/drivers/nouveau/nv30/nv30_winsys.h b/src/gallium/drivers/nouveau/nv30/nv30_winsys.h new file mode 100644 index 00000000000..5cee5df60ce --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv30_winsys.h @@ -0,0 +1,158 @@ +#ifndef __NV30_WINSYS_H__ +#define __NV30_WINSYS_H__ + +#include <string.h> +#include "nouveau_winsys.h" +#include "nouveau_buffer.h" + +/*XXX: rnn */ +#define NV40_3D_VTXTEX_OFFSET(i) (0x0900 + ((i) * 0x20)) // 401e80 +#define NV40_3D_VTXTEX_FORMAT(i) (0x0904 + ((i) * 0x20)) // 401e90 +#define NV40_3D_VTXTEX_WRAP(i) (0x0908 + ((i) * 0x20)) // 401ea0 +#define NV40_3D_VTXTEX_ENABLE(i) (0x090c + ((i) * 0x20)) // 401eb0 +#define NV40_3D_VTXTEX_SWZ(i) 
(0x0910 + ((i) * 0x20)) // 401ec0 +#define NV40_3D_VTXTEX_FILTER(i) (0x0914 + ((i) * 0x20)) // 401ed0 +#define NV40_3D_VTXTEX_SIZE(i) (0x0918 + ((i) * 0x20)) // 401ee0 +#define NV40_3D_VTXTEX_BCOL(i) (0x091c + ((i) * 0x20)) // 401ef0 +#define NV30_3D_VTX_CACHE_INVALIDATE_1710 0x1710 +#define NV30_3D_R1718 0x1718 +#define NV40_3D_PRIM_RESTART_ENABLE 0x1dac +#define NV40_3D_PRIM_RESTART_INDEX 0x1db0 + +static INLINE void +PUSH_RELOC(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t offset, + uint32_t flags, uint32_t vor, uint32_t tor) +{ + nouveau_pushbuf_reloc(push, bo, offset, flags, vor, tor); +} + +static INLINE struct nouveau_bufctx * +bufctx(struct nouveau_pushbuf *push) +{ + struct nouveau_bufctx **pctx = push->user_priv; + return *pctx; +} + +static INLINE void +PUSH_RESET(struct nouveau_pushbuf *push, int bin) +{ + nouveau_bufctx_reset(bufctx(push), bin); +} + +static INLINE void +PUSH_REFN(struct nouveau_pushbuf *push, int bin, + struct nouveau_bo *bo, uint32_t access) +{ + nouveau_bufctx_refn(bufctx(push), bin, bo, access); +} + +static INLINE void +PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd, int bin, + struct nouveau_bo *bo, uint32_t offset, uint32_t access) +{ + nouveau_bufctx_mthd(bufctx(push), bin, (1 << 18) | (subc << 13) | mthd, + bo, offset, access | NOUVEAU_BO_LOW, 0, 0)->priv = NULL; + PUSH_DATA(push, bo->offset + offset); +} + +static INLINE void +PUSH_MTHDo(struct nouveau_pushbuf *push, int subc, int mthd, int bin, + struct nouveau_bo *bo, uint32_t access, uint32_t vor, uint32_t tor) +{ + nouveau_bufctx_mthd(bufctx(push), bin, (1 << 18) | (subc << 13) | mthd, + bo, 0, access | NOUVEAU_BO_OR, vor, tor)->priv = NULL; + if (bo->flags & NOUVEAU_BO_VRAM) + PUSH_DATA(push, vor); + else + PUSH_DATA(push, tor); +} + +static INLINE void +PUSH_MTHDs(struct nouveau_pushbuf *push, int subc, int mthd, int bin, + struct nouveau_bo *bo, uint32_t data, uint32_t access, + uint32_t vor, uint32_t tor) +{ + 
nouveau_bufctx_mthd(bufctx(push), bin, (1 << 18) | (subc << 13) | mthd, + bo, data, access | NOUVEAU_BO_OR, vor, tor)->priv = NULL; + if (bo->flags & NOUVEAU_BO_VRAM) + PUSH_DATA(push, data | vor); + else + PUSH_DATA(push, data | tor); +} + +static INLINE struct nouveau_bufref * +PUSH_MTHD(struct nouveau_pushbuf *push, int subc, int mthd, int bin, + struct nouveau_bo *bo, uint32_t data, uint32_t access, + uint32_t vor, uint32_t tor) +{ + struct nouveau_bufref *bref = + nouveau_bufctx_mthd(bufctx(push), bin, (1 << 18) | (subc << 13) | mthd, + bo, data, access | NOUVEAU_BO_OR, vor, tor); + if (access & NOUVEAU_BO_LOW) + data += bo->offset; + if (bo->flags & NOUVEAU_BO_VRAM) + data |= vor; + else + data |= tor; + PUSH_DATA(push, data); + bref->priv = NULL; + return bref; +} + +static INLINE void +PUSH_RESRC(struct nouveau_pushbuf *push, int subc, int mthd, int bin, + struct nv04_resource *r, uint32_t data, uint32_t access, + uint32_t vor, uint32_t tor) +{ + PUSH_MTHD(push, subc, mthd, bin, r->bo, r->offset + data, + r->domain | access, vor, tor)->priv = r; +} + +static INLINE void +BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, int size) +{ + PUSH_SPACE(push, size + 1); + PUSH_DATA (push, 0x00000000 | (size << 18) | (subc << 13) | mthd); +} + +static INLINE void +BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, int size) +{ + PUSH_SPACE(push, size + 1); + PUSH_DATA (push, 0x40000000 | (size << 18) | (subc << 13) | mthd); +} + +/* subchannel assignment + * + * 0: <1.0.0 - used by kernel for m2mf + * 1.0.0 - used by kernel for nvsw + * + * 1: <1.0.0 - used by kernel for nvsw + * 1.0.0 - free for userspace + * + * 2-7: free for userspace on all kernel versions + */ + +#define SUBC_M2MF(mthd) 2, (mthd) +#define NV03_M2MF(mthd) SUBC_M2MF(NV03_M2MF_##mthd) + +#define SUBC_SF2D(mthd) 3, (mthd) +#define NV04_SF2D(mthd) SUBC_SF2D(NV04_SURFACE_2D_##mthd) + +#define SUBC_SSWZ(mthd) 4, (mthd) +#define NV04_SSWZ(mthd) SUBC_SSWZ(NV04_SURFACE_SWZ_##mthd) + 
+#define SUBC_SIFM(mthd) 5, (mthd) +#define NV03_SIFM(mthd) SUBC_SIFM(NV03_SIFM_##mthd) +#define NV05_SIFM(mthd) SUBC_SIFM(NV05_SIFM_##mthd) + +#define SUBC_3D(mthd) 7, (mthd) +#define NV30_3D(mthd) SUBC_3D(NV30_3D_##mthd) +#define NV40_3D(mthd) SUBC_3D(NV40_3D_##mthd) + +#define NV01_SUBC(subc, mthd) SUBC_##subc((NV01_SUBCHAN_##mthd)) +#define NV11_SUBC(subc, mthd) SUBC_##subc((NV11_SUBCHAN_##mthd)) + +#define NV04_GRAPH(subc, mthd) SUBC_##subc((NV04_GRAPH_##mthd)) + +#endif diff --git a/src/gallium/drivers/nouveau/nv30/nv40_vertprog.h b/src/gallium/drivers/nouveau/nv30/nv40_vertprog.h new file mode 100644 index 00000000000..b369ced886e --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv40_vertprog.h @@ -0,0 +1,178 @@ +#ifndef __NV40_SHADER_H__ +#define __NV40_SHADER_H__ + +/* Vertex programs instruction set + * + * The NV40 instruction set is very similar to NV30. Most fields are in + * a slightly different position in the instruction however. + * + * Merged instructions + * In some cases it is possible to put two instructions into one opcode + * slot. The rules for when this is OK is not entirely clear to me yet. + * + * There are separate writemasks and dest temp register fields for each + * grouping of instructions. There is however only one field with the + * ID of a result register. Writing to temp/result regs is selected by + * setting VEC_RESULT/SCA_RESULT. + * + * Temporary registers + * The source/dest temp register fields have been extended by 1 bit, to + * give a total of 32 temporary registers. + * + * Relative Addressing + * NV40 can use an address register to index into vertex attribute regs. + * This is done by putting the offset value into INPUT_SRC and setting + * the INDEX_INPUT flag. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + * There is a second condition code register on NV40, it's use is enabled + * by setting the COND_REG_SELECT_1 flag. 
+ * + * Texture lookup + * TODO + */ + +/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ +#define NV40_VP_INST_VEC_RESULT (1 << 30) +/* uncertain.. */ +#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) +/* use address reg as index into attribs */ +#define NV40_VP_INST_INDEX_INPUT (1 << 27) +#define NV40_VP_INST_SATURATE (1 << 26) +#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) +#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV40_VP_INST_SRC2_ABS (1 << 23) +#define NV40_VP_INST_SRC1_ABS (1 << 22) +#define NV40_VP_INST_SRC0_ABS (1 << 21) +#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 +#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x3F << 15) +#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) +#define NV40_VP_INST_COND_SHIFT 10 +#define NV40_VP_INST_COND_MASK (0x7 << 10) +#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 +#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) +#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 +#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) +#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 +#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) +#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) +#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) +#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 +#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) +#define NV40_VP_INST0_KNOWN ( \ + NV40_VP_INST_INDEX_INPUT | \ + NV40_VP_INST_COND_REG_SELECT_1 | \ + NV40_VP_INST_ADDR_REG_SELECT_1 | \ + NV40_VP_INST_SRC2_ABS | \ + NV40_VP_INST_SRC1_ABS | \ + NV40_VP_INST_SRC0_ABS | \ + NV40_VP_INST_VEC_DEST_TEMP_MASK | \ + NV40_VP_INST_COND_TEST_ENABLE | \ + NV40_VP_INST_COND_MASK | \ + NV40_VP_INST_COND_SWZ_ALL_MASK | \ + NV40_VP_INST_ADDR_SWZ_MASK) + +/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ +#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 +#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) +#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 +#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) +#define NV40_VP_INST_CONST_SRC_SHIFT 12 
+#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) +#define NV40_VP_INST_INPUT_SRC_SHIFT 8 +#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) +#define NV40_VP_INST_SRC0H_SHIFT 0 +#define NV40_VP_INST_SRC0H_MASK (0xFF << 0) +#define NV40_VP_INST1_KNOWN ( \ + NV40_VP_INST_VEC_OPCODE_MASK | \ + NV40_VP_INST_SCA_OPCODE_MASK | \ + NV40_VP_INST_CONST_SRC_MASK | \ + NV40_VP_INST_INPUT_SRC_MASK | \ + NV40_VP_INST_SRC0H_MASK \ + ) + +/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ +#define NV40_VP_INST_SRC0L_SHIFT 23 +#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) +#define NV40_VP_INST_SRC1_SHIFT 6 +#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) +#define NV40_VP_INST_SRC2H_SHIFT 0 +#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) +#define NV40_VP_INST_IADDRH_SHIFT 0 +#define NV40_VP_INST_IADDRH_MASK (0x3F << 0) + +/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ +#define NV40_VP_INST_IADDRL_SHIFT 29 +#define NV40_VP_INST_IADDRL_MASK (7 << 29) +#define NV40_VP_INST_SRC2L_SHIFT 21 +#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) +#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 +#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) +# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) +# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) +# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) +# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) +#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 +#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) +# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) +# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) +# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) +# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) +#define NV40_VP_INST_SCA_RESULT (1 << 12) +#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 +#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) +#define NV40_VP_INST_DEST_SHIFT 2 +#define NV40_VP_INST_DEST_MASK (31 << 2) +# define NV40_VP_INST_DEST_POS 0 +# define NV40_VP_INST_DEST_COL0 1 +# define NV40_VP_INST_DEST_COL1 2 +# define NV40_VP_INST_DEST_BFC0 3 +# define NV40_VP_INST_DEST_BFC1 4 
+# define NV40_VP_INST_DEST_FOGC 5 +# define NV40_VP_INST_DEST_PSZ 6 +# define NV40_VP_INST_DEST_TC0 7 +# define NV40_VP_INST_DEST_TC(n) (7+n) +# define NV40_VP_INST_DEST_TEMP 0x1F +#define NV40_VP_INST_INDEX_CONST (1 << 1) +#define NV40_VP_INST3_KNOWN ( \ + NV40_VP_INST_SRC2L_MASK |\ + NV40_VP_INST_SCA_WRITEMASK_MASK |\ + NV40_VP_INST_VEC_WRITEMASK_MASK |\ + NV40_VP_INST_SCA_DEST_TEMP_MASK |\ + NV40_VP_INST_DEST_MASK |\ + NV40_VP_INST_INDEX_CONST) + +/* Useful to split the source selection regs into their pieces */ +#define NV40_VP_SRC0_HIGH_SHIFT 9 +#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 +#define NV40_VP_SRC0_LOW_MASK 0x000001FF +#define NV40_VP_SRC2_HIGH_SHIFT 11 +#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 +#define NV40_VP_SRC2_LOW_MASK 0x000007FF + +/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ +#define NV40_VP_SRC_NEGATE (1 << 16) +#define NV40_VP_SRC_SWZ_X_SHIFT 14 +#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) +#define NV40_VP_SRC_SWZ_Y_SHIFT 12 +#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) +#define NV40_VP_SRC_SWZ_Z_SHIFT 10 +#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) +#define NV40_VP_SRC_SWZ_W_SHIFT 8 +#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) +#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 +#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) +#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) +#define NV40_VP_SRC_REG_TYPE_SHIFT 0 +#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) +# define NV40_VP_SRC_REG_TYPE_UNK0 0 +# define NV40_VP_SRC_REG_TYPE_TEMP 1 +# define NV40_VP_SRC_REG_TYPE_INPUT 2 +# define NV40_VP_SRC_REG_TYPE_CONST 3 + +#include "nv30/nvfx_shader.h" + +#endif diff --git a/src/gallium/drivers/nouveau/nv30/nv40_verttex.c b/src/gallium/drivers/nouveau/nv30/nv40_verttex.c new file mode 100644 index 00000000000..9a7163c448f --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nv40_verttex.c @@ -0,0 +1,100 @@ +/* + * Copyright 2012 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + * + */ + +#include "util/u_inlines.h" +#include "nv30/nv30_context.h" + +void +nv40_verttex_validate(struct nv30_context *nv30) +{ + struct nouveau_pushbuf *push = nv30->base.pushbuf; + unsigned dirty = nv30->vertprog.dirty_samplers; + + while (dirty) { + unsigned unit = ffs(dirty) - 1; + struct nv30_sampler_view *sv = (void *)nv30->fragprog.textures[unit]; + struct nv30_sampler_state *ss = nv30->fragprog.samplers[unit]; + + if (ss && sv) { + } else { + BEGIN_NV04(push, NV40_3D(VTXTEX_ENABLE(unit)), 1); + PUSH_DATA (push, 0); + } + } + + nv30->vertprog.dirty_samplers = 0; +} + +static void +nv40_verttex_sampler_states_bind(struct pipe_context *pipe, + unsigned nr, void **hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + unsigned i; + + for (i = 0; i < nr; i++) { + nv30->vertprog.samplers[i] = hwcso[i]; + nv30->vertprog.dirty_samplers |= (1 << i); + } + + for (; i < nv30->vertprog.num_samplers; i++) { + nv30->vertprog.samplers[i] = NULL; + nv30->vertprog.dirty_samplers |= (1 << i); + } + + nv30->vertprog.num_samplers = nr; + nv30->dirty |= NV30_NEW_VERTTEX; +} + + +static void +nv40_verttex_set_sampler_views(struct pipe_context *pipe, unsigned nr, + struct pipe_sampler_view **views) +{ + struct nv30_context *nv30 = nv30_context(pipe); + unsigned i; + + for (i = 0; i < nr; i++) { + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_VERTTEX(i)); + pipe_sampler_view_reference(&nv30->vertprog.textures[i], views[i]); + nv30->vertprog.dirty_samplers |= (1 << i); + } + + for (; i < nv30->vertprog.num_textures; i++) { + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_VERTTEX(i)); + pipe_sampler_view_reference(&nv30->vertprog.textures[i], NULL); + nv30->vertprog.dirty_samplers |= (1 << i); + } + + nv30->vertprog.num_textures = nr; + nv30->dirty |= NV30_NEW_VERTTEX; +} + +void +nv40_verttex_init(struct pipe_context *pipe) +{ + pipe->bind_vertex_sampler_states = nv40_verttex_sampler_states_bind; + pipe->set_vertex_sampler_views = 
nv40_verttex_set_sampler_views; +} diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c new file mode 100644 index 00000000000..4751ec80de5 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c @@ -0,0 +1,1251 @@ +#include <float.h> +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_linkage.h" +#include "util/u_inlines.h" +#include "util/u_debug.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_ureg.h" + +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv30_context.h" +#include "nv30/nvfx_shader.h" + +struct nvfx_fpc { + struct nv30_fragprog *fp; + + unsigned max_temps; + unsigned long long r_temps; + unsigned long long r_temps_discard; + struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_reg r_input[PIPE_MAX_SHADER_INPUTS]; + struct nvfx_reg *r_temp; + + int num_regs; + + unsigned inst_offset; + unsigned have_const; + + struct util_dynarray imm_data; + + struct nvfx_reg* r_imm; + unsigned nr_imm; + + struct util_dynarray if_stack; + //struct util_dynarray loop_stack; + struct util_dynarray label_relocs; +}; + +static INLINE struct nvfx_reg +temp(struct nvfx_fpc *fpc) +{ + int idx = __builtin_ctzll(~fpc->r_temps); + + if (idx >= fpc->max_temps) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nvfx_reg(NVFXSR_TEMP, 0); + } + + fpc->r_temps |= (1ULL << idx); + fpc->r_temps_discard |= (1ULL << idx); + return nvfx_reg(NVFXSR_TEMP, idx); +} + +static INLINE void +release_temps(struct nvfx_fpc *fpc) +{ + fpc->r_temps &= ~fpc->r_temps_discard; + fpc->r_temps_discard = 0ULL; +} + +static inline struct nvfx_reg +nvfx_fp_imm(struct nvfx_fpc *fpc, float a, float b, float c, float d) +{ + float v[4] = {a, b, c, d}; + int idx = fpc->imm_data.size >> 4; + + memcpy(util_dynarray_grow(&fpc->imm_data, sizeof(float) * 4), v, 4 * 
sizeof(float)); + return nvfx_reg(NVFXSR_IMM, idx); +} + +static void +grow_insns(struct nvfx_fpc *fpc, int size) +{ + struct nv30_fragprog *fp = fpc->fp; + + fp->insn_len += size; + fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); +} + +static void +emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src) +{ + struct nv30_fragprog *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + uint32_t sr = 0; + + switch (src.reg.type) { + case NVFXSR_INPUT: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); + hw[0] |= (src.reg.index << NVFX_FP_OP_INPUT_SRC_SHIFT); + break; + case NVFXSR_OUTPUT: + sr |= NVFX_FP_REG_SRC_HALF; + /* fall-through */ + case NVFXSR_TEMP: + sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); + sr |= (src.reg.index << NVFX_FP_REG_SRC_SHIFT); + break; + case NVFXSR_IMM: + if (!fpc->have_const) { + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + fpc->have_const = 1; + } + + memcpy(&fp->insn[fpc->inst_offset + 4], + (float*)fpc->imm_data.data + src.reg.index * 4, + sizeof(uint32_t) * 4); + + sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); + break; + case NVFXSR_CONST: + if (!fpc->have_const) { + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + fpc->have_const = 1; + } + + { + struct nv30_fragprog_data *fpd; + + fp->consts = realloc(fp->consts, ++fp->nr_consts * + sizeof(*fpd)); + fpd = &fp->consts[fp->nr_consts - 1]; + fpd->offset = fpc->inst_offset + 4; + fpd->index = src.reg.index; + memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); + } + + sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); + break; + case NVFXSR_NONE: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NVFX_FP_REG_NEGATE; + + if (src.abs) + hw[1] |= (1 << (29 + pos)); + + sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << 
NVFX_FP_REG_SWZ_W_SHIFT)); + + hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst) +{ + struct nv30_fragprog *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + + switch (dst.type) { + case NVFXSR_OUTPUT: + if (dst.index == 1) + fp->fp_control |= 0x0000000e; + else { + hw[0] |= NVFX_FP_OP_OUT_REG_HALF; + dst.index <<= 1; + } + /* fall-through */ + case NVFXSR_TEMP: + if (fpc->num_regs < (dst.index + 1)) + fpc->num_regs = dst.index + 1; + break; + case NVFXSR_NONE: + hw[0] |= (1 << 30); + break; + default: + assert(0); + } + + hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT); +} + +static void +nvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn) +{ + struct nv30_fragprog *fp = fpc->fp; + uint32_t *hw; + + fpc->inst_offset = fp->insn_len; + fpc->have_const = 0; + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + memset(hw, 0, sizeof(uint32_t) * 4); + + if (insn.op == NVFX_FP_OP_OPCODE_KIL) + fp->fp_control |= NV30_3D_FP_CONTROL_USES_KIL; + hw[0] |= (insn.op << NVFX_FP_OP_OPCODE_SHIFT); + hw[0] |= (insn.mask << NVFX_FP_OP_OUTMASK_SHIFT); + hw[2] |= (insn.scale << NVFX_FP_OP_DST_SCALE_SHIFT); + + if (insn.sat) + hw[0] |= NVFX_FP_OP_OUT_SAT; + + if (insn.cc_update) + hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (insn.cc_test << NVFX_FP_OP_COND_SHIFT); + hw[1] |= ((insn.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (insn.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | + (insn.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | + (insn.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); + + if(insn.unit >= 0) + { + hw[0] |= (insn.unit << NVFX_FP_OP_TEX_UNIT_SHIFT); + } + + emit_dst(fpc, insn.dst); + emit_src(fpc, 0, insn.src[0]); + emit_src(fpc, 1, insn.src[1]); + emit_src(fpc, 2, insn.src[2]); +} + +#define arith(s,o,d,m,s0,s1,s2) \ + nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, \ + (d), (m), (s0), (s1), (s2)) + +#define tex(s,o,u,d,m,s0,s1,s2) \ + nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, (u), \ + (d), (m), (s0), none, none) + 
+/* IF src.x != 0, as TGSI specifies */ +static void +nv40_fp_if(struct nvfx_fpc *fpc, struct nvfx_src src) +{ + const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); + struct nvfx_insn insn = arith(0, MOV, none.reg, NVFX_FP_MASK_X, src, none, none); + uint32_t *hw; + insn.cc_update = 1; + nvfx_fp_emit(fpc, insn); + + fpc->inst_offset = fpc->fp->insn_len; + grow_insns(fpc, 4); + hw = &fpc->fp->insn[fpc->inst_offset]; + /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ + hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) | + NV40_FP_OP_OUT_NONE | + (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT); + /* Use .xxxx swizzle so that we check only src[0].x*/ + hw[1] = (0 << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (0 << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | + (0 << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | + (0 << NVFX_FP_OP_COND_SWZ_W_SHIFT) | + (NVFX_FP_OP_COND_NE << NVFX_FP_OP_COND_SHIFT); + hw[2] = 0; /* | NV40_FP_OP_OPCODE_IS_BRANCH | else_offset */ + hw[3] = 0; /* | endif_offset */ + util_dynarray_append(&fpc->if_stack, unsigned, fpc->inst_offset); +} + +/* IF src.x != 0, as TGSI specifies */ +static void +nv40_fp_cal(struct nvfx_fpc *fpc, unsigned target) +{ + struct nvfx_relocation reloc; + uint32_t *hw; + fpc->inst_offset = fpc->fp->insn_len; + grow_insns(fpc, 4); + hw = &fpc->fp->insn[fpc->inst_offset]; + /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? 
*/ + hw[0] = (NV40_FP_OP_BRA_OPCODE_CAL << NVFX_FP_OP_OPCODE_SHIFT); + /* Use .xxxx swizzle so that we check only src[0].x*/ + hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) | + (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */ + hw[3] = 0; + reloc.target = target; + reloc.location = fpc->inst_offset + 2; + util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); +} + +static void +nv40_fp_ret(struct nvfx_fpc *fpc) +{ + uint32_t *hw; + fpc->inst_offset = fpc->fp->insn_len; + grow_insns(fpc, 4); + hw = &fpc->fp->insn[fpc->inst_offset]; + /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ + hw[0] = (NV40_FP_OP_BRA_OPCODE_RET << NVFX_FP_OP_OPCODE_SHIFT); + /* Use .xxxx swizzle so that we check only src[0].x*/ + hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) | + (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */ + hw[3] = 0; +} + +static void +nv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target) +{ + struct nvfx_relocation reloc; + uint32_t *hw; + fpc->inst_offset = fpc->fp->insn_len; + grow_insns(fpc, 4); + hw = &fpc->fp->insn[fpc->inst_offset]; + /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? 
*/ + hw[0] = (NV40_FP_OP_BRA_OPCODE_REP << NVFX_FP_OP_OPCODE_SHIFT) | + NV40_FP_OP_OUT_NONE | + (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT); + /* Use .xxxx swizzle so that we check only src[0].x*/ + hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) | + (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | + (count << NV40_FP_OP_REP_COUNT1_SHIFT) | + (count << NV40_FP_OP_REP_COUNT2_SHIFT) | + (count << NV40_FP_OP_REP_COUNT3_SHIFT); + hw[3] = 0; /* | end_offset */ + reloc.target = target; + reloc.location = fpc->inst_offset + 3; + util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); + //util_dynarray_append(&fpc->loop_stack, unsigned, target); +} + +/* warning: this only works forward, and probably only if not inside any IF */ +static void +nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target) +{ + struct nvfx_relocation reloc; + uint32_t *hw; + fpc->inst_offset = fpc->fp->insn_len; + grow_insns(fpc, 4); + hw = &fpc->fp->insn[fpc->inst_offset]; + /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? 
*/ + hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) | + NV40_FP_OP_OUT_NONE | + (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT); + /* Use .xxxx swizzle so that we check only src[0].x*/ + hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (NVFX_FP_OP_COND_FL << NVFX_FP_OP_COND_SHIFT); + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | else_offset */ + hw[3] = 0; /* | endif_offset */ + reloc.target = target; + reloc.location = fpc->inst_offset + 2; + util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); + reloc.target = target; + reloc.location = fpc->inst_offset + 3; + util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); +} + +static void +nv40_fp_brk(struct nvfx_fpc *fpc) +{ + uint32_t *hw; + fpc->inst_offset = fpc->fp->insn_len; + grow_insns(fpc, 4); + hw = &fpc->fp->insn[fpc->inst_offset]; + /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ + hw[0] = (NV40_FP_OP_BRA_OPCODE_BRK << NVFX_FP_OP_OPCODE_SHIFT) | + NV40_FP_OP_OUT_NONE; + /* Use .xxxx swizzle so that we check only src[0].x*/ + hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; + hw[3] = 0; +} + +static INLINE struct nvfx_src +tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ + struct nvfx_src src; + + switch (fsrc->Register.File) { + case TGSI_FILE_INPUT: + src.reg = fpc->r_input[fsrc->Register.Index]; + break; + case TGSI_FILE_CONSTANT: + src.reg = nvfx_reg(NVFXSR_CONST, fsrc->Register.Index); + break; + case TGSI_FILE_IMMEDIATE: + assert(fsrc->Register.Index < fpc->nr_imm); + src.reg = fpc->r_imm[fsrc->Register.Index]; + break; + case TGSI_FILE_TEMPORARY: + src.reg = fpc->r_temp[fsrc->Register.Index]; + break; + /* NV40 fragprog result regs are just temps, so this is simple */ + case TGSI_FILE_OUTPUT: + src.reg = fpc->r_result[fsrc->Register.Index]; + break; + default: + 
NOUVEAU_ERR("bad src file\n"); + src.reg.index = 0; + src.reg.type = 0; + break; + } + + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; + src.indirect = 0; + src.indirect_reg = 0; + src.indirect_swz = 0; + return src; +} + +static INLINE struct nvfx_reg +tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) { + switch (fdst->Register.File) { + case TGSI_FILE_OUTPUT: + return fpc->r_result[fdst->Register.Index]; + case TGSI_FILE_TEMPORARY: + return fpc->r_temp[fdst->Register.Index]; + case TGSI_FILE_NULL: + return nvfx_reg(NVFXSR_NONE, 0); + default: + NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); + return nvfx_reg(NVFXSR_NONE, 0); + } +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W; + return mask; +} + +static boolean +nvfx_fragprog_parse_instruction(struct nv30_context* nvfx, struct nvfx_fpc *fpc, + const struct tgsi_full_instruction *finst) +{ + const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); + struct nvfx_insn insn; + struct nvfx_src src[3], tmp; + struct nvfx_reg dst; + int mask, sat, unit = 0; + int ai = -1, ci = -1, ii = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(fpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->Src[i]; + + switch (fsrc->Register.File) { + case 
TGSI_FILE_INPUT: + if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG && (0 + || fsrc->Register.SwizzleX == PIPE_SWIZZLE_ALPHA + || fsrc->Register.SwizzleY == PIPE_SWIZZLE_ALPHA + || fsrc->Register.SwizzleZ == PIPE_SWIZZLE_ALPHA + || fsrc->Register.SwizzleW == PIPE_SWIZZLE_ALPHA + )) { + /* hardware puts 0 in fogcoord.w, but GL/Gallium want 1 there */ + struct nvfx_src addend = nvfx_src(nvfx_fp_imm(fpc, 0, 0, 0, 1)); + addend.swz[0] = fsrc->Register.SwizzleX; + addend.swz[1] = fsrc->Register.SwizzleY; + addend.swz[2] = fsrc->Register.SwizzleZ; + addend.swz[3] = fsrc->Register.SwizzleW; + src[i] = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, ADD, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), addend, none)); + } else if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none)); + } + break; + case TGSI_FILE_CONSTANT: + if ((ci == -1 && ii == -1) || + ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none)); + } + break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->Register.Index) { + ii = fsrc->Register.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none)); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + case TGSI_FILE_SAMPLER: + unit = fsrc->Register.Index; + break; + case TGSI_FILE_OUTPUT: + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(fpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); + sat = (finst->Instruction.Saturate 
== TGSI_SAT_ZERO_ONE); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, abs(src[0]), none, none)); + break; + case TGSI_OPCODE_ADD: + nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_CEIL: + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, neg(src[0]), none, none)); + nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, neg(tmp), none, none)); + break; + case TGSI_OPCODE_CMP: + insn = arith(0, MOV, none.reg, mask, src[0], none, none); + insn.cc_update = 1; + nvfx_fp_emit(fpc, insn); + + insn = arith(sat, MOV, dst, mask, src[2], none, none); + insn.cc_test = NVFX_COND_GE; + nvfx_fp_emit(fpc, insn); + + insn = arith(sat, MOV, dst, mask, src[1], none, none); + insn.cc_test = NVFX_COND_LT; + nvfx_fp_emit(fpc, insn); + break; + case TGSI_OPCODE_COS: + nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_DDX: + if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none)); + nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none)); + nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none)); + nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none)); + } else { + nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none)); + } + break; + case TGSI_OPCODE_DDY: + if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none)); + nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none)); + nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none)); + 
nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none)); + } else { + nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none)); + } + break; + case TGSI_OPCODE_DP2: + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none)); + nvfx_fp_emit(fpc, arith(0, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none)); + break; + case TGSI_OPCODE_DP3: + nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_DP4: + nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_DPH: + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[1], none)); + nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none)); + break; + case TGSI_OPCODE_DST: + nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_EX2: + nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_FLR: + nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_FRC: + nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_KILL: + nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none)); + break; + case TGSI_OPCODE_KILL_IF: + insn = arith(0, MOV, none.reg, NVFX_FP_MASK_ALL, src[0], none, none); + insn.cc_update = 1; + nvfx_fp_emit(fpc, insn); + + insn = arith(0, KIL, none.reg, 0, none, none, none); + insn.cc_test = NVFX_COND_LT; + nvfx_fp_emit(fpc, insn); + break; + case TGSI_OPCODE_LG2: + nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_LIT: + if(!nvfx->is_nv4x) + nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], none, none)); + else { + /* we use FLT_MIN, so that log2 never gives -infinity, and thus multiplication by + * specular 0 always gives 0, 
so that ex2 gives 1, to satisfy the 0^0 = 1 requirement + * + * NOTE: if we start using half precision, we might need an fp16 FLT_MIN here instead + */ + struct nvfx_src maxs = nvfx_src(nvfx_fp_imm(fpc, 0, FLT_MIN, 0, 0)); + tmp = nvfx_src(temp(fpc)); + if (ci>= 0 || ii >= 0) { + nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none)); + maxs = tmp; + } + nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none)); + nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none)); + nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none)); + nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none)); + } + break; + case TGSI_OPCODE_LRP: + if(!nvfx->is_nv4x) + nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2])); + else { + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2])); + nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp)); + } + break; + case TGSI_OPCODE_MAD: + nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2])); + break; + case TGSI_OPCODE_MAX: + nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_MIN: + nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_MOV: + nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_MUL: + nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_POW: + if(!nvfx->is_nv4x) + nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none)); + else { + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none)); + nvfx_fp_emit(fpc, 
arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none)); + nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none)); + } + break; + case TGSI_OPCODE_RCP: + nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_RFL: + if(!nvfx->is_nv4x) + nvfx_fp_emit(fpc, arith(0, RFL_NV30, dst, mask, src[0], src[1], none)); + else { + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[0], none)); + nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_Y, src[0], src[1], none)); + insn = arith(0, DIV, tmp.reg, NVFX_FP_MASK_Z, swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); + insn.scale = NVFX_FP_OP_DST_SCALE_2X; + nvfx_fp_emit(fpc, insn); + nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]))); + } + break; + case TGSI_OPCODE_RSQ: + if(!nvfx->is_nv4x) + nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none)); + else { + tmp = nvfx_src(temp(fpc)); + insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none); + insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X; + nvfx_fp_emit(fpc, insn); + nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none)); + } + break; + case TGSI_OPCODE_SCS: + /* avoid overwriting the source */ + if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X) + { + if (mask & NVFX_FP_MASK_X) + nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none)); + if (mask & NVFX_FP_MASK_Y) + nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none)); + } + else + { + if (mask & NVFX_FP_MASK_Y) + nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none)); + if (mask & NVFX_FP_MASK_X) + nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none)); + } + break; + case TGSI_OPCODE_SEQ: + 
nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SFL: + nvfx_fp_emit(fpc, arith(sat, SFL, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SGE: + nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SGT: + nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SIN: + nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_SLE: + nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SLT: + nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SNE: + nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SSG: + { + struct nvfx_src minones = swz(nvfx_src(nvfx_fp_imm(fpc, -1, -1, -1, -1)), X, X, X, X); + + insn = arith(sat, MOV, dst, mask, src[0], none, none); + insn.cc_update = 1; + nvfx_fp_emit(fpc, insn); + + insn = arith(0, STR, dst, mask, none, none, none); + insn.cc_test = NVFX_COND_GT; + nvfx_fp_emit(fpc, insn); + + if(!sat) { + insn = arith(0, MOV, dst, mask, minones, none, none); + insn.cc_test = NVFX_COND_LT; + nvfx_fp_emit(fpc, insn); + } + break; + } + case TGSI_OPCODE_STR: + nvfx_fp_emit(fpc, arith(sat, STR, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SUB: + nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], neg(src[1]), none)); + break; + case TGSI_OPCODE_TEX: + nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_TRUNC: + tmp = nvfx_src(temp(fpc)); + insn = arith(0, MOV, none.reg, mask, src[0], none, none); + insn.cc_update = 1; + nvfx_fp_emit(fpc, insn); + + nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none)); + nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, tmp, none, none)); + + insn = arith(sat, MOV, dst, mask, neg(tmp), none, none); + insn.cc_test = 
NVFX_COND_LT; + nvfx_fp_emit(fpc, insn); + break; + case TGSI_OPCODE_TXB: + nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_TXL: + if(nvfx->is_nv4x) + nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none)); + else /* unsupported on nv30, use TEX and hope they like it */ + nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_TXP: + nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_XPD: + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none)); + nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp))); + break; + + case TGSI_OPCODE_IF: + // MOVRC0 R31 (TR0.xyzw), R<src>: + // IF (NE.xxxx) ELSE <else> END <end> + if(!nvfx->use_nv4x) + goto nv3x_cflow; + nv40_fp_if(fpc, src[0]); + break; + + case TGSI_OPCODE_ELSE: + { + uint32_t *hw; + if(!nvfx->use_nv4x) + goto nv3x_cflow; + assert(util_dynarray_contains(&fpc->if_stack, unsigned)); + hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)]; + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len; + break; + } + + case TGSI_OPCODE_ENDIF: + { + uint32_t *hw; + if(!nvfx->use_nv4x) + goto nv3x_cflow; + assert(util_dynarray_contains(&fpc->if_stack, unsigned)); + hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)]; + if(!hw[2]) + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len; + hw[3] = fpc->fp->insn_len; + break; + } + + case TGSI_OPCODE_BRA: + /* This can in limited cases be implemented with an IF with the else and endif labels pointing to the target */ + /* no state tracker uses this, so don't implement this for now */ + assert(0); + nv40_fp_bra(fpc, finst->Label.Label); + break; + + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + /* nothing to do here */ + break; + + case 
TGSI_OPCODE_CAL: + if(!nvfx->use_nv4x) + goto nv3x_cflow; + nv40_fp_cal(fpc, finst->Label.Label); + break; + + case TGSI_OPCODE_RET: + if(!nvfx->use_nv4x) + goto nv3x_cflow; + nv40_fp_ret(fpc); + break; + + case TGSI_OPCODE_BGNLOOP: + if(!nvfx->use_nv4x) + goto nv3x_cflow; + /* TODO: we should support using two nested REPs to allow a > 255 iteration count */ + nv40_fp_rep(fpc, 255, finst->Label.Label); + break; + + case TGSI_OPCODE_ENDLOOP: + break; + + case TGSI_OPCODE_BRK: + if(!nvfx->use_nv4x) + goto nv3x_cflow; + nv40_fp_brk(fpc); + break; + + case TGSI_OPCODE_CONT: + { + static int warned = 0; + if(!warned) { + NOUVEAU_ERR("Sorry, the continue keyword is not implemented: ignoring it.\n"); + warned = 1; + } + break; + } + + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + +out: + release_temps(fpc); + return TRUE; +nv3x_cflow: + { + static int warned = 0; + if(!warned) { + NOUVEAU_ERR( + "Sorry, control flow instructions are not supported in hardware on nv3x: ignoring them\n" + "If rendering is incorrect, try to disable GLSL support in the application.\n"); + warned = 1; + } + } + goto out; +} + +static boolean +nvfx_fragprog_parse_decl_input(struct nv30_context *nvfx, struct nvfx_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned idx = fdec->Range.First; + unsigned hw; + + switch (fdec->Semantic.Name) { + case TGSI_SEMANTIC_POSITION: + hw = NVFX_FP_OP_INPUT_SRC_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + hw = NVFX_FP_OP_INPUT_SRC_COL0 + fdec->Semantic.Index; + break; + case TGSI_SEMANTIC_FOG: + hw = NVFX_FP_OP_INPUT_SRC_FOGC; + break; + case TGSI_SEMANTIC_FACE: + hw = NV40_FP_OP_INPUT_SRC_FACING; + break; + case TGSI_SEMANTIC_TEXCOORD: + assert(fdec->Semantic.Index < 8); + fpc->fp->texcoord[fdec->Semantic.Index] = fdec->Semantic.Index; + fpc->fp->texcoords |= (1 << fdec->Semantic.Index); + fpc->fp->vp_or |= (0x00004000 << fdec->Semantic.Index); + hw = 
NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.Index); + break; + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_PCOORD: + /* will be assigned to remaining TC slots later */ + return TRUE; + default: + assert(0); + return FALSE; + } + + fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw); + return TRUE; +} + +static boolean +nvfx_fragprog_assign_generic(struct nv30_context *nvfx, struct nvfx_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned num_texcoords = nvfx->use_nv4x ? 10 : 8; + unsigned idx = fdec->Range.First; + unsigned hw; + + switch (fdec->Semantic.Name) { + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_PCOORD: + for (hw = 0; hw < num_texcoords; hw++) { + if (fpc->fp->texcoord[hw] == 0xffff) { + if (hw <= 7) { + fpc->fp->texcoords |= (0x1 << hw); + fpc->fp->vp_or |= (0x00004000 << hw); + } else { + fpc->fp->vp_or |= (0x00001000 << (hw - 8)); + } + if (fdec->Semantic.Name == TGSI_SEMANTIC_PCOORD) { + fpc->fp->texcoord[hw] = 0xfffe; + fpc->fp->point_sprite_control |= (0x00000100 << hw); + } else { + fpc->fp->texcoord[hw] = fdec->Semantic.Index + 8; + } + hw = NVFX_FP_OP_INPUT_SRC_TC(hw); + fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw); + return TRUE; + } + } + return FALSE; + default: + return TRUE; + } +} + +static boolean +nvfx_fragprog_parse_decl_output(struct nv30_context* nvfx, struct nvfx_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned idx = fdec->Range.First; + unsigned hw; + + switch (fdec->Semantic.Name) { + case TGSI_SEMANTIC_POSITION: + hw = 1; + break; + case TGSI_SEMANTIC_COLOR: + hw = ~0; + switch (fdec->Semantic.Index) { + case 0: hw = 0; break; + case 1: hw = 2; break; + case 2: hw = 3; break; + case 3: hw = 4; break; + } + if(hw > ((nvfx->use_nv4x) ? 
4 : 2)) { + NOUVEAU_ERR("bad rcol index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw); + fpc->r_temps |= (1ULL << hw); + return TRUE; +} + +static boolean +nvfx_fragprog_prepare(struct nv30_context* nvfx, struct nvfx_fpc *fpc) +{ + struct tgsi_parse_context p; + int high_temp = -1, i; + + fpc->r_imm = CALLOC(fpc->fp->info.immediate_count, sizeof(struct nvfx_reg)); + + tgsi_parse_init(&p, fpc->fp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_INPUT: + if (!nvfx_fragprog_parse_decl_input(nvfx, fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_OUTPUT: + if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_TEMPORARY: + if (fdec->Range.Last > high_temp) { + high_temp = + fdec->Range.Last; + } + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm; + + imm = &p.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(fpc->nr_imm < fpc->fp->info.immediate_count); + + fpc->r_imm[fpc->nr_imm++] = nvfx_fp_imm(fpc, imm->u[0].Float, imm->u[1].Float, imm->u[2].Float, imm->u[3].Float); + break; + } + default: + break; + } + } + tgsi_parse_free(&p); + + tgsi_parse_init(&p, fpc->fp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const struct tgsi_full_declaration *fdec; + tgsi_parse_token(&p); + switch(p.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_INPUT: + if (!nvfx_fragprog_assign_generic(nvfx, fpc, fdec)) + goto out_err; + 
break; + default: + break; + } + break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (++high_temp) { + fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg)); + for (i = 0; i < high_temp; i++) + fpc->r_temp[i] = temp(fpc); + fpc->r_temps_discard = 0ULL; + } + + return TRUE; + +out_err: + FREE(fpc->r_temp); + fpc->r_temp = NULL; + + tgsi_parse_free(&p); + return FALSE; +} + +DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", FALSE) + +void +_nvfx_fragprog_translate(struct nv30_context *nvfx, struct nv30_fragprog *fp, + boolean emulate_sprite_flipping) +{ + struct tgsi_parse_context parse; + struct nvfx_fpc *fpc = NULL; + struct util_dynarray insns; + + fp->translated = FALSE; + fp->point_sprite_control = 0; + fp->vp_or = 0; + + fpc = CALLOC_STRUCT(nvfx_fpc); + if (!fpc) + goto out_err; + + fpc->max_temps = nvfx->use_nv4x ? 48 : 32; + fpc->fp = fp; + fpc->num_regs = 2; + memset(fp->texcoord, 0xff, sizeof(fp->texcoord)); + + for (unsigned i = 0; i < fp->info.num_properties; ++i) { + switch (fp->info.properties[i].name) { + case TGSI_PROPERTY_FS_COORD_ORIGIN: + if (fp->info.properties[i].data[0]) + fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED; + break; + case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: + if (fp->info.properties[i].data[0]) + fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_CENTER_INTEGER; + break; + case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: + if (fp->info.properties[i].data[0]) + fp->rt_enable |= NV30_3D_RT_ENABLE_MRT; + break; + default: + break; + } + } + + if (!nvfx_fragprog_prepare(nvfx, fpc)) + goto out_err; + + tgsi_parse_init(&parse, fp->pipe.tokens); + util_dynarray_init(&insns); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + + util_dynarray_append(&insns, unsigned, fp->insn_len); + finst = &parse.FullToken.FullInstruction; + if 
(!nvfx_fragprog_parse_instruction(nvfx, fpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + util_dynarray_append(&insns, unsigned, fp->insn_len); + + for(unsigned i = 0; i < fpc->label_relocs.size; i += sizeof(struct nvfx_relocation)) + { + struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)fpc->label_relocs.data + i); + fp->insn[label_reloc->location] |= ((unsigned*)insns.data)[label_reloc->target]; + } + util_dynarray_fini(&insns); + + if(!nvfx->is_nv4x) + fp->fp_control |= (fpc->num_regs-1)/2; + else + fp->fp_control |= fpc->num_regs << NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT; + + /* Terminate final instruction */ + if(fp->insn) + fp->insn[fpc->inst_offset] |= 0x00000001; + + /* Append NOP + END instruction for branches to the end of the program */ + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + fp->insn[fpc->inst_offset + 0] = 0x00000001; + fp->insn[fpc->inst_offset + 1] = 0x00000000; + fp->insn[fpc->inst_offset + 2] = 0x00000000; + fp->insn[fpc->inst_offset + 3] = 0x00000000; + + if(debug_get_option_nvfx_dump_fp()) + { + debug_printf("\n"); + tgsi_dump(fp->pipe.tokens, 0); + + debug_printf("\n%s fragment program:\n", nvfx->is_nv4x ? 
"nv4x" : "nv3x"); + for (unsigned i = 0; i < fp->insn_len; i += 4) + debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]); + debug_printf("\n"); + } + + fp->translated = TRUE; + +out: + tgsi_parse_free(&parse); + if(fpc) + { + FREE(fpc->r_temp); + util_dynarray_fini(&fpc->if_stack); + util_dynarray_fini(&fpc->label_relocs); + util_dynarray_fini(&fpc->imm_data); + //util_dynarray_fini(&fpc->loop_stack); + FREE(fpc); + } + + return; + +out_err: + _debug_printf("Error: failed to compile this fragment program:\n"); + tgsi_dump(fp->pipe.tokens, 0); + goto out; +} + +static inline void +nvfx_fp_memcpy(void* dst, const void* src, size_t len) +{ +#ifndef PIPE_ARCH_BIG_ENDIAN + memcpy(dst, src, len); +#else + size_t i; + for(i = 0; i < len; i += 4) { + uint32_t v = *(uint32_t*)((char*)src + i); + *(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16); + } +#endif +} diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_shader.h b/src/gallium/drivers/nouveau/nv30/nvfx_shader.h new file mode 100644 index 00000000000..987e1b043dd --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nvfx_shader.h @@ -0,0 +1,525 @@ +#ifndef __NVFX_SHADER_H__ +#define __NVFX_SHADER_H__ + +#include <stdint.h> + +#include "pipe/p_compiler.h" + +#define NVFX_SWZ_IDENTITY ((3 << 6) | (2 << 4) | (1 << 2) | (0 << 0)) + +/* this will resolve to either the NV30 or the NV40 version + * depending on the current hardware */ +/* unusual, but very fast and compact method */ +#define NVFX_VP(c) ((NV30_VP_##c) + (nv30->is_nv4x & ((NV40_VP_##c) - (NV30_VP_##c)))) + +#define NVFX_VP_INST_SLOT_VEC 0 +#define NVFX_VP_INST_SLOT_SCA 1 + +#define NVFX_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ +#define NVFX_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ +#define NVFX_VP_INST_IN_NORMAL 2 +#define NVFX_VP_INST_IN_COL0 3 /* Should probably confirm them all though */ +#define NVFX_VP_INST_IN_COL1 4 +#define NVFX_VP_INST_IN_FOGC 5 
+#define NVFX_VP_INST_IN_TC0 8 +#define NVFX_VP_INST_IN_TC(n) (8+n) + +#define NVFX_VP_INST_SCA_OP_NOP 0x00 +#define NVFX_VP_INST_SCA_OP_MOV 0x01 +#define NVFX_VP_INST_SCA_OP_RCP 0x02 +#define NVFX_VP_INST_SCA_OP_RCC 0x03 +#define NVFX_VP_INST_SCA_OP_RSQ 0x04 +#define NVFX_VP_INST_SCA_OP_EXP 0x05 +#define NVFX_VP_INST_SCA_OP_LOG 0x06 +#define NVFX_VP_INST_SCA_OP_LIT 0x07 +#define NVFX_VP_INST_SCA_OP_BRA 0x09 +#define NVFX_VP_INST_SCA_OP_CAL 0x0B +#define NVFX_VP_INST_SCA_OP_RET 0x0C +#define NVFX_VP_INST_SCA_OP_LG2 0x0D +#define NVFX_VP_INST_SCA_OP_EX2 0x0E +#define NVFX_VP_INST_SCA_OP_SIN 0x0F +#define NVFX_VP_INST_SCA_OP_COS 0x10 + +#define NV40_VP_INST_SCA_OP_PUSHA 0x13 +#define NV40_VP_INST_SCA_OP_POPA 0x14 + +#define NVFX_VP_INST_VEC_OP_NOP 0x00 +#define NVFX_VP_INST_VEC_OP_MOV 0x01 +#define NVFX_VP_INST_VEC_OP_MUL 0x02 +#define NVFX_VP_INST_VEC_OP_ADD 0x03 +#define NVFX_VP_INST_VEC_OP_MAD 0x04 +#define NVFX_VP_INST_VEC_OP_DP3 0x05 +#define NVFX_VP_INST_VEC_OP_DPH 0x06 +#define NVFX_VP_INST_VEC_OP_DP4 0x07 +#define NVFX_VP_INST_VEC_OP_DST 0x08 +#define NVFX_VP_INST_VEC_OP_MIN 0x09 +#define NVFX_VP_INST_VEC_OP_MAX 0x0A +#define NVFX_VP_INST_VEC_OP_SLT 0x0B +#define NVFX_VP_INST_VEC_OP_SGE 0x0C +#define NVFX_VP_INST_VEC_OP_ARL 0x0D +#define NVFX_VP_INST_VEC_OP_FRC 0x0E +#define NVFX_VP_INST_VEC_OP_FLR 0x0F +#define NVFX_VP_INST_VEC_OP_SEQ 0x10 +#define NVFX_VP_INST_VEC_OP_SFL 0x11 +#define NVFX_VP_INST_VEC_OP_SGT 0x12 +#define NVFX_VP_INST_VEC_OP_SLE 0x13 +#define NVFX_VP_INST_VEC_OP_SNE 0x14 +#define NVFX_VP_INST_VEC_OP_STR 0x15 +#define NVFX_VP_INST_VEC_OP_SSG 0x16 +#define NVFX_VP_INST_VEC_OP_ARR 0x17 +#define NVFX_VP_INST_VEC_OP_ARA 0x18 + +#define NV40_VP_INST_VEC_OP_TXL 0x19 + +/* DWORD 3 */ +#define NVFX_VP_INST_LAST (1 << 0) + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0: OPDEST + * 0: program end + * 1-6: destination register + * 7: destination register is fp16?? 
(use for outputs) + * 8: set condition code + * 9: writemask x + * 10: writemask y + * 11: writemask z + * 12: writemask w + * 13-16: source attribute register number (e.g. COL0) + * 17-20: texture unit number + * 21: expand value on texture operation (x -> 2x - 1) + * 22-23: precision 0 = fp32, 1 = fp16, 2 = s1.10 fixed, 3 = s0.8 fixed (nv40-only)) + * 24-29: opcode + * 30: no destination + * 31: saturate + * 1 - SRC0 + * 0-17: see common source fields + * 18: execute if condition code less + * 19: execute if condition code equal + * 20: execute if condition code greater + * 21-22: condition code swizzle x source component + * 23-24: condition code swizzle y source component + * 25-26: condition code swizzle z source component + * 27-28: condition code swizzle w source component + * 29: source 0 absolute + * 30: always 0 in renouveau tests + * 31: always 0 in renouveau tests + * 2 - SRC1 + * 0-17: see common source fields + * 18: source 1 absolute + * 19-20: input precision 0 = fp32, 1 = fp16, 2 = s1.10 fixed, 3 = ??? + * 21-27: always 0 in renouveau tests + * 28-30: scale (0 = 1x, 1 = 2x, 2 = 4x, 3 = 8x, 4 = ???, 5, = 1/2, 6 = 1/4, 7 = 1/8) + * 31: opcode is branch + * 3 - SRC2 + * 0-17: see common source fields + * 18: source 2 absolute + * 19-29: address register displacement + * 30: use index register + * 31: disable perspective-correct interpolation? + * +* Common fields of 0, 1, 2 - SRC + * 0-1: source register type (0 = temp, 1 = input, 2 = immediate, 3 = ???) + * 2-7: source temp register index + * 8: source register is fp16?? + * 9-10: source swizzle x source component + * 11-12: source swizzle y source component + * 13-14: source swizzle z source component + * 15-16: source swizzle w source component + * 17: negate + + * There appears to be no special difference between result regs and temp regs. 
+ * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 + * otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO + * is implemented simply by not writing to the relevant components of the destination. + * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + * + * NV40 Looping + * Loops appear to be fairly expensive on NV40 at least, the proprietary + * driver goes to a lot of effort to avoid using the native looping + * instructions. If the total number of *executed* instructions between + * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. + * The maximum loop count is 255. + * + */ + +//== Opcode / Destination selection == +#define NVFX_FP_OP_PROGRAM_END (1 << 0) +#define NVFX_FP_OP_OUT_REG_SHIFT 1 +#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */ +#define NV40_FP_OP_OUT_REG_MASK (63 << 1) +/* Needs to be set when writing outputs to get expected result.. 
*/ +#define NVFX_FP_OP_OUT_REG_HALF (1 << 7) +#define NVFX_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NVFX_FP_OP_OUTMASK_SHIFT 9 +#define NVFX_FP_OP_OUTMASK_MASK (0xF << 9) +# define NVFX_FP_OP_OUT_X (1<<9) +# define NVFX_FP_OP_OUT_Y (1<<10) +# define NVFX_FP_OP_OUT_Z (1<<11) +# define NVFX_FP_OP_OUT_W (1<<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. + */ +#define NVFX_FP_OP_INPUT_SRC_SHIFT 13 +#define NVFX_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NVFX_FP_OP_INPUT_SRC_POSITION 0x0 +# define NVFX_FP_OP_INPUT_SRC_COL0 0x1 +# define NVFX_FP_OP_INPUT_SRC_COL1 0x2 +# define NVFX_FP_OP_INPUT_SRC_FOGC 0x3 +# define NVFX_FP_OP_INPUT_SRC_TC0 0x4 +# define NVFX_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +# define NV40_FP_OP_INPUT_SRC_FACING 0xE +#define NVFX_FP_OP_TEX_UNIT_SHIFT 17 +#define NVFX_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */ +#define NVFX_FP_OP_PRECISION_SHIFT 22 +#define NVFX_FP_OP_PRECISION_MASK (3 << 22) +# define NVFX_FP_PRECISION_FP32 0 +# define NVFX_FP_PRECISION_FP16 1 +# define NVFX_FP_PRECISION_FX12 2 +#define NVFX_FP_OP_OPCODE_SHIFT 24 +#define NVFX_FP_OP_OPCODE_MASK (0x3F << 24) +/* NV30/NV40 fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_NOP 0x00 +#define NVFX_FP_OP_OPCODE_MOV 0x01 +#define NVFX_FP_OP_OPCODE_MUL 0x02 +#define NVFX_FP_OP_OPCODE_ADD 0x03 +#define NVFX_FP_OP_OPCODE_MAD 0x04 +#define NVFX_FP_OP_OPCODE_DP3 0x05 +#define NVFX_FP_OP_OPCODE_DP4 0x06 +#define NVFX_FP_OP_OPCODE_DST 0x07 +#define NVFX_FP_OP_OPCODE_MIN 0x08 +#define NVFX_FP_OP_OPCODE_MAX 0x09 +#define NVFX_FP_OP_OPCODE_SLT 0x0A +#define NVFX_FP_OP_OPCODE_SGE 0x0B +#define NVFX_FP_OP_OPCODE_SLE 0x0C +#define NVFX_FP_OP_OPCODE_SGT 0x0D +#define NVFX_FP_OP_OPCODE_SNE 0x0E +#define NVFX_FP_OP_OPCODE_SEQ 0x0F +#define NVFX_FP_OP_OPCODE_FRC 0x10 +#define NVFX_FP_OP_OPCODE_FLR 0x11 +#define NVFX_FP_OP_OPCODE_KIL 0x12 +#define NVFX_FP_OP_OPCODE_PK4B 0x13 +#define NVFX_FP_OP_OPCODE_UP4B 0x14 +#define 
NVFX_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ +#define NVFX_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ +#define NVFX_FP_OP_OPCODE_TEX 0x17 +#define NVFX_FP_OP_OPCODE_TXP 0x18 +#define NVFX_FP_OP_OPCODE_TXD 0x19 +#define NVFX_FP_OP_OPCODE_RCP 0x1A +#define NVFX_FP_OP_OPCODE_EX2 0x1C +#define NVFX_FP_OP_OPCODE_LG2 0x1D +#define NVFX_FP_OP_OPCODE_STR 0x20 +#define NVFX_FP_OP_OPCODE_SFL 0x21 +#define NVFX_FP_OP_OPCODE_COS 0x22 +#define NVFX_FP_OP_OPCODE_SIN 0x23 +#define NVFX_FP_OP_OPCODE_PK2H 0x24 +#define NVFX_FP_OP_OPCODE_UP2H 0x25 +#define NVFX_FP_OP_OPCODE_PK4UB 0x27 +#define NVFX_FP_OP_OPCODE_UP4UB 0x28 +#define NVFX_FP_OP_OPCODE_PK2US 0x29 +#define NVFX_FP_OP_OPCODE_UP2US 0x2A +#define NVFX_FP_OP_OPCODE_DP2A 0x2E +#define NVFX_FP_OP_OPCODE_TXB 0x31 +#define NVFX_FP_OP_OPCODE_DIV 0x3A + +/* NV30 only fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_RSQ_NV30 0x1B +#define NVFX_FP_OP_OPCODE_LIT_NV30 0x1E +#define NVFX_FP_OP_OPCODE_LRP_NV30 0x1F +#define NVFX_FP_OP_OPCODE_POW_NV30 0x26 +#define NVFX_FP_OP_OPCODE_RFL_NV30 0x36 + +/* NV40 only fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_TXL_NV40 0x2F +#define NVFX_FP_OP_OPCODE_LITEX2_NV40 0x3C + +/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ +#define NV40_FP_OP_BRA_OPCODE_BRK 0x0 +#define NV40_FP_OP_BRA_OPCODE_CAL 0x1 +#define NV40_FP_OP_BRA_OPCODE_IF 0x2 +#define NV40_FP_OP_BRA_OPCODE_LOOP 0x3 +#define NV40_FP_OP_BRA_OPCODE_REP 0x4 +#define NV40_FP_OP_BRA_OPCODE_RET 0x5 + +#define NV40_FP_OP_OUT_NONE (1 << 30) +#define NVFX_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NVFX_FP_OP_SRC0_ABS (1 << 29) +#define NVFX_FP_OP_COND_SWZ_W_SHIFT 27 +#define NVFX_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NVFX_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NVFX_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NVFX_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NVFX_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NVFX_FP_OP_COND_SWZ_X_SHIFT 21 +#define NVFX_FP_OP_COND_SWZ_X_MASK (3 << 21) 
+#define NVFX_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NVFX_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NVFX_FP_OP_COND_SHIFT 18 +#define NVFX_FP_OP_COND_MASK (0x07 << 18) +# define NVFX_FP_OP_COND_FL 0 +# define NVFX_FP_OP_COND_LT 1 +# define NVFX_FP_OP_COND_EQ 2 +# define NVFX_FP_OP_COND_LE 3 +# define NVFX_FP_OP_COND_GT 4 +# define NVFX_FP_OP_COND_NE 5 +# define NVFX_FP_OP_COND_GE 6 +# define NVFX_FP_OP_COND_TR 7 + +/* high order bits of SRC1 */ +#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) +#define NVFX_FP_OP_DST_SCALE_SHIFT 28 +#define NVFX_FP_OP_DST_SCALE_MASK (3 << 28) +#define NVFX_FP_OP_DST_SCALE_1X 0 +#define NVFX_FP_OP_DST_SCALE_2X 1 +#define NVFX_FP_OP_DST_SCALE_4X 2 +#define NVFX_FP_OP_DST_SCALE_8X 3 +#define NVFX_FP_OP_DST_SCALE_INV_2X 5 +#define NVFX_FP_OP_DST_SCALE_INV_4X 6 +#define NVFX_FP_OP_DST_SCALE_INV_8X 7 +#define NVFX_FP_OP_SRC1_ABS (1 << 18) + +/* SRC1 LOOP */ +#define NV40_FP_OP_LOOP_INCR_SHIFT 19 +#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) +#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 +#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) +#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 +#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) + +/* SRC1 IF: absolute offset in dwords */ +#define NV40_FP_OP_ELSE_OFFSET_SHIFT 0 +#define NV40_FP_OP_ELSE_OFFSET_MASK (0x7FFFFFFF << 0) + +/* SRC1 CAL */ +#define NV40_FP_OP_SUB_OFFSET_SHIFT 0 +#define NV40_FP_OP_SUB_OFFSET_MASK (0x7FFFFFFF << 0) + +/* SRC1 REP + * I have no idea why there are 3 count values here.. but they + * have always been filled with the same value in my tests so + * far.. 
+ */ +#define NV40_FP_OP_REP_COUNT1_SHIFT 2 +#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) +#define NV40_FP_OP_REP_COUNT2_SHIFT 10 +#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) +#define NV40_FP_OP_REP_COUNT3_SHIFT 19 +#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) + +/* SRC2 REP/IF: absolute offset in dwords */ +#define NV40_FP_OP_END_OFFSET_SHIFT 0 +#define NV40_FP_OP_END_OFFSET_MASK (0x7FFFFFFF << 0) + +/* high order bits of SRC2 */ +#define NVFX_FP_OP_INDEX_INPUT (1 << 30) +#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 +#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) + +//== Register selection == +#define NVFX_FP_REG_TYPE_SHIFT 0 +#define NVFX_FP_REG_TYPE_MASK (3 << 0) +# define NVFX_FP_REG_TYPE_TEMP 0 +# define NVFX_FP_REG_TYPE_INPUT 1 +# define NVFX_FP_REG_TYPE_CONST 2 +#define NVFX_FP_REG_SRC_SHIFT 2 +#define NV30_FP_REG_SRC_MASK (31 << 2) +#define NV40_FP_REG_SRC_MASK (63 << 2) +#define NVFX_FP_REG_SRC_HALF (1 << 8) +#define NVFX_FP_REG_SWZ_ALL_SHIFT 9 +#define NVFX_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NVFX_FP_REG_SWZ_X_SHIFT 9 +#define NVFX_FP_REG_SWZ_X_MASK (3 << 9) +#define NVFX_FP_REG_SWZ_Y_SHIFT 11 +#define NVFX_FP_REG_SWZ_Y_MASK (3 << 11) +#define NVFX_FP_REG_SWZ_Z_SHIFT 13 +#define NVFX_FP_REG_SWZ_Z_MASK (3 << 13) +#define NVFX_FP_REG_SWZ_W_SHIFT 15 +#define NVFX_FP_REG_SWZ_W_MASK (3 << 15) +# define NVFX_FP_SWIZZLE_X 0 +# define NVFX_FP_SWIZZLE_Y 1 +# define NVFX_FP_SWIZZLE_Z 2 +# define NVFX_FP_SWIZZLE_W 3 +#define NVFX_FP_REG_NEGATE (1 << 17) + +#define NVFXSR_NONE 0 +#define NVFXSR_OUTPUT 1 +#define NVFXSR_INPUT 2 +#define NVFXSR_TEMP 3 +#define NVFXSR_CONST 5 +#define NVFXSR_IMM 6 + +#define NVFX_COND_FL 0 +#define NVFX_COND_LT 1 +#define NVFX_COND_EQ 2 +#define NVFX_COND_LE 3 +#define NVFX_COND_GT 4 +#define NVFX_COND_NE 5 +#define NVFX_COND_GE 6 +#define NVFX_COND_TR 7 + +/* Yes, these are ordered differently... 
*/ + +#define NVFX_VP_MASK_X 8 +#define NVFX_VP_MASK_Y 4 +#define NVFX_VP_MASK_Z 2 +#define NVFX_VP_MASK_W 1 +#define NVFX_VP_MASK_ALL 0xf + +#define NVFX_FP_MASK_X 1 +#define NVFX_FP_MASK_Y 2 +#define NVFX_FP_MASK_Z 4 +#define NVFX_FP_MASK_W 8 +#define NVFX_FP_MASK_ALL 0xf + +#define NVFX_SWZ_X 0 +#define NVFX_SWZ_Y 1 +#define NVFX_SWZ_Z 2 +#define NVFX_SWZ_W 3 + +#define swz(s,x,y,z,w) nvfx_src_swz((s), NVFX_SWZ_##x, NVFX_SWZ_##y, NVFX_SWZ_##z, NVFX_SWZ_##w) +#define neg(s) nvfx_src_neg((s)) +#define abs(s) nvfx_src_abs((s)) + +struct nvfx_reg { + int8_t type; + int32_t index; +}; + +struct nvfx_src { + struct nvfx_reg reg; + + uint8_t indirect : 1; + uint8_t indirect_reg : 1; + uint8_t indirect_swz : 2; + uint8_t negate : 1; + uint8_t abs : 1; + uint8_t swz[4]; +}; + +struct nvfx_insn +{ + uint8_t op; + char scale; + int8_t unit; + uint8_t mask; + uint8_t cc_swz[4]; + + uint8_t sat : 1; + uint8_t cc_update : 1; + uint8_t cc_update_reg : 1; + uint8_t cc_test : 3; + uint8_t cc_test_reg : 1; + + struct nvfx_reg dst; + struct nvfx_src src[3]; +}; + +static INLINE struct nvfx_insn +nvfx_insn(boolean sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask, struct nvfx_src s0, struct nvfx_src s1, struct nvfx_src s2) +{ + struct nvfx_insn insn = { + .op = op, + .scale = 0, + .unit = unit, + .sat = sat, + .mask = mask, + .cc_update = 0, + .cc_update_reg = 0, + .cc_test = NVFX_COND_TR, + .cc_test_reg = 0, + .cc_swz = { 0, 1, 2, 3 }, + .dst = dst, + .src = {s0, s1, s2} + }; + return insn; +} + +static INLINE struct nvfx_reg +nvfx_reg(int type, int index) +{ + struct nvfx_reg temp = { + .type = type, + .index = index, + }; + return temp; +} + +static INLINE struct nvfx_src +nvfx_src(struct nvfx_reg reg) +{ + struct nvfx_src temp = { + .reg = reg, + .abs = 0, + .negate = 0, + .swz = { 0, 1, 2, 3 }, + .indirect = 0, + }; + return temp; +} + +static INLINE struct nvfx_src +nvfx_src_swz(struct nvfx_src src, int x, int y, int z, int w) +{ + struct nvfx_src dst = src; + + 
dst.swz[NVFX_SWZ_X] = src.swz[x]; + dst.swz[NVFX_SWZ_Y] = src.swz[y]; + dst.swz[NVFX_SWZ_Z] = src.swz[z]; + dst.swz[NVFX_SWZ_W] = src.swz[w]; + return dst; +} + +static INLINE struct nvfx_src +nvfx_src_neg(struct nvfx_src src) +{ + src.negate = !src.negate; + return src; +} + +static INLINE struct nvfx_src +nvfx_src_abs(struct nvfx_src src) +{ + src.abs = 1; + return src; +} + +struct nvfx_relocation { + unsigned location; + unsigned target; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c new file mode 100644 index 00000000000..3ae51ef9e82 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c @@ -0,0 +1,1133 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_linkage.h" +#include "util/u_debug.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_ureg.h" + +#include "draw/draw_context.h" + +#include "nv30/nv30-40_3d.xml.h" +#include "nv30/nv30_context.h" +#include "nv30/nv30_resource.h" + +/* TODO (at least...): + * 1. Indexed consts + ARL + * 3. NV_vp11, NV_vp2, NV_vp3 features + * - extra arith opcodes + * - branching + * - texture sampling + * - indexed attribs + * - indexed results + * 4. 
bugs + */ + +#include "nv30/nv30_vertprog.h" +#include "nv30/nv40_vertprog.h" + +struct nvfx_loop_entry { + unsigned brk_target; + unsigned cont_target; +}; + +struct nvfx_vpc { + struct nv30_context* nv30; + struct pipe_shader_state pipe; + struct nv30_vertprog *vp; + struct tgsi_shader_info* info; + + struct nv30_vertprog_exec *vpi; + + unsigned r_temps; + unsigned r_temps_discard; + struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_reg *r_address; + struct nvfx_reg *r_temp; + struct nvfx_reg *r_const; + struct nvfx_reg r_0_1; + + struct nvfx_reg *imm; + unsigned nr_imm; + + int hpos_idx; + int cvtx_idx; + + struct util_dynarray label_relocs; + struct util_dynarray loop_stack; +}; + +static struct nvfx_reg +temp(struct nvfx_vpc *vpc) +{ + int idx = ffs(~vpc->r_temps) - 1; + + if (idx < 0) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nvfx_reg(NVFXSR_TEMP, 0); + } + + vpc->r_temps |= (1 << idx); + vpc->r_temps_discard |= (1 << idx); + return nvfx_reg(NVFXSR_TEMP, idx); +} + +static inline void +release_temps(struct nvfx_vpc *vpc) +{ + vpc->r_temps &= ~vpc->r_temps_discard; + vpc->r_temps_discard = 0; +} + +static struct nvfx_reg +constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w) +{ + struct nv30_vertprog *vp = vpc->vp; + struct nv30_vertprog_data *vpd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < vp->nr_consts; idx++) { + if (vp->consts[idx].index == pipe) + return nvfx_reg(NVFXSR_CONST, idx); + } + } + + idx = vp->nr_consts++; + vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); + vpd = &vp->consts[idx]; + + vpd->index = pipe; + vpd->value[0] = x; + vpd->value[1] = y; + vpd->value[2] = z; + vpd->value[3] = w; + return nvfx_reg(NVFXSR_CONST, idx); +} + +#define arith(s,t,o,d,m,s0,s1,s2) \ + nvfx_insn((s), (NVFX_VP_INST_SLOT_##t << 7) | NVFX_VP_INST_##t##_OP_##o, -1, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv30_context *nv30, struct nvfx_vpc *vpc, uint32_t *hw, + int 
pos, struct nvfx_src src) +{ + struct nv30_vertprog *vp = vpc->vp; + uint32_t sr = 0; + struct nvfx_relocation reloc; + + switch (src.reg.type) { + case NVFXSR_TEMP: + sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT)); + sr |= (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT)); + break; + case NVFXSR_INPUT: + sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); + vp->ir |= (1 << src.reg.index); + hw[1] |= (src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT)); + break; + case NVFXSR_CONST: + sr |= (NVFX_VP(SRC_REG_TYPE_CONST) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); + if (src.reg.index < 256 && src.reg.index >= -256) { + reloc.location = vp->nr_insns - 1; + reloc.target = src.reg.index; + util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc); + } else { + hw[1] |= (src.reg.index << NVFX_VP(INST_CONST_SRC_SHIFT)) & + NVFX_VP(INST_CONST_SRC_MASK); + } + break; + case NVFXSR_NONE: + sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NVFX_VP(SRC_NEGATE); + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) | + (src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT)) | + (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) | + (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT))); + + if(src.indirect) { + if(src.reg.type == NVFXSR_CONST) + hw[3] |= NVFX_VP(INST_INDEX_CONST); + else if(src.reg.type == NVFXSR_INPUT) + hw[0] |= NVFX_VP(INST_INDEX_INPUT); + else + assert(0); + + if(src.indirect_reg) + hw[0] |= NVFX_VP(INST_ADDR_REG_SELECT_1); + hw[0] |= src.indirect_swz << NVFX_VP(INST_ADDR_SWZ_SHIFT); + } + + switch (pos) { + case 0: + hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >> + NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT); + hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) << + NVFX_VP(INST_SRC0L_SHIFT); + break; + case 1: + hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT); + break; + case 2: + hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >> + 
NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT); + hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) << + NVFX_VP(INST_SRC2L_SHIFT); + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nv30_context *nv30, struct nvfx_vpc *vpc, uint32_t *hw, + int slot, struct nvfx_reg dst) +{ + struct nv30_vertprog *vp = vpc->vp; + + switch (dst.type) { + case NVFXSR_NONE: + if(!nv30->is_nv4x) + hw[0] |= NV30_VP_INST_DEST_TEMP_ID_MASK; + else { + hw[3] |= NV40_VP_INST_DEST_MASK; + if (slot == 0) + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK; + else + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } + break; + case NVFXSR_TEMP: + if(!nv30->is_nv4x) + hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); + else { + hw[3] |= NV40_VP_INST_DEST_MASK; + if (slot == 0) + hw[0] |= (dst.index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + else + hw[3] |= (dst.index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + } + break; + case NVFXSR_OUTPUT: + /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */ + if(nv30->is_nv4x) { + switch (dst.index) { + case NV30_VP_INST_DEST_CLP(0): + dst.index = NVFX_VP(INST_DEST_FOGC); + vp->or |= (1 << 6); + break; + case NV30_VP_INST_DEST_CLP(1): + dst.index = NVFX_VP(INST_DEST_FOGC); + vp->or |= (1 << 7); + break; + case NV30_VP_INST_DEST_CLP(2): + dst.index = NVFX_VP(INST_DEST_FOGC); + vp->or |= (1 << 8); + break; + case NV30_VP_INST_DEST_CLP(3): + dst.index = NVFX_VP(INST_DEST_PSZ); + vp->or |= (1 << 9); + break; + case NV30_VP_INST_DEST_CLP(4): + dst.index = NVFX_VP(INST_DEST_PSZ); + vp->or |= (1 << 10); + break; + case NV30_VP_INST_DEST_CLP(5): + dst.index = NVFX_VP(INST_DEST_PSZ); + vp->or |= (1 << 11); + break; + case NV40_VP_INST_DEST_COL0: vp->or |= (1 << 0); break; + case NV40_VP_INST_DEST_COL1: vp->or |= (1 << 1); break; + case NV40_VP_INST_DEST_BFC0: vp->or |= (1 << 2); break; + case NV40_VP_INST_DEST_BFC1: vp->or |= (1 << 3); break; + case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; + case NV40_VP_INST_DEST_PSZ : vp->or |= 
(1 << 5); break; + } + } + + if(!nv30->is_nv4x) { + hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); + hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK; + + /*XXX: no way this is entirely correct, someone needs to + * figure out what exactly it is. + */ + hw[3] |= 0x800; + } else { + hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); + if (slot == 0) { + hw[0] |= NV40_VP_INST_VEC_RESULT; + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK; + } else { + hw[3] |= NV40_VP_INST_SCA_RESULT; + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } + } + break; + default: + assert(0); + } +} + +static void +nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn) +{ + struct nv30_context *nv30 = vpc->nv30; + struct nv30_vertprog *vp = vpc->vp; + unsigned slot = insn.op >> 7; + unsigned op = insn.op & 0x7f; + uint32_t *hw; + + vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); + vpc->vpi = &vp->insns[vp->nr_insns - 1]; + memset(vpc->vpi, 0, sizeof(*vpc->vpi)); + + hw = vpc->vpi->data; + + if (insn.cc_test != NVFX_COND_TR) + hw[0] |= NVFX_VP(INST_COND_TEST_ENABLE); + hw[0] |= (insn.cc_test << NVFX_VP(INST_COND_SHIFT)); + hw[0] |= ((insn.cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) | + (insn.cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) | + (insn.cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) | + (insn.cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT))); + if(insn.cc_update) + hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE); + + if(insn.sat) { + assert(nv30->use_nv4x); + if(nv30->use_nv4x) + hw[0] |= NV40_VP_INST_SATURATE; + } + + if(!nv30->is_nv4x) { + if(slot == 0) + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); + else { + hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT); + hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT); + } +// hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK); +// hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT)); + + if (insn.dst.type == NVFXSR_OUTPUT) { + if (slot) + hw[3] |= (insn.mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); + else + hw[3] |= (insn.mask << 
NV30_VP_INST_VDEST_WRITEMASK_SHIFT); + } else { + if (slot) + hw[3] |= (insn.mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); + else + hw[3] |= (insn.mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); + } + } else { + if (slot == 0) { + hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + hw[3] |= (insn.mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + } else { + hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK ; + hw[3] |= (insn.mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + } + } + + emit_dst(nv30, vpc, hw, slot, insn.dst); + emit_src(nv30, vpc, hw, 0, insn.src[0]); + emit_src(nv30, vpc, hw, 1, insn.src[1]); + emit_src(nv30, vpc, hw, 2, insn.src[2]); + +// if(insn.src[0].indirect || op == NVFX_VP_INST_VEC_OP_ARL) +// hw[3] |= NV40_VP_INST_SCA_RESULT; +} + +static inline struct nvfx_src +tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nvfx_src src; + + switch (fsrc->Register.File) { + case TGSI_FILE_INPUT: + src.reg = nvfx_reg(NVFXSR_INPUT, fsrc->Register.Index); + break; + case TGSI_FILE_CONSTANT: + if(fsrc->Register.Indirect) { + src.reg = vpc->r_const[0]; + src.reg.index = fsrc->Register.Index; + } else { + src.reg = vpc->r_const[fsrc->Register.Index]; + } + break; + case TGSI_FILE_IMMEDIATE: + src.reg = vpc->imm[fsrc->Register.Index]; + break; + case TGSI_FILE_TEMPORARY: + src.reg = vpc->r_temp[fsrc->Register.Index]; + break; + default: + NOUVEAU_ERR("bad src file\n"); + src.reg.index = 0; + src.reg.type = -1; + break; + } + + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; + src.indirect = 0; + src.indirect_reg = 0; + src.indirect_swz = 0; + + if(fsrc->Register.Indirect) { + if(fsrc->Indirect.File == TGSI_FILE_ADDRESS && + (fsrc->Register.File == TGSI_FILE_CONSTANT || + 
fsrc->Register.File == TGSI_FILE_INPUT)) { + src.indirect = 1; + src.indirect_reg = fsrc->Indirect.Index; + src.indirect_swz = fsrc->Indirect.Swizzle; + } else { + src.reg.index = 0; + src.reg.type = -1; + } + } + + return src; +} + +static INLINE struct nvfx_reg +tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nvfx_reg dst; + + switch (fdst->Register.File) { + case TGSI_FILE_NULL: + dst = nvfx_reg(NVFXSR_NONE, 0); + break; + case TGSI_FILE_OUTPUT: + dst = vpc->r_result[fdst->Register.Index]; + break; + case TGSI_FILE_TEMPORARY: + dst = vpc->r_temp[fdst->Register.Index]; + break; + case TGSI_FILE_ADDRESS: + dst = vpc->r_address[fdst->Register.Index]; + break; + default: + NOUVEAU_ERR("bad dst file %i\n", fdst->Register.File); + dst.index = 0; + dst.type = 0; + break; + } + + return dst; +} + +static inline int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W; + return mask; +} + +static boolean +nvfx_vertprog_parse_instruction(struct nv30_context *nv30, struct nvfx_vpc *vpc, + unsigned idx, const struct tgsi_full_instruction *finst) +{ + struct nvfx_src src[3], tmp; + struct nvfx_reg dst; + struct nvfx_reg final_dst; + struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); + struct nvfx_insn insn; + struct nvfx_relocation reloc; + struct nvfx_loop_entry loop; + boolean sat = FALSE; + int mask; + int ai = -1, ci = -1, ii = -1; + int i; + unsigned sub_depth = 0; + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->Src[i]; + + switch 
(fsrc->Register.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, + tgsi_src(vpc, fsrc), none, none)); + } + break; + case TGSI_FILE_CONSTANT: + if ((ci == -1 && ii == -1) || + ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, + tgsi_src(vpc, fsrc), none, none)); + } + break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->Register.Index) { + ii = fsrc->Register.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, + tgsi_src(vpc, fsrc), none, none)); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + if(src[i].reg.type < 0) + return FALSE; + } + + if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS && + finst->Instruction.Opcode != TGSI_OPCODE_ARL) + return FALSE; + + final_dst = dst = tgsi_dst(vpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); + if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) { + assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL); + if (nv30->use_nv4x) + sat = TRUE; + else + if(dst.type != NVFXSR_TEMP) + dst = temp(vpc); + } + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, abs(src[0]), none, none)); + break; + case TGSI_OPCODE_ADD: + nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, src[1])); + break; + case TGSI_OPCODE_ARL: + nvfx_vp_emit(vpc, arith(0, VEC, ARL, dst, mask, src[0], none, none)); + break; + case 
TGSI_OPCODE_CEIL: + tmp = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, neg(src[0]), none, none)); + nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none)); + break; + case TGSI_OPCODE_CMP: + insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); + insn.cc_update = 1; + nvfx_vp_emit(vpc, insn); + + insn = arith(sat, VEC, MOV, dst, mask, src[2], none, none); + insn.cc_test = NVFX_COND_GE; + nvfx_vp_emit(vpc, insn); + + insn = arith(sat, VEC, MOV, dst, mask, src[1], none, none); + insn.cc_test = NVFX_COND_LT; + nvfx_vp_emit(vpc, insn); + break; + case TGSI_OPCODE_COS: + nvfx_vp_emit(vpc, arith(sat, SCA, COS, dst, mask, none, none, src[0])); + break; + case TGSI_OPCODE_DP2: + tmp = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X | NVFX_VP_MASK_Y, src[0], src[1], none)); + nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, swz(tmp, X, X, X, X), none, swz(tmp, Y, Y, Y, Y))); + break; + case TGSI_OPCODE_DP3: + nvfx_vp_emit(vpc, arith(sat, VEC, DP3, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_DP4: + nvfx_vp_emit(vpc, arith(sat, VEC, DP4, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_DPH: + nvfx_vp_emit(vpc, arith(sat, VEC, DPH, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_DST: + nvfx_vp_emit(vpc, arith(sat, VEC, DST, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_EX2: + nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, src[0])); + break; + case TGSI_OPCODE_EXP: + nvfx_vp_emit(vpc, arith(sat, SCA, EXP, dst, mask, none, none, src[0])); + break; + case TGSI_OPCODE_FLR: + nvfx_vp_emit(vpc, arith(sat, VEC, FLR, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_FRC: + nvfx_vp_emit(vpc, arith(sat, VEC, FRC, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_LG2: + nvfx_vp_emit(vpc, arith(sat, SCA, LG2, dst, mask, none, none, src[0])); + break; + case TGSI_OPCODE_LIT: + nvfx_vp_emit(vpc, 
arith(sat, SCA, LIT, dst, mask, none, none, src[0])); + break; + case TGSI_OPCODE_LOG: + nvfx_vp_emit(vpc, arith(sat, SCA, LOG, dst, mask, none, none, src[0])); + break; + case TGSI_OPCODE_LRP: + tmp = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2])); + nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], tmp)); + break; + case TGSI_OPCODE_MAD: + nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], src[2])); + break; + case TGSI_OPCODE_MAX: + nvfx_vp_emit(vpc, arith(sat, VEC, MAX, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_MIN: + nvfx_vp_emit(vpc, arith(sat, VEC, MIN, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_MOV: + nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_MUL: + nvfx_vp_emit(vpc, arith(sat, VEC, MUL, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_POW: + tmp = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, SCA, LG2, tmp.reg, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X))); + nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none)); + nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X))); + break; + case TGSI_OPCODE_RCP: + nvfx_vp_emit(vpc, arith(sat, SCA, RCP, dst, mask, none, none, src[0])); + break; + case TGSI_OPCODE_RSQ: + nvfx_vp_emit(vpc, arith(sat, SCA, RSQ, dst, mask, none, none, abs(src[0]))); + break; + case TGSI_OPCODE_SEQ: + nvfx_vp_emit(vpc, arith(sat, VEC, SEQ, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SFL: + nvfx_vp_emit(vpc, arith(sat, VEC, SFL, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SGE: + nvfx_vp_emit(vpc, arith(sat, VEC, SGE, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SGT: + nvfx_vp_emit(vpc, arith(sat, VEC, SGT, dst, mask, src[0], src[1], none)); + break; + 
case TGSI_OPCODE_SIN: + nvfx_vp_emit(vpc, arith(sat, SCA, SIN, dst, mask, none, none, src[0])); + break; + case TGSI_OPCODE_SLE: + nvfx_vp_emit(vpc, arith(sat, VEC, SLE, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SLT: + nvfx_vp_emit(vpc, arith(sat, VEC, SLT, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SNE: + nvfx_vp_emit(vpc, arith(sat, VEC, SNE, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SSG: + nvfx_vp_emit(vpc, arith(sat, VEC, SSG, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_STR: + nvfx_vp_emit(vpc, arith(sat, VEC, STR, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SUB: + nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, neg(src[1]))); + break; + case TGSI_OPCODE_TRUNC: + tmp = nvfx_src(temp(vpc)); + insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); + insn.cc_update = 1; + nvfx_vp_emit(vpc, insn); + + nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, abs(src[0]), none, none)); + nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, tmp, none, none)); + + insn = arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none); + insn.cc_test = NVFX_COND_LT; + nvfx_vp_emit(vpc, insn); + break; + case TGSI_OPCODE_XPD: + tmp = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none)); + nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp))); + break; + case TGSI_OPCODE_IF: + insn = arith(0, VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none); + insn.cc_update = 1; + nvfx_vp_emit(vpc, insn); + + reloc.location = vpc->vp->nr_insns; + reloc.target = finst->Label.Label + 1; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + + insn = arith(0, SCA, BRA, none.reg, 0, none, none, none); + insn.cc_test = NVFX_COND_EQ; + insn.cc_swz[0] = insn.cc_swz[1] = insn.cc_swz[2] = insn.cc_swz[3] = 0; + 
nvfx_vp_emit(vpc, insn); + break; + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_BRA: + case TGSI_OPCODE_CAL: + reloc.location = vpc->vp->nr_insns; + reloc.target = finst->Label.Label; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + + if(finst->Instruction.Opcode == TGSI_OPCODE_CAL) + insn = arith(0, SCA, CAL, none.reg, 0, none, none, none); + else + insn = arith(0, SCA, BRA, none.reg, 0, none, none, none); + nvfx_vp_emit(vpc, insn); + break; + case TGSI_OPCODE_RET: + if(sub_depth || !vpc->vp->enabled_ucps) { + tmp = none; + tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0; + nvfx_vp_emit(vpc, arith(0, SCA, RET, none.reg, 0, none, none, tmp)); + } else { + reloc.location = vpc->vp->nr_insns; + reloc.target = vpc->info->num_instructions; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); + } + break; + case TGSI_OPCODE_BGNSUB: + ++sub_depth; + break; + case TGSI_OPCODE_ENDSUB: + --sub_depth; + break; + case TGSI_OPCODE_ENDIF: + /* nothing to do here */ + break; + case TGSI_OPCODE_BGNLOOP: + loop.cont_target = idx; + loop.brk_target = finst->Label.Label + 1; + util_dynarray_append(&vpc->loop_stack, struct nvfx_loop_entry, loop); + break; + case TGSI_OPCODE_ENDLOOP: + loop = util_dynarray_pop(&vpc->loop_stack, struct nvfx_loop_entry); + + reloc.location = vpc->vp->nr_insns; + reloc.target = loop.cont_target; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + + nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); + break; + case TGSI_OPCODE_CONT: + loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry); + + reloc.location = vpc->vp->nr_insns; + reloc.target = loop.cont_target; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + + nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); + break; + case TGSI_OPCODE_BRK: + loop = 
util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry); + + reloc.location = vpc->vp->nr_insns; + reloc.target = loop.brk_target; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + + nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); + break; + case TGSI_OPCODE_END: + assert(!sub_depth); + if(vpc->vp->enabled_ucps) { + if(idx != (vpc->info->num_instructions - 1)) { + reloc.location = vpc->vp->nr_insns; + reloc.target = vpc->info->num_instructions; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); + } + } else { + if(vpc->vp->nr_insns) + vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; + nvfx_vp_emit(vpc, arith(0, VEC, NOP, none.reg, 0, none, none, none)); + vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; + } + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE && !nv30->use_nv4x) { + if (!vpc->r_0_1.type) + vpc->r_0_1 = constant(vpc, -1, 0, 1, 0, 0); + nvfx_vp_emit(vpc, arith(0, VEC, MAX, dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), X, X, X, X), none)); + nvfx_vp_emit(vpc, arith(0, VEC, MIN, final_dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), Y, Y, Y, Y), none)); + } + + release_temps(vpc); + return TRUE; +} + +static boolean +nvfx_vertprog_parse_decl_output(struct nv30_context *nv30, struct nvfx_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned num_texcoords = nv30->is_nv4x ? 
10 : 8; + unsigned idx = fdec->Range.First; + unsigned semantic_index = fdec->Semantic.Index; + int hw = 0, i; + + switch (fdec->Semantic.Name) { + case TGSI_SEMANTIC_POSITION: + hw = NVFX_VP(INST_DEST_POS); + vpc->hpos_idx = idx; + break; + case TGSI_SEMANTIC_CLIPVERTEX: + vpc->r_result[idx] = temp(vpc); + vpc->r_temps_discard = 0; + vpc->cvtx_idx = idx; + return TRUE; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.Index == 0) { + hw = NVFX_VP(INST_DEST_COL0); + } else + if (fdec->Semantic.Index == 1) { + hw = NVFX_VP(INST_DEST_COL1); + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.Index == 0) { + hw = NVFX_VP(INST_DEST_BFC0); + } else + if (fdec->Semantic.Index == 1) { + hw = NVFX_VP(INST_DEST_BFC1); + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NVFX_VP(INST_DEST_FOGC); + break; + case TGSI_SEMANTIC_PSIZE: + hw = NVFX_VP(INST_DEST_PSZ); + break; + case TGSI_SEMANTIC_GENERIC: + /* this is really an identifier for VP/FP linkage */ + semantic_index += 8; + /* fall through */ + case TGSI_SEMANTIC_TEXCOORD: + for (i = 0; i < num_texcoords; i++) { + if (vpc->vp->texcoord[i] == semantic_index) { + hw = NVFX_VP(INST_DEST_TC(i)); + break; + } + } + + if (i == num_texcoords) { + vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0); + return TRUE; + } + break; + case TGSI_SEMANTIC_EDGEFLAG: + /* not really an error just a fallback */ + NOUVEAU_ERR("cannot handle edgeflag output\n"); + return FALSE; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw); + return TRUE; +} + +static boolean +nvfx_vertprog_prepare(struct nv30_context *nv30, struct nvfx_vpc *vpc) +{ + struct tgsi_parse_context p; + int high_const = -1, high_temp = -1, high_addr = -1, nr_imm = 0, i; + + tgsi_parse_init(&p, vpc->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const 
union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + nr_imm++; + break; + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if (fdec->Range.Last > high_temp) { + high_temp = + fdec->Range.Last; + } + break; + case TGSI_FILE_ADDRESS: + if (fdec->Range.Last > high_addr) { + high_addr = + fdec->Range.Last; + } + break; + case TGSI_FILE_CONSTANT: + if (fdec->Range.Last > high_const) { + high_const = + fdec->Range.Last; + } + break; + case TGSI_FILE_OUTPUT: + if (!nvfx_vertprog_parse_decl_output(nv30, vpc, fdec)) + return FALSE; + break; + default: + break; + } + } + break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (nr_imm) { + vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_reg)); + assert(vpc->imm); + } + + if (++high_temp) { + vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg)); + for (i = 0; i < high_temp; i++) + vpc->r_temp[i] = temp(vpc); + } + + if (++high_addr) { + vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_reg)); + for (i = 0; i < high_addr; i++) + vpc->r_address[i] = nvfx_reg(NVFXSR_TEMP, i); + } + + if(++high_const) { + vpc->r_const = CALLOC(high_const, sizeof(struct nvfx_reg)); + for (i = 0; i < high_const; i++) + vpc->r_const[i] = constant(vpc, i, 0, 0, 0, 0); + } + + vpc->r_temps_discard = 0; + return TRUE; +} + +DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE) + +boolean +_nvfx_vertprog_translate(struct nv30_context *nv30, struct nv30_vertprog *vp) +{ + struct tgsi_parse_context parse; + struct nvfx_vpc *vpc = NULL; + struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); + struct util_dynarray insns; + int i, ucps; + + vp->translated = FALSE; + vp->nr_insns = 0; + vp->nr_consts = 0; + + vpc = CALLOC_STRUCT(nvfx_vpc); + if (!vpc) + return FALSE; + vpc->nv30 = nv30; + vpc->vp = vp; + vpc->pipe 
= vp->pipe; + vpc->info = &vp->info; + vpc->cvtx_idx = -1; + + if (!nvfx_vertprog_prepare(nv30, vpc)) { + FREE(vpc); + return FALSE; + } + + /* Redirect post-transform vertex position to a temp if user clip + * planes are enabled. We need to append code to the vtxprog + * to handle clip planes later. + */ + if (vp->enabled_ucps && vpc->cvtx_idx < 0) { + vpc->r_result[vpc->hpos_idx] = temp(vpc); + vpc->r_temps_discard = 0; + vpc->cvtx_idx = vpc->hpos_idx; + } + + util_dynarray_init(&insns); + + tgsi_parse_init(&parse, vp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(imm->Immediate.NrTokens == 4 + 1); + vpc->imm[vpc->nr_imm++] = + constant(vpc, -1, + imm->u[0].Float, + imm->u[1].Float, + imm->u[2].Float, + imm->u[3].Float); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + unsigned idx = insns.size >> 2; + util_dynarray_append(&insns, unsigned, vp->nr_insns); + finst = &parse.FullToken.FullInstruction; + if (!nvfx_vertprog_parse_instruction(nv30, vpc, idx, finst)) + goto out; + } + break; + default: + break; + } + } + + util_dynarray_append(&insns, unsigned, vp->nr_insns); + + for(unsigned i = 0; i < vpc->label_relocs.size; i += sizeof(struct nvfx_relocation)) + { + struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)vpc->label_relocs.data + i); + struct nvfx_relocation hw_reloc; + + hw_reloc.location = label_reloc->location; + hw_reloc.target = ((unsigned*)insns.data)[label_reloc->target]; + + //debug_printf("hw %u -> tgsi %u = hw %u\n", hw_reloc.location, label_reloc->target, hw_reloc.target); + + util_dynarray_append(&vp->branch_relocs, struct nvfx_relocation, hw_reloc); + } + util_dynarray_fini(&insns); + 
util_dynarray_trim(&vp->branch_relocs); + + /* XXX: what if we add a RET before?! make sure we jump here...*/ + + /* Write out HPOS if it was redirected to a temp earlier */ + if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { + struct nvfx_reg hpos = nvfx_reg(NVFXSR_OUTPUT, + NVFX_VP(INST_DEST_POS)); + struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]); + + nvfx_vp_emit(vpc, arith(0, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none)); + } + + /* Insert code to handle user clip planes */ + ucps = vp->enabled_ucps; + while (ucps) { + int i = ffs(ucps) - 1; ucps &= ~(1 << i); + struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i)); + struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, 512 + i)); + struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->cvtx_idx]); + unsigned mask; + + if(nv30->is_nv4x) + { + switch (i) { + case 0: case 3: mask = NVFX_VP_MASK_Y; break; + case 1: case 4: mask = NVFX_VP_MASK_Z; break; + case 2: case 5: mask = NVFX_VP_MASK_W; break; + default: + NOUVEAU_ERR("invalid clip dist #%d\n", i); + goto out; + } + } + else + mask = NVFX_VP_MASK_X; + + nvfx_vp_emit(vpc, arith(0, VEC, DP4, cdst, mask, htmp, ceqn, none)); + } + + if (vpc->vp->nr_insns) + vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; + + if(debug_get_option_nvfx_dump_vp()) + { + debug_printf("\n"); + tgsi_dump(vpc->pipe.tokens, 0); + + debug_printf("\n%s vertex program:\n", nv30->is_nv4x ? 
"nv4x" : "nv3x"); + for (i = 0; i < vp->nr_insns; i++) + debug_printf("%3u: %08x %08x %08x %08x\n", i, vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]); + debug_printf("\n"); + } + + vp->translated = TRUE; + +out: + tgsi_parse_free(&parse); + if(vpc) { + util_dynarray_fini(&vpc->label_relocs); + util_dynarray_fini(&vpc->loop_stack); + FREE(vpc->r_temp); + FREE(vpc->r_address); + FREE(vpc->r_const); + FREE(vpc->imm); + FREE(vpc); + } + + return vp->translated; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h new file mode 100644 index 00000000000..dfbef2c6a30 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h @@ -0,0 +1,416 @@ +#ifndef RNNDB_NV50_2D_XML +#define RNNDB_NV50_2D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- rnndb/nv50_2d.xml ( 11113 bytes, from 2011-07-09 13:43:58) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58) +- ./rnndb/nv_object.xml ( 12912 bytes, from 2012-07-12 09:41:09) +- ./rnndb/nvchipsets.xml ( 3736 bytes, from 2012-07-12 09:41:09) +- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-07-09 13:43:58) +- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58) + +Copyright (C) 2006-2011 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice 
(including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + + +#define NV50_2D_DMA_NOTIFY 0x00000180 + +#define NV50_2D_DMA_DST 0x00000184 + +#define NV50_2D_DMA_SRC 0x00000188 + +#define NV50_2D_DMA_COND 0x0000018c + +#define NV50_2D_DST_FORMAT 0x00000200 + +#define NV50_2D_DST_LINEAR 0x00000204 + +#define NV50_2D_DST_TILE_MODE 0x00000208 + +#define NV50_2D_DST_DEPTH 0x0000020c + +#define NV50_2D_DST_LAYER 0x00000210 + +#define NV50_2D_DST_PITCH 0x00000214 + +#define NV50_2D_DST_WIDTH 0x00000218 + +#define NV50_2D_DST_HEIGHT 0x0000021c + +#define NV50_2D_DST_ADDRESS_HIGH 0x00000220 + +#define NV50_2D_DST_ADDRESS_LOW 0x00000224 + +#define NV50_2D_UNK228 0x00000228 + +#define NVC0_2D_UNK228 0x00000228 + +#define NV50_2D_SRC_FORMAT 0x00000230 + +#define NV50_2D_SRC_LINEAR 0x00000234 + +#define NV50_2D_SRC_TILE_MODE 0x00000238 + +#define NV50_2D_SRC_DEPTH 0x0000023c + +#define NV50_2D_SRC_LAYER 0x00000240 + +#define NVC0_2D_UNK0240 0x00000240 + +#define NV50_2D_SRC_PITCH 0x00000244 +#define NV50_2D_SRC_PITCH__MAX 0x00040000 + +#define NV50_2D_SRC_WIDTH 0x00000248 +#define NV50_2D_SRC_WIDTH__MAX 0x00010000 + +#define NV50_2D_SRC_HEIGHT 0x0000024c +#define NV50_2D_SRC_HEIGHT__MAX 0x00010000 + +#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250 + +#define NV50_2D_SRC_ADDRESS_LOW 0x00000254 + +#define NV50_2D_UNK258 0x00000258 + +#define NV50_2D_UNK260 0x00000260 + +#define NV50_2D_COND_ADDRESS_HIGH 0x00000264 + +#define NV50_2D_COND_ADDRESS_LOW 
0x00000268 + +#define NV50_2D_COND_MODE 0x0000026c +#define NV50_2D_COND_MODE_NEVER 0x00000000 +#define NV50_2D_COND_MODE_ALWAYS 0x00000001 +#define NV50_2D_COND_MODE_RES_NON_ZERO 0x00000002 +#define NV50_2D_COND_MODE_EQUAL 0x00000003 +#define NV50_2D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NV50_2D_CLIP_X 0x00000280 + +#define NV50_2D_CLIP_Y 0x00000284 + +#define NV50_2D_CLIP_W 0x00000288 + +#define NV50_2D_CLIP_H 0x0000028c + +#define NV50_2D_CLIP_ENABLE 0x00000290 + +#define NV50_2D_COLOR_KEY_FORMAT 0x00000294 +#define NV50_2D_COLOR_KEY_FORMAT_16BPP 0x00000000 +#define NV50_2D_COLOR_KEY_FORMAT_15BPP 0x00000001 +#define NV50_2D_COLOR_KEY_FORMAT_24BPP 0x00000002 +#define NV50_2D_COLOR_KEY_FORMAT_30BPP 0x00000003 +#define NV50_2D_COLOR_KEY_FORMAT_8BPP 0x00000004 +#define NV50_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005 +#define NV50_2D_COLOR_KEY_FORMAT_32BPP 0x00000006 + +#define NV50_2D_COLOR_KEY 0x00000298 + +#define NV50_2D_COLOR_KEY_ENABLE 0x0000029c + +#define NV50_2D_ROP 0x000002a0 + +#define NV50_2D_BETA1 0x000002a4 +#define NV50_2D_BETA1_BETA1__MASK 0x7f800000 +#define NV50_2D_BETA1_BETA1__SHIFT 23 + +#define NV50_2D_BETA4 0x000002a8 +#define NV50_2D_BETA4_B__MASK 0x000000ff +#define NV50_2D_BETA4_B__SHIFT 0 +#define NV50_2D_BETA4_G__MASK 0x0000ff00 +#define NV50_2D_BETA4_G__SHIFT 8 +#define NV50_2D_BETA4_R__MASK 0x00ff0000 +#define NV50_2D_BETA4_R__SHIFT 16 +#define NV50_2D_BETA4_A__MASK 0xff000000 +#define NV50_2D_BETA4_A__SHIFT 24 + +#define NV50_2D_OPERATION 0x000002ac +#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000 +#define NV50_2D_OPERATION_ROP_AND 0x00000001 +#define NV50_2D_OPERATION_BLEND 0x00000002 +#define NV50_2D_OPERATION_SRCCOPY 0x00000003 +#define NV50_2D_OPERATION_ROP 0x00000004 +#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000005 +#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000006 + +#define NV50_2D_PATTERN_OFFSET 0x000002b0 +#define NV50_2D_PATTERN_OFFSET_X__MASK 0x0000003f +#define NV50_2D_PATTERN_OFFSET_X__SHIFT 0 +#define 
NV50_2D_PATTERN_OFFSET_Y__MASK 0x00003f00 +#define NV50_2D_PATTERN_OFFSET_Y__SHIFT 8 + +#define NV50_2D_PATTERN_SELECT 0x000002b4 +#define NV50_2D_PATTERN_SELECT_MONO_8X8 0x00000000 +#define NV50_2D_PATTERN_SELECT_MONO_64X1 0x00000001 +#define NV50_2D_PATTERN_SELECT_MONO_1X64 0x00000002 +#define NV50_2D_PATTERN_SELECT_COLOR 0x00000003 + +#define NVC0_2D_UNK2DC 0x000002dc + +#define NVC0_2D_UNK2E0 0x000002e0 + +#define NV50_2D_PATTERN_COLOR_FORMAT 0x000002e8 +#define NV50_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000 +#define NV50_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001 +#define NV50_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002 +#define NV50_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003 +#define NV50_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004 +#define NV50_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005 +#define NV50_2D_PATTERN_COLOR_FORMAT_UNK6 0x00000006 + +#define NV50_2D_PATTERN_MONO_FORMAT 0x000002ec +#define NV50_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000 +#define NV50_2D_PATTERN_MONO_FORMAT_LE 0x00000001 + +#define NV50_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0)) +#define NV50_2D_PATTERN_COLOR__ESIZE 0x00000004 +#define NV50_2D_PATTERN_COLOR__LEN 0x00000002 + +#define NV50_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0)) +#define NV50_2D_PATTERN_BITMAP__ESIZE 0x00000004 +#define NV50_2D_PATTERN_BITMAP__LEN 0x00000002 + +#define NV50_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0)) +#define NV50_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004 +#define NV50_2D_PATTERN_X8R8G8B8__LEN 0x00000040 +#define NV50_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff +#define NV50_2D_PATTERN_X8R8G8B8_B__SHIFT 0 +#define NV50_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00 +#define NV50_2D_PATTERN_X8R8G8B8_G__SHIFT 8 +#define NV50_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000 +#define NV50_2D_PATTERN_X8R8G8B8_R__SHIFT 16 + +#define NV50_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0)) +#define NV50_2D_PATTERN_R5G6B5__ESIZE 0x00000004 +#define NV50_2D_PATTERN_R5G6B5__LEN 0x00000020 +#define NV50_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f +#define 
NV50_2D_PATTERN_R5G6B5_B0__SHIFT 0 +#define NV50_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0 +#define NV50_2D_PATTERN_R5G6B5_G0__SHIFT 5 +#define NV50_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800 +#define NV50_2D_PATTERN_R5G6B5_R0__SHIFT 11 +#define NV50_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000 +#define NV50_2D_PATTERN_R5G6B5_B1__SHIFT 16 +#define NV50_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000 +#define NV50_2D_PATTERN_R5G6B5_G1__SHIFT 21 +#define NV50_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000 +#define NV50_2D_PATTERN_R5G6B5_R1__SHIFT 27 + +#define NV50_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0)) +#define NV50_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004 +#define NV50_2D_PATTERN_X1R5G5B5__LEN 0x00000020 +#define NV50_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f +#define NV50_2D_PATTERN_X1R5G5B5_B0__SHIFT 0 +#define NV50_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0 +#define NV50_2D_PATTERN_X1R5G5B5_G0__SHIFT 5 +#define NV50_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00 +#define NV50_2D_PATTERN_X1R5G5B5_R0__SHIFT 10 +#define NV50_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000 +#define NV50_2D_PATTERN_X1R5G5B5_B1__SHIFT 16 +#define NV50_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000 +#define NV50_2D_PATTERN_X1R5G5B5_G1__SHIFT 21 +#define NV50_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000 +#define NV50_2D_PATTERN_X1R5G5B5_R1__SHIFT 26 + +#define NV50_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0)) +#define NV50_2D_PATTERN_Y8__ESIZE 0x00000004 +#define NV50_2D_PATTERN_Y8__LEN 0x00000010 +#define NV50_2D_PATTERN_Y8_Y0__MASK 0x000000ff +#define NV50_2D_PATTERN_Y8_Y0__SHIFT 0 +#define NV50_2D_PATTERN_Y8_Y1__MASK 0x0000ff00 +#define NV50_2D_PATTERN_Y8_Y1__SHIFT 8 +#define NV50_2D_PATTERN_Y8_Y2__MASK 0x00ff0000 +#define NV50_2D_PATTERN_Y8_Y2__SHIFT 16 +#define NV50_2D_PATTERN_Y8_Y3__MASK 0xff000000 +#define NV50_2D_PATTERN_Y8_Y3__SHIFT 24 + +#define NVC0_2D_DRAW_COLOR_LONG(i0) (0x00000540 + 0x4*(i0)) +#define NVC0_2D_DRAW_COLOR_LONG__ESIZE 0x00000004 +#define NVC0_2D_DRAW_COLOR_LONG__LEN 0x00000004 + +#define NV50_2D_DRAW_SHAPE 0x00000580 
+#define NV50_2D_DRAW_SHAPE_POINTS 0x00000000 +#define NV50_2D_DRAW_SHAPE_LINES 0x00000001 +#define NV50_2D_DRAW_SHAPE_LINE_STRIP 0x00000002 +#define NV50_2D_DRAW_SHAPE_TRIANGLES 0x00000003 +#define NV50_2D_DRAW_SHAPE_RECTANGLES 0x00000004 + +#define NV50_2D_DRAW_COLOR_FORMAT 0x00000584 + +#define NV50_2D_DRAW_COLOR 0x00000588 + +#define NV50_2D_UNK58C 0x0000058c +#define NV50_2D_UNK58C_0 0x00000001 +#define NV50_2D_UNK58C_1 0x00000010 +#define NV50_2D_UNK58C_2 0x00000100 +#define NV50_2D_UNK58C_3 0x00001000 + +#define NV50_2D_DRAW_POINT16 0x000005e0 +#define NV50_2D_DRAW_POINT16_X__MASK 0x0000ffff +#define NV50_2D_DRAW_POINT16_X__SHIFT 0 +#define NV50_2D_DRAW_POINT16_Y__MASK 0xffff0000 +#define NV50_2D_DRAW_POINT16_Y__SHIFT 16 + +#define NV50_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0)) +#define NV50_2D_DRAW_POINT32_X__ESIZE 0x00000008 +#define NV50_2D_DRAW_POINT32_X__LEN 0x00000040 + +#define NV50_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0)) +#define NV50_2D_DRAW_POINT32_Y__ESIZE 0x00000008 +#define NV50_2D_DRAW_POINT32_Y__LEN 0x00000040 + +#define NV50_2D_SIFC_BITMAP_ENABLE 0x00000800 + +#define NV50_2D_SIFC_FORMAT 0x00000804 + +#define NV50_2D_SIFC_BITMAP_FORMAT 0x00000808 +#define NV50_2D_SIFC_BITMAP_FORMAT_I1 0x00000000 +#define NV50_2D_SIFC_BITMAP_FORMAT_I4 0x00000001 +#define NV50_2D_SIFC_BITMAP_FORMAT_I8 0x00000002 + +#define NV50_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c + +#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810 +#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000 +#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001 +#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002 + +#define NV50_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814 + +#define NV50_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818 + +#define NV50_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c + +#define NV50_2D_SIFC_WIDTH 0x00000838 + +#define NV50_2D_SIFC_HEIGHT 0x0000083c + +#define NV50_2D_SIFC_DX_DU_FRACT 0x00000840 + +#define NV50_2D_SIFC_DX_DU_INT 0x00000844 + 
+#define NV50_2D_SIFC_DY_DV_FRACT 0x00000848 + +#define NV50_2D_SIFC_DY_DV_INT 0x0000084c + +#define NV50_2D_SIFC_DST_X_FRACT 0x00000850 + +#define NV50_2D_SIFC_DST_X_INT 0x00000854 + +#define NV50_2D_SIFC_DST_Y_FRACT 0x00000858 + +#define NV50_2D_SIFC_DST_Y_INT 0x0000085c + +#define NV50_2D_SIFC_DATA 0x00000860 + +#define NV50_2D_UNK0870 0x00000870 + +#define NV50_2D_UNK0880 0x00000880 + +#define NV50_2D_UNK0884 0x00000884 + +#define NV50_2D_UNK0888 0x00000888 + +#define NV50_2D_BLIT_CONTROL 0x0000088c +#define NV50_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001 +#define NV50_2D_BLIT_CONTROL_ORIGIN__SHIFT 0 +#define NV50_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000 +#define NV50_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001 +#define NV50_2D_BLIT_CONTROL_FILTER__MASK 0x00000010 +#define NV50_2D_BLIT_CONTROL_FILTER__SHIFT 4 +#define NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000 +#define NV50_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010 + +#define NV50_2D_BLIT_DST_X 0x000008b0 + +#define NV50_2D_BLIT_DST_Y 0x000008b4 + +#define NV50_2D_BLIT_DST_W 0x000008b8 + +#define NV50_2D_BLIT_DST_H 0x000008bc + +#define NV50_2D_BLIT_DU_DX_FRACT 0x000008c0 + +#define NV50_2D_BLIT_DU_DX_INT 0x000008c4 + +#define NV50_2D_BLIT_DV_DY_FRACT 0x000008c8 + +#define NV50_2D_BLIT_DV_DY_INT 0x000008cc + +#define NV50_2D_BLIT_SRC_X_FRACT 0x000008d0 + +#define NV50_2D_BLIT_SRC_X_INT 0x000008d4 + +#define NV50_2D_BLIT_SRC_Y_FRACT 0x000008d8 + +#define NV50_2D_BLIT_SRC_Y_INT 0x000008dc + +#define NVC0_2D_FIRMWARE(i0) (0x000008e0 + 0x4*(i0)) +#define NVC0_2D_FIRMWARE__ESIZE 0x00000004 +#define NVC0_2D_FIRMWARE__LEN 0x00000020 + + +#endif /* RNNDB_NV50_2D_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h new file mode 100644 index 00000000000..9dff8b2dd13 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h @@ -0,0 +1,2110 @@ +#ifndef RNNDB_NV50_3D_XML +#define RNNDB_NV50_3D_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- rnndb/nv50_3d.xml ( 65226 bytes, from 2012-01-28 13:46:30) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv_3ddefs.xml ( 16394 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv_object.xml ( 12672 bytes, from 2011-08-11 18:25:12) + +Copyright (C) 2006-2012 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- 
Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NV50_3D_DMA_NOTIFY 0x00000180 + +#define NV50_3D_DMA_ZETA 0x00000184 + +#define NV50_3D_DMA_QUERY 0x00000188 + +#define NV50_3D_DMA_VTXBUF 0x0000018c + +#define NV50_3D_DMA_LOCAL 0x00000190 + +#define NV50_3D_DMA_STACK 0x00000194 + +#define NV50_3D_DMA_CODE_CB 0x00000198 + +#define NV50_3D_DMA_TSC 0x0000019c + +#define NV50_3D_DMA_TIC 0x000001a0 + +#define NV50_3D_DMA_TEXTURE 0x000001a4 + +#define NV50_3D_DMA_STRMOUT 0x000001a8 + +#define NV50_3D_DMA_CLIPID 0x000001ac + +#define NV50_3D_DMA_COLOR(i0) (0x000001c0 + 0x4*(i0)) +#define NV50_3D_DMA_COLOR__ESIZE 0x00000004 +#define NV50_3D_DMA_COLOR__LEN 0x00000008 + +#define NV50_3D_RT(i0) (0x00000200 + 0x20*(i0)) +#define NV50_3D_RT__ESIZE 0x00000020 +#define NV50_3D_RT__LEN 0x00000008 + +#define NV50_3D_RT_ADDRESS_HIGH(i0) (0x00000200 + 0x20*(i0)) + +#define NV50_3D_RT_ADDRESS_LOW(i0) (0x00000204 + 0x20*(i0)) + +#define NV50_3D_RT_FORMAT(i0) (0x00000208 + 0x20*(i0)) + +#define NV50_3D_RT_TILE_MODE(i0) (0x0000020c + 0x20*(i0)) +#define NV50_3D_RT_TILE_MODE_X__MASK 0x0000000f +#define NV50_3D_RT_TILE_MODE_X__SHIFT 0 +#define NV50_3D_RT_TILE_MODE_Y__MASK 0x000000f0 +#define NV50_3D_RT_TILE_MODE_Y__SHIFT 4 +#define NV50_3D_RT_TILE_MODE_Z__MASK 0x00000f00 +#define NV50_3D_RT_TILE_MODE_Z__SHIFT 8 + +#define NV50_3D_RT_LAYER_STRIDE(i0) (0x00000210 + 0x20*(i0)) +#define NV50_3D_RT_LAYER_STRIDE__SHR 2 + +#define NV50_3D_RT_UNK14(i0) (0x00000214 + 0x20*(i0)) + +#define NV50_3D_VTX_ATTR_1F(i0) (0x00000300 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_1F__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_1F__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_2H(i0) (0x00000340 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_2H__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_2H__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_2H_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_2H_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_2H_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_2H_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_2F_X(i0) (0x00000380 + 0x8*(i0)) +#define 
NV50_3D_VTX_ATTR_2F_X__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_2F_X__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_2F_Y(i0) (0x00000384 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_2F_Y__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_2F_Y__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_3F_X(i0) (0x00000400 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_3F_X__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_3F_X__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_3F_Y(i0) (0x00000404 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_3F_Y__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_3F_Y__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_3F_Z(i0) (0x00000408 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_3F_Z__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_3F_Z__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4F_X(i0) (0x00000500 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_4F_X__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_4F_X__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4F_Y(i0) (0x00000504 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_4F_Y__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_4F_Y__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4F_Z(i0) (0x00000508 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_4F_Z__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_4F_Z__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4F_W(i0) (0x0000050c + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_4F_W__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_4F_W__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4H_0(i0) (0x00000600 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4H_0__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4H_0__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4H_0_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4H_0_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4H_0_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4H_0_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4H_1(i0) (0x00000604 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4H_1__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4H_1__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4H_1_Z__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4H_1_Z__SHIFT 0 +#define NV50_3D_VTX_ATTR_4H_1_W__MASK 0xffff0000 +#define 
NV50_3D_VTX_ATTR_4H_1_W__SHIFT 16 + +#define NV50_3D_VTX_ATTR_2I(i0) (0x00000680 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_2I__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_2I__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_2I_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_2I_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_2I_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_2I_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_2NI(i0) (0x000006c0 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_2NI__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_2NI__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_2NI_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_2NI_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_2NI_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_2NI_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4I_0(i0) (0x00000700 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4I_0__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4I_0__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4I_0_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4I_0_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4I_0_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4I_0_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4I_1(i0) (0x00000704 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4I_1__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4I_1__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4I_1_Z__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4I_1_Z__SHIFT 0 +#define NV50_3D_VTX_ATTR_4I_1_W__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4I_1_W__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4NI_0(i0) (0x00000780 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4NI_0__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4NI_0__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4NI_0_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4NI_0_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4NI_0_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4NI_0_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4NI_1(i0) (0x00000784 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4NI_1__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4NI_1__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4NI_1_Z__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4NI_1_Z__SHIFT 0 +#define 
NV50_3D_VTX_ATTR_4NI_1_W__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4NI_1_W__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4UB(i0) (0x00000800 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_4UB__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_4UB__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4UB_X__MASK 0x000000ff +#define NV50_3D_VTX_ATTR_4UB_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4UB_Y__MASK 0x0000ff00 +#define NV50_3D_VTX_ATTR_4UB_Y__SHIFT 8 +#define NV50_3D_VTX_ATTR_4UB_Z__MASK 0x00ff0000 +#define NV50_3D_VTX_ATTR_4UB_Z__SHIFT 16 +#define NV50_3D_VTX_ATTR_4UB_W__MASK 0xff000000 +#define NV50_3D_VTX_ATTR_4UB_W__SHIFT 24 + +#define NV50_3D_VTX_ATTR_4B(i0) (0x00000840 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_4B__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_4B__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4B_X__MASK 0x000000ff +#define NV50_3D_VTX_ATTR_4B_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4B_Y__MASK 0x0000ff00 +#define NV50_3D_VTX_ATTR_4B_Y__SHIFT 8 +#define NV50_3D_VTX_ATTR_4B_Z__MASK 0x00ff0000 +#define NV50_3D_VTX_ATTR_4B_Z__SHIFT 16 +#define NV50_3D_VTX_ATTR_4B_W__MASK 0xff000000 +#define NV50_3D_VTX_ATTR_4B_W__SHIFT 24 + +#define NV50_3D_VTX_ATTR_4NUB(i0) (0x00000880 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_4NUB__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_4NUB__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4NUB_X__MASK 0x000000ff +#define NV50_3D_VTX_ATTR_4NUB_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4NUB_Y__MASK 0x0000ff00 +#define NV50_3D_VTX_ATTR_4NUB_Y__SHIFT 8 +#define NV50_3D_VTX_ATTR_4NUB_Z__MASK 0x00ff0000 +#define NV50_3D_VTX_ATTR_4NUB_Z__SHIFT 16 +#define NV50_3D_VTX_ATTR_4NUB_W__MASK 0xff000000 +#define NV50_3D_VTX_ATTR_4NUB_W__SHIFT 24 + +#define NV50_3D_VTX_ATTR_4NB(i0) (0x000008c0 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_4NB__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_4NB__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4NB_X__MASK 0x000000ff +#define NV50_3D_VTX_ATTR_4NB_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4NB_Y__MASK 0x0000ff00 +#define NV50_3D_VTX_ATTR_4NB_Y__SHIFT 8 +#define 
NV50_3D_VTX_ATTR_4NB_Z__MASK 0x00ff0000 +#define NV50_3D_VTX_ATTR_4NB_Z__SHIFT 16 +#define NV50_3D_VTX_ATTR_4NB_W__MASK 0xff000000 +#define NV50_3D_VTX_ATTR_4NB_W__SHIFT 24 + +#define NV50_3D_VERTEX_ARRAY_FETCH(i0) (0x00000900 + 0x10*(i0)) +#define NV50_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010 +#define NV50_3D_VERTEX_ARRAY_FETCH__LEN 0x00000010 +#define NV50_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff +#define NV50_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0 +#define NV50_3D_VERTEX_ARRAY_FETCH_ENABLE 0x20000000 + +#define NV50_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00000904 + 0x10*(i0)) +#define NV50_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010 +#define NV50_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000010 + +#define NV50_3D_VERTEX_ARRAY_START_LOW(i0) (0x00000908 + 0x10*(i0)) +#define NV50_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010 +#define NV50_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000010 + +#define NV50_3D_VERTEX_ARRAY_DIVISOR(i0) (0x0000090c + 0x10*(i0)) +#define NV50_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010 +#define NV50_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_SCALE_X__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_SCALE_Y__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_SCALE_Z__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0)) +#define NV50_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0)) 
+#define NV50_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0)) +#define NV50_3D_VIEWPORT_HORIZ__ESIZE 0x00000010 +#define NV50_3D_VIEWPORT_HORIZ__LEN 0x00000010 +#define NV50_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff +#define NV50_3D_VIEWPORT_HORIZ_X__SHIFT 0 +#define NV50_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000 +#define NV50_3D_VIEWPORT_HORIZ_W__SHIFT 16 + +#define NV50_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0)) +#define NV50_3D_VIEWPORT_VERT__ESIZE 0x00000010 +#define NV50_3D_VIEWPORT_VERT__LEN 0x00000010 +#define NV50_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff +#define NV50_3D_VIEWPORT_VERT_Y__SHIFT 0 +#define NV50_3D_VIEWPORT_VERT_H__MASK 0xffff0000 +#define NV50_3D_VIEWPORT_VERT_H__SHIFT 16 + +#define NV50_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0)) +#define NV50_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010 +#define NV50_3D_DEPTH_RANGE_NEAR__LEN 0x00000010 + +#define NV50_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0)) +#define NV50_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010 +#define NV50_3D_DEPTH_RANGE_FAR__LEN 0x00000010 + +#define NV50_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0)) +#define NV50_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008 +#define NV50_3D_CLIP_RECT_HORIZ__LEN 0x00000008 +#define NV50_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff +#define NV50_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0 +#define NV50_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000 +#define NV50_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16 + +#define NV50_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0)) +#define NV50_3D_CLIP_RECT_VERT__ESIZE 0x00000008 +#define NV50_3D_CLIP_RECT_VERT__LEN 0x00000008 +#define NV50_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff +#define NV50_3D_CLIP_RECT_VERT_MIN__SHIFT 0 +#define NV50_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000 +#define NV50_3D_CLIP_RECT_VERT_MAX__SHIFT 16 + +#define NV50_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0)) +#define NV50_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008 +#define 
NV50_3D_CLIPID_REGION_HORIZ__LEN 0x00000004 +#define NV50_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff +#define NV50_3D_CLIPID_REGION_HORIZ_X__SHIFT 0 +#define NV50_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000 +#define NV50_3D_CLIPID_REGION_HORIZ_W__SHIFT 16 + +#define NV50_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0)) +#define NV50_3D_CLIPID_REGION_VERT__ESIZE 0x00000008 +#define NV50_3D_CLIPID_REGION_VERT__LEN 0x00000004 +#define NV50_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff +#define NV50_3D_CLIPID_REGION_VERT_Y__SHIFT 0 +#define NV50_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000 +#define NV50_3D_CLIPID_REGION_VERT_H__SHIFT 16 + +#define NV50_3D_UNK0D60 0x00000d60 + +#define NV50_3D_UNK0D64 0x00000d64 + +#define NV50_3D_COUNTER_ENABLE 0x00000d68 +#define NV50_3D_COUNTER_ENABLE_VFETCH_VERTICES 0x00000001 +#define NV50_3D_COUNTER_ENABLE_VFETCH_PRIMITIVES 0x00000002 +#define NV50_3D_COUNTER_ENABLE_VP_LAUNCHES 0x00000004 +#define NV50_3D_COUNTER_ENABLE_GP_LAUNCHES 0x00000008 +#define NV50_3D_COUNTER_ENABLE_GP_PRIMITIVES_OUT 0x00000010 +#define NV50_3D_COUNTER_ENABLE_TRANSFORM_FEEDBACK 0x00000020 +#define NV50_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000040 +#define NV50_3D_COUNTER_ENABLE_RAST_PRIMITIVES_PRECLIP 0x00000080 +#define NV50_3D_COUNTER_ENABLE_RAST_PRIMITIVES_POSTCLIP 0x00000100 +#define NV50_3D_COUNTER_ENABLE_FP_PIXELS 0x00000200 +#define NV84_3D_COUNTER_ENABLE_UNK0A 0x00000400 + +#define NV50_3D_UNK0D6C(i0) (0x00000d6c + 0x4*(i0)) +#define NV50_3D_UNK0D6C__ESIZE 0x00000004 +#define NV50_3D_UNK0D6C__LEN 0x00000002 +#define NV50_3D_UNK0D6C_X__MASK 0x0000ffff +#define NV50_3D_UNK0D6C_X__SHIFT 0 +#define NV50_3D_UNK0D6C_Y__MASK 0xffff0000 +#define NV50_3D_UNK0D6C_Y__SHIFT 16 + +#define NV50_3D_VERTEX_BUFFER_FIRST 0x00000d74 + +#define NV50_3D_VERTEX_BUFFER_COUNT 0x00000d78 + +#define NV50_3D_UNK0D7C 0x00000d7c + +#define NV50_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0)) +#define NV50_3D_CLEAR_COLOR__ESIZE 0x00000004 +#define NV50_3D_CLEAR_COLOR__LEN 0x00000004 + 
+#define NV50_3D_CLEAR_DEPTH 0x00000d90 + +#define NV50_3D_STACK_ADDRESS_HIGH 0x00000d94 + +#define NV50_3D_STACK_ADDRESS_LOW 0x00000d98 + +#define NV50_3D_STACK_SIZE_LOG 0x00000d9c + +#define NV50_3D_CLEAR_STENCIL 0x00000da0 + +#define NV50_3D_STRMOUT_PARAMS_LATCH 0x00000da4 + +#define NV50_3D_STRMOUT_PRIMITIVE_LIMIT 0x00000da8 + +#define NV50_3D_POLYGON_MODE_FRONT 0x00000dac +#define NV50_3D_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV50_3D_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV50_3D_POLYGON_MODE_FRONT_FILL 0x00001b02 + +#define NV50_3D_POLYGON_MODE_BACK 0x00000db0 +#define NV50_3D_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV50_3D_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV50_3D_POLYGON_MODE_BACK_FILL 0x00001b02 + +#define NV50_3D_POLYGON_SMOOTH_ENABLE 0x00000db4 + +#define NV50_3D_UNK0DB8 0x00000db8 + +#define NV50_3D_ZCULL_UNK0DBC 0x00000dbc +#define NV50_3D_ZCULL_UNK0DBC_UNK0 0x00000001 +#define NV50_3D_ZCULL_UNK0DBC_UNK16__MASK 0x00030000 +#define NV50_3D_ZCULL_UNK0DBC_UNK16__SHIFT 16 + +#define NV50_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0 + +#define NV50_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4 + +#define NV50_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8 + +#define NV50_3D_UNK0DCC 0x00000dcc + +#define NV50_3D_VTX_ATTR_MASK_UNK0DD0(i0) (0x00000dd0 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_MASK_UNK0DD0__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_MASK_UNK0DD0__LEN 0x00000002 + +#define NV50_3D_ZCULL_UNK0DD8 0x00000dd8 +#define NV50_3D_ZCULL_UNK0DD8_UNK0__MASK 0x00000007 +#define NV50_3D_ZCULL_UNK0DD8_UNK0__SHIFT 0 +#define NVA3_3D_ZCULL_UNK0DD8_UNK9 0x00000200 +#define NV50_3D_ZCULL_UNK0DD8_UNK16__MASK 0xffff0000 +#define NV50_3D_ZCULL_UNK0DD8_UNK16__SHIFT 16 + +#define NV50_3D_UNK0DDC 0x00000ddc + +#define NV50_3D_UNK0DE0 0x00000de0 + +#define NV50_3D_WATCHDOG_TIMER 0x00000de4 + +#define NV50_3D_UNK0DE8 0x00000de8 + +#define NV50_3D_UNK0DEC 0x00000dec + +#define NV50_3D_UNK0DF0 0x00000df0 +#define NV50_3D_UNK0DF0_UNK0 0x00000001 +#define 
NV50_3D_UNK0DF0_UNK1__MASK 0x00000ff0 +#define NV50_3D_UNK0DF0_UNK1__SHIFT 4 + +#define NV50_3D_UNK0DF4 0x00000df4 + +#define NV50_3D_WINDOW_OFFSET_X 0x00000df8 + +#define NV50_3D_WINDOW_OFFSET_Y 0x00000dfc + +#define NV50_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0)) +#define NV50_3D_SCISSOR_ENABLE__ESIZE 0x00000010 +#define NV50_3D_SCISSOR_ENABLE__LEN 0x00000010 + +#define NV50_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0)) +#define NV50_3D_SCISSOR_HORIZ__ESIZE 0x00000010 +#define NV50_3D_SCISSOR_HORIZ__LEN 0x00000010 +#define NV50_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff +#define NV50_3D_SCISSOR_HORIZ_MIN__SHIFT 0 +#define NV50_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000 +#define NV50_3D_SCISSOR_HORIZ_MAX__SHIFT 16 + +#define NV50_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0)) +#define NV50_3D_SCISSOR_VERT__ESIZE 0x00000010 +#define NV50_3D_SCISSOR_VERT__LEN 0x00000010 +#define NV50_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff +#define NV50_3D_SCISSOR_VERT_MIN__SHIFT 0 +#define NV50_3D_SCISSOR_VERT_MAX__MASK 0xffff0000 +#define NV50_3D_SCISSOR_VERT_MAX__SHIFT 16 + +#define NV50_3D_CB_ADDR 0x00000f00 +#define NV50_3D_CB_ADDR_ID__MASK 0x003fff00 +#define NV50_3D_CB_ADDR_ID__SHIFT 8 +#define NV50_3D_CB_ADDR_BUFFER__MASK 0x0000007f +#define NV50_3D_CB_ADDR_BUFFER__SHIFT 0 + +#define NV50_3D_CB_DATA(i0) (0x00000f04 + 0x4*(i0)) +#define NV50_3D_CB_DATA__ESIZE 0x00000004 +#define NV50_3D_CB_DATA__LEN 0x00000010 + +#define NV50_3D_LOCAL_WARPS_LOG_ALLOC 0x00000f44 + +#define NV50_3D_LOCAL_WARPS_NO_CLAMP 0x00000f48 + +#define NV50_3D_STACK_WARPS_LOG_ALLOC 0x00000f4c + +#define NV50_3D_STACK_WARPS_NO_CLAMP 0x00000f50 + +#define NV50_3D_STENCIL_BACK_FUNC_REF 0x00000f54 + +#define NV50_3D_STENCIL_BACK_MASK 0x00000f58 + +#define NV50_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c + +#define NV50_3D_UNK0F60(i0) (0x00000f60 + 0x4*(i0)) +#define NV50_3D_UNK0F60__ESIZE 0x00000004 +#define NV50_3D_UNK0F60__LEN 0x00000004 + +#define NV50_3D_GP_ADDRESS_HIGH 0x00000f70 + +#define NV50_3D_GP_ADDRESS_LOW 
0x00000f74 + +#define NV50_3D_UNK0F78 0x00000f78 + +#define NV50_3D_VP_ADDRESS_HIGH 0x00000f7c + +#define NV50_3D_VP_ADDRESS_LOW 0x00000f80 + +#define NV50_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84 + +#define NV50_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88 + +#define NV50_3D_UNK0F8C 0x00000f8c + +#define NV50_3D_COLOR_MASK_COMMON 0x00000f90 + +#define NV50_3D_UNK0F94 0x00000f94 + +#define NV50_3D_UNK0F98 0x00000f98 + +#define NV50_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0)) +#define NV50_3D_DEPTH_BOUNDS__ESIZE 0x00000004 +#define NV50_3D_DEPTH_BOUNDS__LEN 0x00000002 + +#define NV50_3D_FP_ADDRESS_HIGH 0x00000fa4 + +#define NV50_3D_FP_ADDRESS_LOW 0x00000fa8 + +#define NV50_3D_UNK0FAC 0x00000fac +#define NV50_3D_UNK0FAC_UNK0 0x00000001 +#define NVA0_3D_UNK0FAC_UNK2 0x00000002 +#define NV50_3D_UNK0FAC_UNK1__MASK 0x000ffff0 +#define NV50_3D_UNK0FAC_UNK1__SHIFT 4 + +#define NV50_3D_UNK0FB0 0x00000fb0 + +#define NV50_3D_UNK0FB4 0x00000fb4 + +#define NV50_3D_UNK0FB8 0x00000fb8 + +#define NV50_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0)) +#define NV50_3D_MSAA_MASK__ESIZE 0x00000004 +#define NV50_3D_MSAA_MASK__LEN 0x00000004 + +#define NV50_3D_CLIPID_ADDRESS_HIGH 0x00000fcc + +#define NV50_3D_CLIPID_ADDRESS_LOW 0x00000fd0 + +#define NV50_3D_SEMANTIC_VIEWPORT 0x00000fd4 +#define NV50_3D_SEMANTIC_VIEWPORT_VIEWPORT_ID__MASK 0x000000ff +#define NV50_3D_SEMANTIC_VIEWPORT_VIEWPORT_ID__SHIFT 0 + +#define NV50_3D_UNK0FD8 0x00000fd8 +#define NV50_3D_UNK0FD8_UNK0 0x00000001 +#define NV50_3D_UNK0FD8_UNK1 0x00000010 + +#define NV50_3D_UNK0FDC 0x00000fdc + +#define NV50_3D_ZETA_ADDRESS_HIGH 0x00000fe0 + +#define NV50_3D_ZETA_ADDRESS_LOW 0x00000fe4 + +#define NV50_3D_ZETA_FORMAT 0x00000fe8 + +#define NV50_3D_ZETA_TILE_MODE 0x00000fec + +#define NV50_3D_ZETA_LAYER_STRIDE 0x00000ff0 +#define NV50_3D_ZETA_LAYER_STRIDE__SHR 2 + +#define NV50_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4 +#define NV50_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000 +#define NV50_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16 +#define 
NV50_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff +#define NV50_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0 + +#define NV50_3D_SCREEN_SCISSOR_VERT 0x00000ff8 +#define NV50_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000 +#define NV50_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16 +#define NV50_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff +#define NV50_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0 + +#define NV50_3D_UNK0FFC 0x00000ffc + +#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001000 + 0x4*(i0)) +#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004 +#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000010 + +#define NV50_3D_UNK1040(i0) (0x00001040 + 0x4*(i0)) +#define NV50_3D_UNK1040__ESIZE 0x00000004 +#define NV50_3D_UNK1040__LEN 0x00000010 + +#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001080 + 0x8*(i0)) +#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008 +#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000010 + +#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001084 + 0x8*(i0)) +#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008 +#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000010 + +#define NV50_3D_UNK1100 0x00001100 + +#define NV84_3D_UNK1104 0x00001104 +#define NV84_3D_UNK1104_0__MASK 0x0000ffff +#define NV84_3D_UNK1104_0__SHIFT 0 +#define NV84_3D_UNK1104_0__MAX 0x00002000 +#define NV84_3D_UNK1104_0__ALIGN 0x00000040 +#define NV84_3D_UNK1104_1__MASK 0xffff0000 +#define NV84_3D_UNK1104_1__SHIFT 16 +#define NV84_3D_UNK1104_1__MAX 0x00002000 +#define NV84_3D_UNK1104_1__ALIGN 0x00000040 + +#define NV84_3D_UNK1108 0x00001108 +#define NV84_3D_UNK1108_0 0x00000001 +#define NV84_3D_UNK1108_1 0x00000010 + +#define NV84_3D_UNK110C 0x0000110c + +#define NV84_3D_UNK1110 0x00001110 + +#define NV84_3D_WRCACHE_FLUSH 0x00001114 + +#define NV84_3D_VERTEX_ID_BASE 0x00001118 + +#define NV84_3D_PRIMITIVE_ID 0x0000111c + +#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT(i0) (0x00001120 + 0x4*(i0)) +#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT__ESIZE 0x00000004 +#define 
NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT__LEN 0x00000004 + +#define NVA3_3D_VP_ATTR_EN_ALT(i0) (0x00001130 + 0x4*(i0)) +#define NVA3_3D_VP_ATTR_EN_ALT__ESIZE 0x00000004 +#define NVA3_3D_VP_ATTR_EN_ALT__LEN 0x00000004 +#define NVA3_3D_VP_ATTR_EN_ALT_7__MASK 0xf0000000 +#define NVA3_3D_VP_ATTR_EN_ALT_7__SHIFT 28 +#define NVA3_3D_VP_ATTR_EN_ALT_7_X 0x10000000 +#define NVA3_3D_VP_ATTR_EN_ALT_7_Y 0x20000000 +#define NVA3_3D_VP_ATTR_EN_ALT_7_Z 0x40000000 +#define NVA3_3D_VP_ATTR_EN_ALT_7_W 0x80000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6__MASK 0x0f000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6__SHIFT 24 +#define NVA3_3D_VP_ATTR_EN_ALT_6_X 0x01000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6_Y 0x02000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6_Z 0x04000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6_W 0x08000000 +#define NVA3_3D_VP_ATTR_EN_ALT_5__MASK 0x00f00000 +#define NVA3_3D_VP_ATTR_EN_ALT_5__SHIFT 20 +#define NVA3_3D_VP_ATTR_EN_ALT_5_X 0x00100000 +#define NVA3_3D_VP_ATTR_EN_ALT_5_Y 0x00200000 +#define NVA3_3D_VP_ATTR_EN_ALT_5_Z 0x00400000 +#define NVA3_3D_VP_ATTR_EN_ALT_5_W 0x00800000 +#define NVA3_3D_VP_ATTR_EN_ALT_4__MASK 0x000f0000 +#define NVA3_3D_VP_ATTR_EN_ALT_4__SHIFT 16 +#define NVA3_3D_VP_ATTR_EN_ALT_4_X 0x00010000 +#define NVA3_3D_VP_ATTR_EN_ALT_4_Y 0x00020000 +#define NVA3_3D_VP_ATTR_EN_ALT_4_Z 0x00040000 +#define NVA3_3D_VP_ATTR_EN_ALT_4_W 0x00080000 +#define NVA3_3D_VP_ATTR_EN_ALT_3__MASK 0x0000f000 +#define NVA3_3D_VP_ATTR_EN_ALT_3__SHIFT 12 +#define NVA3_3D_VP_ATTR_EN_ALT_3_X 0x00001000 +#define NVA3_3D_VP_ATTR_EN_ALT_3_Y 0x00002000 +#define NVA3_3D_VP_ATTR_EN_ALT_3_Z 0x00004000 +#define NVA3_3D_VP_ATTR_EN_ALT_3_W 0x00008000 +#define NVA3_3D_VP_ATTR_EN_ALT_2__MASK 0x00000f00 +#define NVA3_3D_VP_ATTR_EN_ALT_2__SHIFT 8 +#define NVA3_3D_VP_ATTR_EN_ALT_2_X 0x00000100 +#define NVA3_3D_VP_ATTR_EN_ALT_2_Y 0x00000200 +#define NVA3_3D_VP_ATTR_EN_ALT_2_Z 0x00000400 +#define NVA3_3D_VP_ATTR_EN_ALT_2_W 0x00000800 +#define NVA3_3D_VP_ATTR_EN_ALT_1__MASK 0x000000f0 +#define NVA3_3D_VP_ATTR_EN_ALT_1__SHIFT 
4 +#define NVA3_3D_VP_ATTR_EN_ALT_1_X 0x00000010 +#define NVA3_3D_VP_ATTR_EN_ALT_1_Y 0x00000020 +#define NVA3_3D_VP_ATTR_EN_ALT_1_Z 0x00000040 +#define NVA3_3D_VP_ATTR_EN_ALT_1_W 0x00000080 +#define NVA3_3D_VP_ATTR_EN_ALT_0__MASK 0x0000000f +#define NVA3_3D_VP_ATTR_EN_ALT_0__SHIFT 0 +#define NVA3_3D_VP_ATTR_EN_ALT_0_X 0x00000001 +#define NVA3_3D_VP_ATTR_EN_ALT_0_Y 0x00000002 +#define NVA3_3D_VP_ATTR_EN_ALT_0_Z 0x00000004 +#define NVA3_3D_VP_ATTR_EN_ALT_0_W 0x00000008 + +#define NVA3_3D_UNK1140 0x00001140 + +#define NVA0_3D_UNK1144 0x00001144 + +#define NVA0_3D_VTX_ATTR_DEFINE 0x0000114c +#define NVA0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff +#define NVA0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0 +#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700 +#define NVA0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8 +#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001 +#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000 + +#define NVA0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0)) +#define NVA0_3D_VTX_ATTR_DATA__ESIZE 0x00000004 +#define NVA0_3D_VTX_ATTR_DATA__LEN 0x00000004 + +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT(i0) (0x00001160 + 0x4*(i0)) +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT__ESIZE 0x00000004 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT__LEN 0x00000020 
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BUFFER__MASK 0x0000001f +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BUFFER__SHIFT 0 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_CONST 0x00000040 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_OFFSET__MASK 0x001fff80 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_OFFSET__SHIFT 7 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT__MASK 0x07e00000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT__SHIFT 21 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32_32_32 0x00200000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32_32 0x00400000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16_16_16 0x00600000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32 0x00800000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16_16 0x00a00000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8_8_8 0x01400000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16 0x01e00000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32 0x02400000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8_8 0x02600000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8 0x03000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16 0x03600000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8 0x03a00000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_10_10_10_2 0x06000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__MASK 0x38000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__SHIFT 27 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SNORM 0x08000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_UNORM 0x10000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SINT 0x18000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_UINT 0x20000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_USCALED 0x28000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SSCALED 0x30000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_FLOAT 0x38000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BGRA 0x80000000 + +#define NV50_3D_RT_CONTROL 0x0000121c +#define NV50_3D_RT_CONTROL_COUNT__MASK 0x0000000f 
+#define NV50_3D_RT_CONTROL_COUNT__SHIFT 0 +#define NV50_3D_RT_CONTROL_MAP0__MASK 0x00000070 +#define NV50_3D_RT_CONTROL_MAP0__SHIFT 4 +#define NV50_3D_RT_CONTROL_MAP1__MASK 0x00000380 +#define NV50_3D_RT_CONTROL_MAP1__SHIFT 7 +#define NV50_3D_RT_CONTROL_MAP2__MASK 0x00001c00 +#define NV50_3D_RT_CONTROL_MAP2__SHIFT 10 +#define NV50_3D_RT_CONTROL_MAP3__MASK 0x0000e000 +#define NV50_3D_RT_CONTROL_MAP3__SHIFT 13 +#define NV50_3D_RT_CONTROL_MAP4__MASK 0x00070000 +#define NV50_3D_RT_CONTROL_MAP4__SHIFT 16 +#define NV50_3D_RT_CONTROL_MAP5__MASK 0x00380000 +#define NV50_3D_RT_CONTROL_MAP5__SHIFT 19 +#define NV50_3D_RT_CONTROL_MAP6__MASK 0x01c00000 +#define NV50_3D_RT_CONTROL_MAP6__SHIFT 22 +#define NV50_3D_RT_CONTROL_MAP7__MASK 0x0e000000 +#define NV50_3D_RT_CONTROL_MAP7__SHIFT 25 + +#define NV50_3D_UNK1220 0x00001220 + +#define NV50_3D_RT_ARRAY_MODE 0x00001224 +#define NV50_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NV50_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0 +#define NV50_3D_RT_ARRAY_MODE_MODE__MASK 0x00010000 +#define NV50_3D_RT_ARRAY_MODE_MODE__SHIFT 16 +#define NV50_3D_RT_ARRAY_MODE_MODE_2D_ARRAY 0x00000000 +#define NV50_3D_RT_ARRAY_MODE_MODE_3D 0x00010000 + +#define NV50_3D_ZETA_HORIZ 0x00001228 + +#define NV50_3D_ZETA_VERT 0x0000122c + +#define NV50_3D_ZETA_ARRAY_MODE 0x00001230 +#define NV50_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NV50_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0 +#define NV50_3D_ZETA_ARRAY_MODE_UNK 0x00010000 + +#define NV50_3D_LINKED_TSC 0x00001234 + +#define NV50_3D_UNK1238 0x00001238 + +#define NVA0_3D_DRAW_TFB_BYTES 0x0000123c + +#define NV50_3D_RT_HORIZ(i0) (0x00001240 + 0x8*(i0)) +#define NV50_3D_RT_HORIZ__ESIZE 0x00000008 +#define NV50_3D_RT_HORIZ__LEN 0x00000008 +#define NV50_3D_RT_HORIZ_WIDTH__MASK 0x0fffffff +#define NV50_3D_RT_HORIZ_WIDTH__SHIFT 0 +#define NV50_3D_RT_HORIZ_LINEAR 0x80000000 + +#define NV50_3D_RT_VERT(i0) (0x00001244 + 0x8*(i0)) +#define NV50_3D_RT_VERT__ESIZE 0x00000008 +#define NV50_3D_RT_VERT__LEN 0x00000008 + 
+#define NV50_3D_CB_DEF_ADDRESS_HIGH 0x00001280 + +#define NV50_3D_CB_DEF_ADDRESS_LOW 0x00001284 + +#define NV50_3D_CB_DEF_SET 0x00001288 +#define NV50_3D_CB_DEF_SET_SIZE__MASK 0x0000ffff +#define NV50_3D_CB_DEF_SET_SIZE__SHIFT 0 +#define NV50_3D_CB_DEF_SET_BUFFER__MASK 0x007f0000 +#define NV50_3D_CB_DEF_SET_BUFFER__SHIFT 16 + +#define NV50_3D_UNK128C 0x0000128c +#define NV50_3D_UNK128C_0__MASK 0x00000003 +#define NV50_3D_UNK128C_0__SHIFT 0 +#define NV50_3D_UNK128C_1__MASK 0x00000030 +#define NV50_3D_UNK128C_1__SHIFT 4 +#define NV50_3D_UNK128C_2__MASK 0x00000300 +#define NV50_3D_UNK128C_2__SHIFT 8 +#define NV50_3D_UNK128C_3__MASK 0x00003000 +#define NV50_3D_UNK128C_3__SHIFT 12 + +#define NV50_3D_CALL_LIMIT_LOG 0x00001290 +#define NV50_3D_CALL_LIMIT_LOG_VP__MASK 0x0000000f +#define NV50_3D_CALL_LIMIT_LOG_VP__SHIFT 0 +#define NV50_3D_CALL_LIMIT_LOG_GP__MASK 0x000000f0 +#define NV50_3D_CALL_LIMIT_LOG_GP__SHIFT 4 +#define NV50_3D_CALL_LIMIT_LOG_FP__MASK 0x00000f00 +#define NV50_3D_CALL_LIMIT_LOG_FP__SHIFT 8 + +#define NV50_3D_STRMOUT_BUFFERS_CTRL 0x00001294 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED 0x00000001 +#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE__MASK 0x00000002 +#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE__SHIFT 1 +#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_PRIMITIVES 0x00000000 +#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET 0x00000002 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__MASK 0x000000f0 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT 4 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MASK 0x000fff00 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT 8 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX 0x00000800 + +#define NV50_3D_FP_RESULT_COUNT 0x00001298 + +#define NV50_3D_VTX_UNK129C 0x0000129c + +#define NV50_3D_UNK12A0 0x000012a0 + +#define NV50_3D_UNK12A8 0x000012a8 +#define NV50_3D_UNK12A8_UNK1 0x00000001 +#define NV50_3D_UNK12A8_UNK2__MASK 0x000ffff0 +#define NV50_3D_UNK12A8_UNK2__SHIFT 4 + +#define 
NV50_3D_UNK12AC 0x000012ac + +#define NV50_3D_UNK12B0 0x000012b0 +#define NV50_3D_UNK12B0_UNK0__MASK 0x000000ff +#define NV50_3D_UNK12B0_UNK0__SHIFT 0 +#define NV50_3D_UNK12B0_UNK1__MASK 0x0000ff00 +#define NV50_3D_UNK12B0_UNK1__SHIFT 8 +#define NV50_3D_UNK12B0_UNK2__MASK 0x00ff0000 +#define NV50_3D_UNK12B0_UNK2__SHIFT 16 +#define NV50_3D_UNK12B0_UNK3__MASK 0xff000000 +#define NV50_3D_UNK12B0_UNK3__SHIFT 24 +#define NV50_3D_UNK12B0_UNK3__MAX 0x00000080 + +#define NV50_3D_UNK12B4 0x000012b4 + +#define NV50_3D_UNK12B8 0x000012b8 + +#define NV50_3D_DEPTH_TEST_ENABLE 0x000012cc + +#define NV50_3D_D3D_FILL_MODE 0x000012d0 +#define NV50_3D_D3D_FILL_MODE_POINT 0x00000001 +#define NV50_3D_D3D_FILL_MODE_WIREFRAME 0x00000002 +#define NV50_3D_D3D_FILL_MODE_SOLID 0x00000003 + +#define NV50_3D_SHADE_MODEL 0x000012d4 +#define NV50_3D_SHADE_MODEL_FLAT 0x00001d00 +#define NV50_3D_SHADE_MODEL_SMOOTH 0x00001d01 + +#define NV50_3D_LOCAL_ADDRESS_HIGH 0x000012d8 + +#define NV50_3D_LOCAL_ADDRESS_LOW 0x000012dc + +#define NV50_3D_LOCAL_SIZE_LOG 0x000012e0 + +#define NV50_3D_BLEND_INDEPENDENT 0x000012e4 + +#define NV50_3D_DEPTH_WRITE_ENABLE 0x000012e8 + +#define NV50_3D_ALPHA_TEST_ENABLE 0x000012ec + +#define NV50_3D_PM_SET(i0) (0x000012f0 + 0x4*(i0)) +#define NV50_3D_PM_SET__ESIZE 0x00000004 +#define NV50_3D_PM_SET__LEN 0x00000004 + +#define NV50_3D_VB_ELEMENT_U8_SETUP 0x00001300 +#define NV50_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000 +#define NV50_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30 +#define NV50_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff +#define NV50_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0 + +#define NV50_3D_VB_ELEMENT_U8 0x00001304 +#define NV50_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff +#define NV50_3D_VB_ELEMENT_U8_I0__SHIFT 0 +#define NV50_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00 +#define NV50_3D_VB_ELEMENT_U8_I1__SHIFT 8 +#define NV50_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000 +#define NV50_3D_VB_ELEMENT_U8_I2__SHIFT 16 +#define NV50_3D_VB_ELEMENT_U8_I3__MASK 0xff000000 +#define 
NV50_3D_VB_ELEMENT_U8_I3__SHIFT 24 + +#define NV50_3D_D3D_CULL_MODE 0x00001308 +#define NV50_3D_D3D_CULL_MODE_NONE 0x00000001 +#define NV50_3D_D3D_CULL_MODE_FRONT 0x00000002 +#define NV50_3D_D3D_CULL_MODE_BACK 0x00000003 + +#define NV50_3D_DEPTH_TEST_FUNC 0x0000130c +#define NV50_3D_DEPTH_TEST_FUNC_NEVER 0x00000200 +#define NV50_3D_DEPTH_TEST_FUNC_LESS 0x00000201 +#define NV50_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202 +#define NV50_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203 +#define NV50_3D_DEPTH_TEST_FUNC_GREATER 0x00000204 +#define NV50_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205 +#define NV50_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206 +#define NV50_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207 + +#define NV50_3D_ALPHA_TEST_REF 0x00001310 + +#define NV50_3D_ALPHA_TEST_FUNC 0x00001314 +#define NV50_3D_ALPHA_TEST_FUNC_NEVER 0x00000200 +#define NV50_3D_ALPHA_TEST_FUNC_LESS 0x00000201 +#define NV50_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202 +#define NV50_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203 +#define NV50_3D_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NV50_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 +#define NV50_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206 +#define NV50_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207 + +#define NVA0_3D_DRAW_TFB_STRIDE 0x00001318 +#define NVA0_3D_DRAW_TFB_STRIDE__MIN 0x00000001 +#define NVA0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff + +#define NV50_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0)) +#define NV50_3D_BLEND_COLOR__ESIZE 0x00000004 +#define NV50_3D_BLEND_COLOR__LEN 0x00000004 + +#define NV50_3D_UNK132C 0x0000132c + +#define NV50_3D_TSC_FLUSH 0x00001330 +#define NV50_3D_TSC_FLUSH_SPECIFIC 0x00000001 +#define NV50_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_3D_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_3D_TIC_FLUSH 0x00001334 +#define NV50_3D_TIC_FLUSH_SPECIFIC 0x00000001 +#define NV50_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_3D_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_3D_TEX_CACHE_CTL 0x00001338 +#define NV50_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030 +#define 
NV50_3D_TEX_CACHE_CTL_UNK1__SHIFT 4 + +#define NV50_3D_BLEND_SEPARATE_ALPHA 0x0000133c + +#define NV50_3D_BLEND_EQUATION_RGB 0x00001340 +#define NV50_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NV50_3D_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NV50_3D_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NV50_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NV50_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NV50_3D_BLEND_FUNC_SRC_RGB 0x00001344 + +#define NV50_3D_BLEND_FUNC_DST_RGB 0x00001348 + +#define NV50_3D_BLEND_EQUATION_ALPHA 0x0000134c +#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NV50_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NV50_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NV50_3D_BLEND_FUNC_SRC_ALPHA 0x00001350 + +#define NV50_3D_UNK1354 0x00001354 + +#define NV50_3D_BLEND_FUNC_DST_ALPHA 0x00001358 + +#define NV50_3D_BLEND_ENABLE_COMMON 0x0000135c + +#define NV50_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0)) +#define NV50_3D_BLEND_ENABLE__ESIZE 0x00000004 +#define NV50_3D_BLEND_ENABLE__LEN 0x00000008 + +#define NV50_3D_STENCIL_ENABLE 0x00001380 + +#define NV50_3D_STENCIL_FRONT_OP_FAIL 0x00001384 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NV50_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define 
NV50_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 + +#define NV50_3D_STENCIL_FRONT_FUNC_REF 0x00001394 + +#define NV50_3D_STENCIL_FRONT_MASK 0x00001398 + +#define NV50_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c + +#define NV50_3D_UNK13A0 0x000013a0 + +#define NVA0_3D_DRAW_TFB_BASE 0x000013a4 + +#define NV50_3D_FRAG_COLOR_CLAMP_EN 0x000013a8 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000 +#define 
NV50_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000 + +#define NV50_3D_SCREEN_Y_CONTROL 0x000013ac +#define NV50_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001 +#define NV50_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010 + +#define NV50_3D_LINE_WIDTH 0x000013b0 + +#define NV50_3D_TEX_LIMITS(i0) (0x000013b4 + 0x4*(i0)) +#define NV50_3D_TEX_LIMITS__ESIZE 0x00000004 +#define NV50_3D_TEX_LIMITS__LEN 0x00000003 +#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f +#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0 +#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000 +#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004 +#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0 +#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4 +#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000 +#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007 + +#define NV50_3D_POINT_COORD_REPLACE_MAP(i0) (0x000013c0 + 0x4*(i0)) +#define NV50_3D_POINT_COORD_REPLACE_MAP__ESIZE 0x00000004 +#define NV50_3D_POINT_COORD_REPLACE_MAP__LEN 0x00000010 + +#define NV50_3D_UNK1400_LANES 0x00001400 + +#define NV50_3D_UNK1404 0x00001404 + +#define NV50_3D_UNK1408 0x00001408 + +#define NV50_3D_VP_START_ID 0x0000140c + +#define NV50_3D_GP_START_ID 0x00001410 + +#define NV50_3D_FP_START_ID 0x00001414 + +#define NVA3_3D_UNK1418 0x00001418 + +#define NV50_3D_UNK141C 0x0000141c + +#define NV50_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420 +#define NV50_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001 +#define NV50_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400 + +#define NV50_3D_VERTEX_ARRAY_FLUSH 0x0000142c + +#define NV50_3D_UNK1430 0x00001430 +#define NV50_3D_UNK1430_UNK0 0x00000010 +#define NV50_3D_UNK1430_UNK1 0x00000100 + +#define NV50_3D_VB_ELEMENT_BASE 0x00001434 + +#define NV50_3D_VB_INSTANCE_BASE 0x00001438 + +#define NV50_3D_CLEAR_FLAGS 0x0000143c +#define NV50_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001 +#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT__MASK 0x00000010 +#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT__SHIFT 4 +#define 
NV50_3D_CLEAR_FLAGS_CLEAR_RECT_SCISSOR 0x00000000 +#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT 0x00000010 + +#define NV50_3D_CODE_CB_FLUSH 0x00001440 + +#define NV50_3D_BIND_TSC(i0) (0x00001444 + 0x8*(i0)) +#define NV50_3D_BIND_TSC__ESIZE 0x00000008 +#define NV50_3D_BIND_TSC__LEN 0x00000003 +#define NV50_3D_BIND_TSC_VALID 0x00000001 +#define NV50_3D_BIND_TSC_SAMPLER__MASK 0x000000f0 +#define NV50_3D_BIND_TSC_SAMPLER__SHIFT 4 +#define NV50_3D_BIND_TSC_TSC__MASK 0x001ff000 +#define NV50_3D_BIND_TSC_TSC__SHIFT 12 + +#define NV50_3D_BIND_TIC(i0) (0x00001448 + 0x8*(i0)) +#define NV50_3D_BIND_TIC__ESIZE 0x00000008 +#define NV50_3D_BIND_TIC__LEN 0x00000003 +#define NV50_3D_BIND_TIC_VALID 0x00000001 +#define NV50_3D_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NV50_3D_BIND_TIC_TEXTURE__SHIFT 1 +#define NV50_3D_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NV50_3D_BIND_TIC_TIC__SHIFT 9 + +#define NV50_3D_BIND_TSC2(i0) (0x00001468 + 0x8*(i0)) +#define NV50_3D_BIND_TSC2__ESIZE 0x00000008 +#define NV50_3D_BIND_TSC2__LEN 0x00000003 +#define NV50_3D_BIND_TSC2_VALID 0x00000001 +#define NV50_3D_BIND_TSC2_SAMPLER__MASK 0x00000010 +#define NV50_3D_BIND_TSC2_SAMPLER__SHIFT 4 +#define NV50_3D_BIND_TSC2_TSC__MASK 0x001ff000 +#define NV50_3D_BIND_TSC2_TSC__SHIFT 12 + +#define NV50_3D_BIND_TIC2(i0) (0x0000146c + 0x8*(i0)) +#define NV50_3D_BIND_TIC2__ESIZE 0x00000008 +#define NV50_3D_BIND_TIC2__LEN 0x00000003 +#define NV50_3D_BIND_TIC2_VALID 0x00000001 +#define NV50_3D_BIND_TIC2_TEXTURE__MASK 0x00000002 +#define NV50_3D_BIND_TIC2_TEXTURE__SHIFT 1 +#define NV50_3D_BIND_TIC2_TIC__MASK 0x7ffffe00 +#define NV50_3D_BIND_TIC2_TIC__SHIFT 9 + +#define NV50_3D_STRMOUT_MAP(i0) (0x00001480 + 0x4*(i0)) +#define NV50_3D_STRMOUT_MAP__ESIZE 0x00000004 +#define NV50_3D_STRMOUT_MAP__LEN 0x00000020 + +#define NV50_3D_CLIPID_HEIGHT 0x00001504 +#define NV50_3D_CLIPID_HEIGHT__MAX 0x00002000 + +#define NV50_3D_CLIPID_FILL_RECT_HORIZ 0x00001508 +#define NV50_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff +#define 
NV50_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0 +#define NV50_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000 +#define NV50_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16 + +#define NV50_3D_CLIPID_FILL_RECT_VERT 0x0000150c +#define NV50_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff +#define NV50_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0 +#define NV50_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000 +#define NV50_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16 + +#define NV50_3D_CLIP_DISTANCE_ENABLE 0x00001510 +#define NV50_3D_CLIP_DISTANCE_ENABLE_0 0x00000001 +#define NV50_3D_CLIP_DISTANCE_ENABLE_1 0x00000002 +#define NV50_3D_CLIP_DISTANCE_ENABLE_2 0x00000004 +#define NV50_3D_CLIP_DISTANCE_ENABLE_3 0x00000008 +#define NV50_3D_CLIP_DISTANCE_ENABLE_4 0x00000010 +#define NV50_3D_CLIP_DISTANCE_ENABLE_5 0x00000020 +#define NV50_3D_CLIP_DISTANCE_ENABLE_6 0x00000040 +#define NV50_3D_CLIP_DISTANCE_ENABLE_7 0x00000080 + +#define NV50_3D_SAMPLECNT_ENABLE 0x00001514 + +#define NV50_3D_POINT_SIZE 0x00001518 + +#define NV50_3D_ZCULL_STATCTRS_ENABLE 0x0000151c + +#define NV50_3D_POINT_SPRITE_ENABLE 0x00001520 + +#define NVA0_3D_UNK152C 0x0000152c +#define NVA0_3D_UNK152C_UNK0 0x00000001 +#define NVA0_3D_UNK152C_UNK1 0x00000010 +#define NVA0_3D_UNK152C_UNK2 0x00000100 +#define NVA0_3D_UNK152C_UNK3__MASK 0x000ff000 +#define NVA0_3D_UNK152C_UNK3__SHIFT 12 +#define NVA0_3D_UNK152C_UNK3__MAX 0x00000028 + +#define NV50_3D_COUNTER_RESET 0x00001530 +#define NV50_3D_COUNTER_RESET_SAMPLECNT 0x00000001 +#define NV50_3D_COUNTER_RESET_ZCULL_STATS 0x00000002 +#define NVA0_3D_COUNTER_RESET_STRMOUT_VERTICES 0x00000008 +#define NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK 0x00000010 +#define NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x00000011 +#define NV50_3D_COUNTER_RESET_VFETCH_VERTICES 0x00000012 +#define NV50_3D_COUNTER_RESET_VFETCH_PRIMITIVES 0x00000013 +#define NV50_3D_COUNTER_RESET_VP_LAUNCHES 0x00000015 +#define NV50_3D_COUNTER_RESET_GP_LAUNCHES 0x0000001a +#define NV50_3D_COUNTER_RESET_GP_PRIMITIVES_OUT 
0x0000001b +#define NV50_3D_COUNTER_RESET_RAST_PRIMITIVES_PRECLIP 0x0000001c +#define NV50_3D_COUNTER_RESET_RAST_PRIMITIVES_POSTCLIP 0x0000001d +#define NV50_3D_COUNTER_RESET_FP_PIXELS 0x0000001e + +#define NV50_3D_MULTISAMPLE_ENABLE 0x00001534 + +#define NV50_3D_ZETA_ENABLE 0x00001538 + +#define NV50_3D_MULTISAMPLE_CTRL 0x0000153c +#define NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001 +#define NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010 + +#define NV50_3D_NOPERSPECTIVE_BITMAP(i0) (0x00001540 + 0x4*(i0)) +#define NV50_3D_NOPERSPECTIVE_BITMAP__ESIZE 0x00000004 +#define NV50_3D_NOPERSPECTIVE_BITMAP__LEN 0x00000004 + +#define NV50_3D_COND_ADDRESS_HIGH 0x00001550 + +#define NV50_3D_COND_ADDRESS_LOW 0x00001554 + +#define NV50_3D_COND_MODE 0x00001558 +#define NV50_3D_COND_MODE_NEVER 0x00000000 +#define NV50_3D_COND_MODE_ALWAYS 0x00000001 +#define NV50_3D_COND_MODE_RES_NON_ZERO 0x00000002 +#define NV50_3D_COND_MODE_EQUAL 0x00000003 +#define NV50_3D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NV50_3D_TSC_ADDRESS_HIGH 0x0000155c + +#define NV50_3D_TSC_ADDRESS_LOW 0x00001560 +#define NV50_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020 + +#define NV50_3D_TSC_LIMIT 0x00001564 +#define NV50_3D_TSC_LIMIT__MAX 0x00001fff + +#define NV50_3D_UNK1568 0x00001568 + +#define NV50_3D_POLYGON_OFFSET_FACTOR 0x0000156c + +#define NV50_3D_LINE_SMOOTH_ENABLE 0x00001570 + +#define NV50_3D_TIC_ADDRESS_HIGH 0x00001574 + +#define NV50_3D_TIC_ADDRESS_LOW 0x00001578 + +#define NV50_3D_TIC_LIMIT 0x0000157c + +#define NV50_3D_PM_CONTROL(i0) (0x00001580 + 0x4*(i0)) +#define NV50_3D_PM_CONTROL__ESIZE 0x00000004 +#define NV50_3D_PM_CONTROL__LEN 0x00000004 +#define NV50_3D_PM_CONTROL_UNK0 0x00000001 +#define NV50_3D_PM_CONTROL_UNK1__MASK 0x00000070 +#define NV50_3D_PM_CONTROL_UNK1__SHIFT 4 +#define NV50_3D_PM_CONTROL_UNK2__MASK 0x00ffff00 +#define NV50_3D_PM_CONTROL_UNK2__SHIFT 8 +#define NV50_3D_PM_CONTROL_UNK3__MASK 0xff000000 +#define NV50_3D_PM_CONTROL_UNK3__SHIFT 24 + +#define NV50_3D_ZCULL_REGION 
0x00001590 + +#define NV50_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594 + +#define NV50_3D_STENCIL_BACK_OP_FAIL 0x00001598 +#define NV50_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define NV50_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NV50_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NV50_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NV50_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NV50_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_BACK_OP_ZPASS 0x000015a0 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NV50_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define 
NV50_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 + +#define NV50_3D_UNK15A8 0x000015a8 +#define NV50_3D_UNK15A8_UNK1__MASK 0x00000007 +#define NV50_3D_UNK15A8_UNK1__SHIFT 0 +#define NV50_3D_UNK15A8_UNK2__MASK 0x00000070 +#define NV50_3D_UNK15A8_UNK2__SHIFT 4 + +#define NV50_3D_UNK15AC 0x000015ac + +#define NV50_3D_UNK15B0 0x000015b0 +#define NV50_3D_UNK15B0_0 0x00000001 +#define NV50_3D_UNK15B0_1 0x00000010 +#define NV50_3D_UNK15B0_2 0x00000100 + +#define NV50_3D_CSAA_ENABLE 0x000015b4 + +#define NV50_3D_FRAMEBUFFER_SRGB 0x000015b8 + +#define NV50_3D_POLYGON_OFFSET_UNITS 0x000015bc + +#define NVA3_3D_UNK15C4 0x000015c4 + +#define NVA3_3D_UNK15C8 0x000015c8 + +#define NV50_3D_LAYER 0x000015cc +#define NV50_3D_LAYER_IDX__MASK 0x0000ffff +#define NV50_3D_LAYER_IDX__SHIFT 0 +#define NV50_3D_LAYER_USE_GP 0x00010000 + +#define NV50_3D_MULTISAMPLE_MODE 0x000015d0 +#define NV50_3D_MULTISAMPLE_MODE_MS1 0x00000000 +#define NV50_3D_MULTISAMPLE_MODE_MS2 0x00000001 +#define NV50_3D_MULTISAMPLE_MODE_MS4 0x00000002 +#define NV50_3D_MULTISAMPLE_MODE_MS8 0x00000003 +#define NV50_3D_MULTISAMPLE_MODE_MS8_ALT 0x00000004 +#define NV50_3D_MULTISAMPLE_MODE_MS2_ALT 0x00000005 +#define NV50_3D_MULTISAMPLE_MODE_UNK6 0x00000006 +#define NV50_3D_MULTISAMPLE_MODE_MS4_CS4 0x00000008 +#define NV50_3D_MULTISAMPLE_MODE_MS4_CS12 0x00000009 +#define NV50_3D_MULTISAMPLE_MODE_MS8_CS8 0x0000000a +#define NV50_3D_MULTISAMPLE_MODE_MS8_CS24 0x0000000b + +#define NV50_3D_VERTEX_BEGIN_D3D 0x000015d4 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define 
NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NV50_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000 +#define NV84_3D_VERTEX_BEGIN_D3D_PRIMITIVE_ID_CONT 0x20000000 +#define NVA0_3D_VERTEX_BEGIN_D3D_INSTANCE_CONT 0x40000000 + +#define NV50_3D_VERTEX_END_D3D 0x000015d8 +#define NV50_3D_VERTEX_END_D3D_UNK0 0x00000001 +#define NVA0_3D_VERTEX_END_D3D_UNK1 0x00000002 + +#define NV50_3D_VERTEX_BEGIN_GL 0x000015dc +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x10000000 +#define NV84_3D_VERTEX_BEGIN_GL_PRIMITIVE_ID_CONT 0x20000000 +#define NVA0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x40000000 + +#define NV50_3D_VERTEX_END_GL 0x000015e0 +#define NV50_3D_VERTEX_END_GL_UNK0 0x00000001 +#define 
NVA0_3D_VERTEX_END_GL_UNK1 0x00000002 + +#define NV50_3D_EDGEFLAG 0x000015e4 + +#define NV50_3D_VB_ELEMENT_U32 0x000015e8 + +#define NV50_3D_VB_ELEMENT_U16_SETUP 0x000015ec +#define NV50_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000 +#define NV50_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30 +#define NV50_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff +#define NV50_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0 + +#define NV50_3D_VB_ELEMENT_U16 0x000015f0 +#define NV50_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff +#define NV50_3D_VB_ELEMENT_U16_I0__SHIFT 0 +#define NV50_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000 +#define NV50_3D_VB_ELEMENT_U16_I1__SHIFT 16 + +#define NV50_3D_VERTEX_BASE_HIGH 0x000015f4 + +#define NV50_3D_VERTEX_BASE_LOW 0x000015f8 + +#define NV50_3D_VERTEX_DATA 0x00001640 + +#define NV50_3D_PRIM_RESTART_ENABLE 0x00001644 + +#define NV50_3D_PRIM_RESTART_INDEX 0x00001648 + +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001 +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010 +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100 +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000 + +#define NV50_3D_VP_ATTR_EN(i0) (0x00001650 + 0x4*(i0)) +#define NV50_3D_VP_ATTR_EN__ESIZE 0x00000004 +#define NV50_3D_VP_ATTR_EN__LEN 0x00000002 +#define NV50_3D_VP_ATTR_EN_7__MASK 0xf0000000 +#define NV50_3D_VP_ATTR_EN_7__SHIFT 28 +#define NV50_3D_VP_ATTR_EN_7_X 0x10000000 +#define NV50_3D_VP_ATTR_EN_7_Y 0x20000000 +#define NV50_3D_VP_ATTR_EN_7_Z 0x40000000 +#define NV50_3D_VP_ATTR_EN_7_W 0x80000000 +#define NV50_3D_VP_ATTR_EN_6__MASK 0x0f000000 +#define NV50_3D_VP_ATTR_EN_6__SHIFT 24 +#define NV50_3D_VP_ATTR_EN_6_X 0x01000000 +#define NV50_3D_VP_ATTR_EN_6_Y 0x02000000 +#define NV50_3D_VP_ATTR_EN_6_Z 0x04000000 +#define NV50_3D_VP_ATTR_EN_6_W 0x08000000 +#define NV50_3D_VP_ATTR_EN_5__MASK 0x00f00000 +#define NV50_3D_VP_ATTR_EN_5__SHIFT 20 +#define NV50_3D_VP_ATTR_EN_5_X 0x00100000 +#define NV50_3D_VP_ATTR_EN_5_Y 
0x00200000 +#define NV50_3D_VP_ATTR_EN_5_Z 0x00400000 +#define NV50_3D_VP_ATTR_EN_5_W 0x00800000 +#define NV50_3D_VP_ATTR_EN_4__MASK 0x000f0000 +#define NV50_3D_VP_ATTR_EN_4__SHIFT 16 +#define NV50_3D_VP_ATTR_EN_4_X 0x00010000 +#define NV50_3D_VP_ATTR_EN_4_Y 0x00020000 +#define NV50_3D_VP_ATTR_EN_4_Z 0x00040000 +#define NV50_3D_VP_ATTR_EN_4_W 0x00080000 +#define NV50_3D_VP_ATTR_EN_3__MASK 0x0000f000 +#define NV50_3D_VP_ATTR_EN_3__SHIFT 12 +#define NV50_3D_VP_ATTR_EN_3_X 0x00001000 +#define NV50_3D_VP_ATTR_EN_3_Y 0x00002000 +#define NV50_3D_VP_ATTR_EN_3_Z 0x00004000 +#define NV50_3D_VP_ATTR_EN_3_W 0x00008000 +#define NV50_3D_VP_ATTR_EN_2__MASK 0x00000f00 +#define NV50_3D_VP_ATTR_EN_2__SHIFT 8 +#define NV50_3D_VP_ATTR_EN_2_X 0x00000100 +#define NV50_3D_VP_ATTR_EN_2_Y 0x00000200 +#define NV50_3D_VP_ATTR_EN_2_Z 0x00000400 +#define NV50_3D_VP_ATTR_EN_2_W 0x00000800 +#define NV50_3D_VP_ATTR_EN_1__MASK 0x000000f0 +#define NV50_3D_VP_ATTR_EN_1__SHIFT 4 +#define NV50_3D_VP_ATTR_EN_1_X 0x00000010 +#define NV50_3D_VP_ATTR_EN_1_Y 0x00000020 +#define NV50_3D_VP_ATTR_EN_1_Z 0x00000040 +#define NV50_3D_VP_ATTR_EN_1_W 0x00000080 +#define NV50_3D_VP_ATTR_EN_0__MASK 0x0000000f +#define NV50_3D_VP_ATTR_EN_0__SHIFT 0 +#define NV50_3D_VP_ATTR_EN_0_X 0x00000001 +#define NV50_3D_VP_ATTR_EN_0_Y 0x00000002 +#define NV50_3D_VP_ATTR_EN_0_Z 0x00000004 +#define NV50_3D_VP_ATTR_EN_0_W 0x00000008 + +#define NV50_3D_POINT_SMOOTH_ENABLE 0x00001658 + +#define NV50_3D_POINT_RASTER_RULES 0x0000165c +#define NV50_3D_POINT_RASTER_RULES_OGL 0x00000000 +#define NV50_3D_POINT_RASTER_RULES_D3D 0x00000001 + +#define NV50_3D_POINT_SPRITE_CTRL 0x00001660 +#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN__MASK 0x00000010 +#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN__SHIFT 4 +#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN_LOWER_LEFT 0x00000000 +#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN_UPPER_LEFT 0x00000010 + +#define NVA0_3D_TEX_MISC 0x00001664 +#define NVA0_3D_TEX_MISC_UNK1 0x00000002 +#define 
NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004 + +#define NV50_3D_LINE_SMOOTH_BLUR 0x00001668 +#define NV50_3D_LINE_SMOOTH_BLUR_LOW 0x00000000 +#define NV50_3D_LINE_SMOOTH_BLUR_MEDIUM 0x00000001 +#define NV50_3D_LINE_SMOOTH_BLUR_HIGH 0x00000002 + +#define NV50_3D_LINE_STIPPLE_ENABLE 0x0000166c + +#define NV50_3D_COVERAGE_LUT(i0) (0x00001670 + 0x4*(i0)) +#define NV50_3D_COVERAGE_LUT__ESIZE 0x00000004 +#define NV50_3D_COVERAGE_LUT__LEN 0x00000004 +#define NV50_3D_COVERAGE_LUT_0__MASK 0x000000ff +#define NV50_3D_COVERAGE_LUT_0__SHIFT 0 +#define NV50_3D_COVERAGE_LUT_1__MASK 0x0000ff00 +#define NV50_3D_COVERAGE_LUT_1__SHIFT 8 +#define NV50_3D_COVERAGE_LUT_2__MASK 0x00ff0000 +#define NV50_3D_COVERAGE_LUT_2__SHIFT 16 +#define NV50_3D_COVERAGE_LUT_3__MASK 0xff000000 +#define NV50_3D_COVERAGE_LUT_3__SHIFT 24 + +#define NV50_3D_LINE_STIPPLE 0x00001680 +#define NV50_3D_LINE_STIPPLE_FACTOR_M1__MASK 0x000000ff +#define NV50_3D_LINE_STIPPLE_FACTOR_M1__SHIFT 0 +#define NV50_3D_LINE_STIPPLE_PATTERN__MASK 0x00ffff00 +#define NV50_3D_LINE_STIPPLE_PATTERN__SHIFT 8 + +#define NV50_3D_PROVOKING_VERTEX_LAST 0x00001684 + +#define NV50_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688 + +#define NV50_3D_POLYGON_STIPPLE_ENABLE 0x0000168c + +#define NV50_3D_UNK1690 0x00001690 +#define NV50_3D_UNK1690_ALWAYS_DERIV 0x00000001 +#define NV50_3D_UNK1690_UNK16 0x00010000 + +#define NV50_3D_SET_PROGRAM_CB 0x00001694 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM__MASK 0x000000f0 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM__SHIFT 4 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX 0x00000000 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY 0x00000020 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT 0x00000030 +#define NV50_3D_SET_PROGRAM_CB_INDEX__MASK 0x00000f00 +#define NV50_3D_SET_PROGRAM_CB_INDEX__SHIFT 8 +#define NV50_3D_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000 +#define NV50_3D_SET_PROGRAM_CB_BUFFER__SHIFT 12 +#define NV50_3D_SET_PROGRAM_CB_VALID 0x00000001 + +#define NV50_3D_UNK1698 0x00001698 +#define NV50_3D_UNK1698_0 
0x00000001 +#define NV50_3D_UNK1698_1 0x00000010 +#define NV50_3D_UNK1698_2 0x00000100 + +#define NVA3_3D_SAMPLE_SHADING 0x0000169c +#define NVA3_3D_SAMPLE_SHADING_MIN_SAMPLES__MASK 0x0000000f +#define NVA3_3D_SAMPLE_SHADING_MIN_SAMPLES__SHIFT 0 +#define NVA3_3D_SAMPLE_SHADING_ENABLE 0x00000010 + +#define NVA3_3D_UNK16A0 0x000016a0 + +#define NV50_3D_VP_RESULT_MAP_SIZE 0x000016ac + +#define NV50_3D_VP_REG_ALLOC_TEMP 0x000016b0 + +#define NVA0_3D_UNK16B4 0x000016b4 +#define NVA0_3D_UNK16B4_UNK0 0x00000001 +#define NVA3_3D_UNK16B4_UNK1 0x00000002 + +#define NV50_3D_VP_REG_ALLOC_RESULT 0x000016b8 + +#define NV50_3D_VP_RESULT_MAP(i0) (0x000016bc + 0x4*(i0)) +#define NV50_3D_VP_RESULT_MAP__ESIZE 0x00000004 +#define NV50_3D_VP_RESULT_MAP__LEN 0x00000011 +#define NV50_3D_VP_RESULT_MAP_0__MASK 0x000000ff +#define NV50_3D_VP_RESULT_MAP_0__SHIFT 0 +#define NV50_3D_VP_RESULT_MAP_1__MASK 0x0000ff00 +#define NV50_3D_VP_RESULT_MAP_1__SHIFT 8 +#define NV50_3D_VP_RESULT_MAP_2__MASK 0x00ff0000 +#define NV50_3D_VP_RESULT_MAP_2__SHIFT 16 +#define NV50_3D_VP_RESULT_MAP_3__MASK 0xff000000 +#define NV50_3D_VP_RESULT_MAP_3__SHIFT 24 + +#define NV50_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0)) +#define NV50_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004 +#define NV50_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020 + +#define NVA0_3D_STRMOUT_OFFSET(i0) (0x00001780 + 0x4*(i0)) +#define NVA0_3D_STRMOUT_OFFSET__ESIZE 0x00000004 +#define NVA0_3D_STRMOUT_OFFSET__LEN 0x00000004 + +#define NV50_3D_GP_ENABLE 0x00001798 + +#define NV50_3D_GP_REG_ALLOC_TEMP 0x000017a0 + +#define NV50_3D_GP_REG_ALLOC_RESULT 0x000017a8 + +#define NV50_3D_GP_RESULT_MAP_SIZE 0x000017ac + +#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE 0x000017b0 +#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS 0x00000001 +#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP 0x00000002 +#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP 0x00000003 + +#define NV50_3D_RASTERIZE_ENABLE 0x000017b4 + +#define NV50_3D_STRMOUT_ENABLE 
0x000017b8 + +#define NV50_3D_GP_RESULT_MAP(i0) (0x000017fc + 0x4*(i0)) +#define NV50_3D_GP_RESULT_MAP__ESIZE 0x00000004 +#define NV50_3D_GP_RESULT_MAP__LEN 0x00000021 +#define NV50_3D_GP_RESULT_MAP_0__MASK 0x000000ff +#define NV50_3D_GP_RESULT_MAP_0__SHIFT 0 +#define NV50_3D_GP_RESULT_MAP_1__MASK 0x0000ff00 +#define NV50_3D_GP_RESULT_MAP_1__SHIFT 8 +#define NV50_3D_GP_RESULT_MAP_2__MASK 0x00ff0000 +#define NV50_3D_GP_RESULT_MAP_2__SHIFT 16 +#define NV50_3D_GP_RESULT_MAP_3__MASK 0xff000000 +#define NV50_3D_GP_RESULT_MAP_3__SHIFT 24 + +#define NV50_3D_POLYGON_OFFSET_CLAMP 0x0000187c + +#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT(i0) (0x00001880 + 0x4*(i0)) +#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT__ESIZE 0x00000004 +#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT__LEN 0x00000020 + +#define NV50_3D_GP_VIEWPORT_ID_ENABLE 0x00001900 + +#define NV50_3D_SEMANTIC_COLOR 0x00001904 +#define NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK 0x000000ff +#define NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT 0 +#define NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK 0x0000ff00 +#define NV50_3D_SEMANTIC_COLOR_BFC0_ID__SHIFT 8 +#define NV50_3D_SEMANTIC_COLOR_COLR_NR__MASK 0x00ff0000 +#define NV50_3D_SEMANTIC_COLOR_COLR_NR__SHIFT 16 +#define NV50_3D_SEMANTIC_COLOR_CLMP_EN 0x01000000 + +#define NV50_3D_SEMANTIC_CLIP 0x00001908 +#define NV50_3D_SEMANTIC_CLIP_CLIP_START__MASK 0x000000ff +#define NV50_3D_SEMANTIC_CLIP_CLIP_START__SHIFT 0 +#define NV50_3D_SEMANTIC_CLIP_CLIP_NUM__MASK 0x00000f00 +#define NV50_3D_SEMANTIC_CLIP_CLIP_NUM__SHIFT 8 + +#define NV50_3D_SEMANTIC_LAYER 0x0000190c +#define NV50_3D_SEMANTIC_LAYER_LAYER_ID__MASK 0x000000ff +#define NV50_3D_SEMANTIC_LAYER_LAYER_ID__SHIFT 0 + +#define NV50_3D_SEMANTIC_PTSZ 0x00001910 +#define NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK 0x00000001 +#define NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__SHIFT 0 +#define NV50_3D_SEMANTIC_PTSZ_PTSZ_ID__MASK 0x00000ff0 +#define NV50_3D_SEMANTIC_PTSZ_PTSZ_ID__SHIFT 4 + +#define NV50_3D_SEMANTIC_PRIM_ID 0x00001914 +#define 
NV50_3D_SEMANTIC_PRIM_ID_PRIM_ID__MASK 0x000000ff +#define NV50_3D_SEMANTIC_PRIM_ID_PRIM_ID__SHIFT 0 + +#define NV50_3D_CULL_FACE_ENABLE 0x00001918 + +#define NV50_3D_FRONT_FACE 0x0000191c +#define NV50_3D_FRONT_FACE_CW 0x00000900 +#define NV50_3D_FRONT_FACE_CCW 0x00000901 + +#define NV50_3D_CULL_FACE 0x00001920 +#define NV50_3D_CULL_FACE_FRONT 0x00000404 +#define NV50_3D_CULL_FACE_BACK 0x00000405 +#define NV50_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 + +#define NV50_3D_LINE_LAST_PIXEL 0x00001924 + +#define NVA3_3D_FP_MULTISAMPLE 0x00001928 +#define NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK 0x00000001 +#define NVA3_3D_FP_MULTISAMPLE_FORCE_PER_SAMPLE 0x00000002 + +#define NV50_3D_VIEWPORT_TRANSFORM_EN 0x0000192c + +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001 +#define NVA0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002 +#define NVA0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000 +#define NV84_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000 + +#define NV50_3D_CLIP_DISTANCE_MODE 0x00001940 +#define NV50_3D_CLIP_DISTANCE_MODE_0__MASK 0x00000001 +#define NV50_3D_CLIP_DISTANCE_MODE_0__SHIFT 0 +#define NV50_3D_CLIP_DISTANCE_MODE_0_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_0_CULL 0x00000001 +#define NV50_3D_CLIP_DISTANCE_MODE_1__MASK 0x00000010 +#define NV50_3D_CLIP_DISTANCE_MODE_1__SHIFT 4 +#define NV50_3D_CLIP_DISTANCE_MODE_1_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_1_CULL 
0x00000010 +#define NV50_3D_CLIP_DISTANCE_MODE_2__MASK 0x00000100 +#define NV50_3D_CLIP_DISTANCE_MODE_2__SHIFT 8 +#define NV50_3D_CLIP_DISTANCE_MODE_2_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_2_CULL 0x00000100 +#define NV50_3D_CLIP_DISTANCE_MODE_3__MASK 0x00001000 +#define NV50_3D_CLIP_DISTANCE_MODE_3__SHIFT 12 +#define NV50_3D_CLIP_DISTANCE_MODE_3_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_3_CULL 0x00001000 +#define NV50_3D_CLIP_DISTANCE_MODE_4__MASK 0x00010000 +#define NV50_3D_CLIP_DISTANCE_MODE_4__SHIFT 16 +#define NV50_3D_CLIP_DISTANCE_MODE_4_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_4_CULL 0x00010000 +#define NV50_3D_CLIP_DISTANCE_MODE_5__MASK 0x00100000 +#define NV50_3D_CLIP_DISTANCE_MODE_5__SHIFT 20 +#define NV50_3D_CLIP_DISTANCE_MODE_5_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_5_CULL 0x00100000 +#define NV50_3D_CLIP_DISTANCE_MODE_6__MASK 0x01000000 +#define NV50_3D_CLIP_DISTANCE_MODE_6__SHIFT 24 +#define NV50_3D_CLIP_DISTANCE_MODE_6_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_6_CULL 0x01000000 +#define NV50_3D_CLIP_DISTANCE_MODE_7__MASK 0x10000000 +#define NV50_3D_CLIP_DISTANCE_MODE_7__SHIFT 28 +#define NV50_3D_CLIP_DISTANCE_MODE_7_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_7_CULL 0x10000000 + +#define NVA3_3D_UNK1944 0x00001944 + +#define NV50_3D_CLIP_RECTS_EN 0x0000194c + +#define NV50_3D_CLIP_RECTS_MODE 0x00001950 +#define NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000 +#define NV50_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001 +#define NV50_3D_CLIP_RECTS_MODE_NEVER 0x00000002 + +#define NV50_3D_ZCULL_VALIDATE 0x00001954 +#define NV50_3D_ZCULL_VALIDATE_CLEAR_UNK0 0x00000001 +#define NV50_3D_ZCULL_VALIDATE_CLEAR_UNK1 0x00000010 + +#define NV50_3D_ZCULL_INVALIDATE 0x00001958 + +#define NVA3_3D_UNK1960 0x00001960 +#define NVA3_3D_UNK1960_0 0x00000001 +#define NVA3_3D_UNK1960_1 0x00000010 + +#define NV50_3D_UNK1968 0x00001968 +#define NV50_3D_UNK1968_0 0x00000001 +#define NV50_3D_UNK1968_1 0x00000010 + 
+#define NV50_3D_FP_CTRL_UNK196C 0x0000196c +#define NV50_3D_FP_CTRL_UNK196C_0 0x00000001 +#define NV50_3D_FP_CTRL_UNK196C_1 0x00000010 + +#define NV50_3D_UNK1978 0x00001978 + +#define NV50_3D_CLIPID_ENABLE 0x0000197c + +#define NV50_3D_CLIPID_WIDTH 0x00001980 +#define NV50_3D_CLIPID_WIDTH__MAX 0x00002000 +#define NV50_3D_CLIPID_WIDTH__ALIGN 0x00000040 + +#define NV50_3D_CLIPID_ID 0x00001984 + +#define NV50_3D_FP_INTERPOLANT_CTRL 0x00001988 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK__MASK 0xff000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK__SHIFT 24 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_X 0x01000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_Y 0x02000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_Z 0x04000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_W 0x08000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__MASK 0x00ff0000 +#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT 16 +#define NV50_3D_FP_INTERPOLANT_CTRL_OFFSET__MASK 0x0000ff00 +#define NV50_3D_FP_INTERPOLANT_CTRL_OFFSET__SHIFT 8 +#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT__MASK 0x000000ff +#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT 0 + +#define NV50_3D_FP_REG_ALLOC_TEMP 0x0000198c + +#define NV50_3D_REG_MODE 0x000019a0 +#define NV50_3D_REG_MODE_PACKED 0x00000001 +#define NV50_3D_REG_MODE_STRIPED 0x00000002 + +#define NV50_3D_FP_CONTROL 0x000019a8 +#define NV50_3D_FP_CONTROL_MULTIPLE_RESULTS 0x00000001 +#define NV50_3D_FP_CONTROL_EXPORTS_Z 0x00000100 +#define NV50_3D_FP_CONTROL_USES_KIL 0x00100000 + +#define NV50_3D_DEPTH_BOUNDS_EN 0x000019bc + +#define NV50_3D_UNK19C0 0x000019c0 + +#define NV50_3D_LOGIC_OP_ENABLE 0x000019c4 + +#define NV50_3D_LOGIC_OP 0x000019c8 +#define NV50_3D_LOGIC_OP_CLEAR 0x00001500 +#define NV50_3D_LOGIC_OP_AND 0x00001501 +#define NV50_3D_LOGIC_OP_AND_REVERSE 0x00001502 +#define NV50_3D_LOGIC_OP_COPY 0x00001503 +#define NV50_3D_LOGIC_OP_AND_INVERTED 0x00001504 +#define NV50_3D_LOGIC_OP_NOOP 0x00001505 +#define NV50_3D_LOGIC_OP_XOR 0x00001506 +#define 
NV50_3D_LOGIC_OP_OR 0x00001507 +#define NV50_3D_LOGIC_OP_NOR 0x00001508 +#define NV50_3D_LOGIC_OP_EQUIV 0x00001509 +#define NV50_3D_LOGIC_OP_INVERT 0x0000150a +#define NV50_3D_LOGIC_OP_OR_REVERSE 0x0000150b +#define NV50_3D_LOGIC_OP_COPY_INVERTED 0x0000150c +#define NV50_3D_LOGIC_OP_OR_INVERTED 0x0000150d +#define NV50_3D_LOGIC_OP_NAND 0x0000150e +#define NV50_3D_LOGIC_OP_SET 0x0000150f + +#define NV50_3D_ZETA_COMP_ENABLE 0x000019cc + +#define NV50_3D_CLEAR_BUFFERS 0x000019d0 +#define NV50_3D_CLEAR_BUFFERS_Z 0x00000001 +#define NV50_3D_CLEAR_BUFFERS_S 0x00000002 +#define NV50_3D_CLEAR_BUFFERS_R 0x00000004 +#define NV50_3D_CLEAR_BUFFERS_G 0x00000008 +#define NV50_3D_CLEAR_BUFFERS_B 0x00000010 +#define NV50_3D_CLEAR_BUFFERS_A 0x00000020 +#define NV50_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0 +#define NV50_3D_CLEAR_BUFFERS_RT__SHIFT 6 +#define NV50_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00 +#define NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT 10 + +#define NV50_3D_CLIPID_FILL 0x000019d4 + +#define NV50_3D_UNK19D8(i0) (0x000019d8 + 0x4*(i0)) +#define NV50_3D_UNK19D8__ESIZE 0x00000004 +#define NV50_3D_UNK19D8__LEN 0x00000002 + +#define NV50_3D_RT_COMP_ENABLE(i0) (0x000019e0 + 0x4*(i0)) +#define NV50_3D_RT_COMP_ENABLE__ESIZE 0x00000004 +#define NV50_3D_RT_COMP_ENABLE__LEN 0x00000008 + +#define NV50_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0)) +#define NV50_3D_COLOR_MASK__ESIZE 0x00000004 +#define NV50_3D_COLOR_MASK__LEN 0x00000008 +#define NV50_3D_COLOR_MASK_R 0x0000000f +#define NV50_3D_COLOR_MASK_G 0x000000f0 +#define NV50_3D_COLOR_MASK_B 0x00000f00 +#define NV50_3D_COLOR_MASK_A 0x0000f000 + +#define NV50_3D_UNK1A20 0x00001a20 + +#define NV50_3D_DELAY 0x00001a24 + +#define NV50_3D_UNK1A28 0x00001a28 +#define NV50_3D_UNK1A28_0__MASK 0x000000ff +#define NV50_3D_UNK1A28_0__SHIFT 0 +#define NV50_3D_UNK1A28_1 0x00000100 + +#define NV50_3D_UNK1A2C 0x00001a2c + +#define NV50_3D_UNK1A30 0x00001a30 + +#define NV50_3D_UNK1A34 0x00001a34 + +#define NV50_3D_UNK1A38 0x00001a38 + +#define 
NV50_3D_UNK1A3C 0x00001a3c + +#define NV50_3D_UNK1A40(i0) (0x00001a40 + 0x4*(i0)) +#define NV50_3D_UNK1A40__ESIZE 0x00000004 +#define NV50_3D_UNK1A40__LEN 0x00000010 +#define NV50_3D_UNK1A40_0__MASK 0x00000007 +#define NV50_3D_UNK1A40_0__SHIFT 0 +#define NV50_3D_UNK1A40_1__MASK 0x00000070 +#define NV50_3D_UNK1A40_1__SHIFT 4 +#define NV50_3D_UNK1A40_2__MASK 0x00000700 +#define NV50_3D_UNK1A40_2__SHIFT 8 +#define NV50_3D_UNK1A40_3__MASK 0x00007000 +#define NV50_3D_UNK1A40_3__SHIFT 12 +#define NV50_3D_UNK1A40_4__MASK 0x00070000 +#define NV50_3D_UNK1A40_4__SHIFT 16 +#define NV50_3D_UNK1A40_5__MASK 0x00700000 +#define NV50_3D_UNK1A40_5__SHIFT 20 +#define NV50_3D_UNK1A40_6__MASK 0x07000000 +#define NV50_3D_UNK1A40_6__SHIFT 24 +#define NV50_3D_UNK1A40_7__MASK 0x70000000 +#define NV50_3D_UNK1A40_7__SHIFT 28 + +#define NV50_3D_STRMOUT_ADDRESS_HIGH(i0) (0x00001a80 + 0x10*(i0)) +#define NV50_3D_STRMOUT_ADDRESS_HIGH__ESIZE 0x00000010 +#define NV50_3D_STRMOUT_ADDRESS_HIGH__LEN 0x00000004 + +#define NV50_3D_STRMOUT_ADDRESS_LOW(i0) (0x00001a84 + 0x10*(i0)) +#define NV50_3D_STRMOUT_ADDRESS_LOW__ESIZE 0x00000010 +#define NV50_3D_STRMOUT_ADDRESS_LOW__LEN 0x00000004 + +#define NV50_3D_STRMOUT_NUM_ATTRIBS(i0) (0x00001a88 + 0x10*(i0)) +#define NV50_3D_STRMOUT_NUM_ATTRIBS__ESIZE 0x00000010 +#define NV50_3D_STRMOUT_NUM_ATTRIBS__LEN 0x00000004 +#define NV50_3D_STRMOUT_NUM_ATTRIBS__MAX 0x00000040 + +#define NVA0_3D_STRMOUT_OFFSET_LIMIT(i0) (0x00001a8c + 0x10*(i0)) +#define NVA0_3D_STRMOUT_OFFSET_LIMIT__ESIZE 0x00000010 +#define NVA0_3D_STRMOUT_OFFSET_LIMIT__LEN 0x00000004 + +#define NV50_3D_VERTEX_ARRAY_ATTRIB(i0) (0x00001ac0 + 0x4*(i0)) +#define NV50_3D_VERTEX_ARRAY_ATTRIB__ESIZE 0x00000004 +#define NV50_3D_VERTEX_ARRAY_ATTRIB__LEN 0x00000010 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_BUFFER__MASK 0x0000000f +#define NV50_3D_VERTEX_ARRAY_ATTRIB_BUFFER__SHIFT 0 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_CONST 0x00000010 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_OFFSET__MASK 0x0007ffe0 +#define 
NV50_3D_VERTEX_ARRAY_ATTRIB_OFFSET__SHIFT 5 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT__MASK 0x01f80000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT__SHIFT 19 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 0x00080000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32 0x00100000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16 0x00180000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32 0x00200000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16 0x00280000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8 0x00500000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16 0x00780000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32 0x00900000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8 0x00980000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8 0x00c00000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16 0x00d80000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8 0x00e80000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_10_10_10_2 0x01800000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__MASK 0x7e000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__SHIFT 25 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x7e000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x24000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x12000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x5a000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x6c000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x48000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x36000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_BGRA 0x80000000 + +#define NV50_3D_QUERY_ADDRESS_HIGH 0x00001b00 + +#define NV50_3D_QUERY_ADDRESS_LOW 0x00001b04 + +#define NV50_3D_QUERY_SEQUENCE 0x00001b08 + +#define NV50_3D_QUERY_GET 0x00001b0c +#define NV50_3D_QUERY_GET_MODE__MASK 0x00000003 +#define NV50_3D_QUERY_GET_MODE__SHIFT 0 +#define NV50_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000 +#define NV50_3D_QUERY_GET_MODE_SYNC 0x00000001 +#define NV50_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002 +#define 
NV50_3D_QUERY_GET_UNK4 0x00000010 +#define NVA0_3D_QUERY_GET_INDEX__MASK 0x000000e0 +#define NVA0_3D_QUERY_GET_INDEX__SHIFT 5 +#define NV50_3D_QUERY_GET_UNK8 0x00000100 +#define NV50_3D_QUERY_GET_UNIT__MASK 0x0000f000 +#define NV50_3D_QUERY_GET_UNIT__SHIFT 12 +#define NV50_3D_QUERY_GET_UNIT_UNK00 0x00000000 +#define NV50_3D_QUERY_GET_UNIT_VFETCH 0x00001000 +#define NV50_3D_QUERY_GET_UNIT_VP 0x00002000 +#define NV50_3D_QUERY_GET_UNIT_RAST 0x00004000 +#define NV50_3D_QUERY_GET_UNIT_STRMOUT 0x00005000 +#define NV50_3D_QUERY_GET_UNIT_GP 0x00006000 +#define NV50_3D_QUERY_GET_UNIT_ZCULL 0x00007000 +#define NV50_3D_QUERY_GET_UNIT_TPROP 0x0000a000 +#define NV50_3D_QUERY_GET_UNIT_UNK0C 0x0000c000 +#define NV50_3D_QUERY_GET_UNIT_CROP 0x0000f000 +#define NV50_3D_QUERY_GET_SYNC_COND__MASK 0x00010000 +#define NV50_3D_QUERY_GET_SYNC_COND__SHIFT 16 +#define NV50_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000 +#define NV50_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000 +#define NV50_3D_QUERY_GET_INTR 0x00100000 +#define NV50_3D_QUERY_GET_TYPE__MASK 0x00800000 +#define NV50_3D_QUERY_GET_TYPE__SHIFT 23 +#define NV50_3D_QUERY_GET_TYPE_QUERY 0x00000000 +#define NV50_3D_QUERY_GET_TYPE_COUNTER 0x00800000 +#define NV50_3D_QUERY_GET_QUERY_SELECT__MASK 0x0f000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT__SHIFT 24 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZERO 0x00000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_SAMPLECNT 0x01000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_STRMOUT_NO_OVERFLOW 0x02000000 +#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_DROPPED_PRIMITIVES 0x03000000 +#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_VERTICES 0x04000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK0 0x05000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK1 0x06000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK2 0x07000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK3 0x08000000 +#define NVA0_3D_QUERY_GET_QUERY_SELECT_RT_UNK14 0x0c000000 +#define 
NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_OFFSET 0x0d000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT__MASK 0x0f000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT__SHIFT 24 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_VFETCH_VERTICES 0x00000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_VFETCH_PRIMITIVES 0x01000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_VP_LAUNCHES 0x02000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_GP_LAUNCHES 0x03000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_GP_PRIMITIVES_OUT 0x04000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_TRANSFORM_FEEDBACK 0x05000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_GENERATED_PRIMITIVES 0x06000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_RAST_PRIMITIVES_PRECLIP 0x07000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_RAST_PRIMITIVES_POSTCLIP 0x08000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_FP_PIXELS 0x09000000 +#define NV84_3D_QUERY_GET_COUNTER_SELECT_UNK0A 0x0a000000 +#define NVA0_3D_QUERY_GET_COUNTER_SELECT_UNK0C 0x0c000000 +#define NV50_3D_QUERY_GET_SHORT 0x10000000 + +#define NVA3_3D_VP_RESULT_MAP_ALT(i0) (0x00001b3c + 0x4*(i0)) +#define NVA3_3D_VP_RESULT_MAP_ALT__ESIZE 0x00000004 +#define NVA3_3D_VP_RESULT_MAP_ALT__LEN 0x00000020 +#define NVA3_3D_VP_RESULT_MAP_ALT_0__MASK 0x000000ff +#define NVA3_3D_VP_RESULT_MAP_ALT_0__SHIFT 0 +#define NVA3_3D_VP_RESULT_MAP_ALT_1__MASK 0x0000ff00 +#define NVA3_3D_VP_RESULT_MAP_ALT_1__SHIFT 8 +#define NVA3_3D_VP_RESULT_MAP_ALT_2__MASK 0x00ff0000 +#define NVA3_3D_VP_RESULT_MAP_ALT_2__SHIFT 16 +#define NVA3_3D_VP_RESULT_MAP_ALT_3__MASK 0xff000000 +#define NVA3_3D_VP_RESULT_MAP_ALT_3__SHIFT 24 + +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT(i0) (0x00001c00 + 0x10*(i0)) +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT__ESIZE 0x00000010 +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT__LEN 0x00000020 +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_STRIDE__MASK 0x00000fff +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_STRIDE__SHIFT 0 +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_ENABLE 0x20000000 + +#define 
NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT(i0) (0x00001c04 + 0x10*(i0)) +#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT__ESIZE 0x00000010 +#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT__LEN 0x00000020 + +#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT(i0) (0x00001c08 + 0x10*(i0)) +#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT__ESIZE 0x00000010 +#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT__LEN 0x00000020 + +#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT(i0) (0x00001c0c + 0x10*(i0)) +#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT__ESIZE 0x00000010 +#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT__LEN 0x00000020 + +#define NVA3_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0)) +#define NVA3_3D_IBLEND__ESIZE 0x00000020 +#define NVA3_3D_IBLEND__LEN 0x00000008 + +#define NVA3_3D_IBLEND_SEPARATE_ALPHA(i0) (0x00001e00 + 0x20*(i0)) + +#define NVA3_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0)) +#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NVA3_3D_IBLEND_EQUATION_RGB_MIN 0x00008007 +#define NVA3_3D_IBLEND_EQUATION_RGB_MAX 0x00008008 +#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVA3_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0)) + +#define NVA3_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0)) + +#define NVA3_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0)) +#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NVA3_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NVA3_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVA3_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0)) + +#define NVA3_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0)) + +#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT(i0) (0x00001f00 + 0x8*(i0)) +#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT__ESIZE 0x00000008 +#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT__LEN 0x00000020 + 
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT(i0) (0x00001f04 + 0x8*(i0)) +#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT__ESIZE 0x00000008 +#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT__LEN 0x00000020 + + +#endif /* RNNDB_NV50_3D_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h new file mode 100644 index 00000000000..f26ac45da40 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h @@ -0,0 +1,98 @@ +#ifndef NV_3DDEFS_XML +#define NV_3DDEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv50_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38) +- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28) +- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20) +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this 
permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000 +#define NV50_3D_BLEND_FACTOR_ONE 0x00004001 +#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303 +#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305 +#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308 +#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002 +#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004 +#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901 +#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903 + +#endif /* NV_3DDEFS_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_blit.h b/src/gallium/drivers/nouveau/nv50/nv50_blit.h new file mode 100644 index 00000000000..bdd6a63d1f1 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_blit.h @@ -0,0 +1,223 @@ + +#ifndef __NV50_BLIT_H__ +#define 
__NV50_BLIT_H__ + +#include "util/u_inlines.h" +#include "util/u_format.h" + +void * +nv50_blitter_make_fp(struct pipe_context *, + unsigned mode, + enum pipe_texture_target); + +unsigned +nv50_blit_select_mode(const struct pipe_blit_info *); + +/* Converted to a pipe->blit. */ +void +nv50_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *); + +#define NV50_BLIT_MODE_PASS 0 /* pass through TEX $t0/$s0 output */ +#define NV50_BLIT_MODE_Z24S8 1 /* encode ZS values for RGBA unorm8 */ +#define NV50_BLIT_MODE_S8Z24 2 +#define NV50_BLIT_MODE_X24S8 3 +#define NV50_BLIT_MODE_S8X24 4 +#define NV50_BLIT_MODE_Z24X8 5 +#define NV50_BLIT_MODE_X8Z24 6 +#define NV50_BLIT_MODE_ZS 7 /* put $t0/$s0 into R, $t1/$s1 into G */ +#define NV50_BLIT_MODE_XS 8 /* put $t1/$s1 into G */ +#define NV50_BLIT_MODES 9 + +/* CUBE and RECT textures are reinterpreted as 2D(_ARRAY) */ +#define NV50_BLIT_TEXTURE_BUFFER 0 +#define NV50_BLIT_TEXTURE_1D 1 +#define NV50_BLIT_TEXTURE_2D 2 +#define NV50_BLIT_TEXTURE_3D 3 +#define NV50_BLIT_TEXTURE_1D_ARRAY 4 +#define NV50_BLIT_TEXTURE_2D_ARRAY 5 +#define NV50_BLIT_MAX_TEXTURE_TYPES 6 + +static INLINE unsigned +nv50_blit_texture_type(enum pipe_texture_target target) +{ + switch (target) { + case PIPE_TEXTURE_1D: return NV50_BLIT_TEXTURE_1D; + case PIPE_TEXTURE_2D: return NV50_BLIT_TEXTURE_2D; + case PIPE_TEXTURE_3D: return NV50_BLIT_TEXTURE_3D; + case PIPE_TEXTURE_1D_ARRAY: return NV50_BLIT_TEXTURE_1D_ARRAY; + case PIPE_TEXTURE_2D_ARRAY: return NV50_BLIT_TEXTURE_2D_ARRAY; + default: + assert(target == PIPE_BUFFER); + return NV50_BLIT_TEXTURE_BUFFER; + } +} + +static INLINE unsigned +nv50_blit_get_tgsi_texture_target(enum pipe_texture_target target) +{ + switch (target) { + case PIPE_TEXTURE_1D: return TGSI_TEXTURE_1D; + case PIPE_TEXTURE_2D: return TGSI_TEXTURE_2D; + case PIPE_TEXTURE_3D: return TGSI_TEXTURE_3D; + case PIPE_TEXTURE_1D_ARRAY: return TGSI_TEXTURE_1D_ARRAY; + case PIPE_TEXTURE_2D_ARRAY: return TGSI_TEXTURE_2D_ARRAY; + 
default: + assert(target == PIPE_BUFFER); + return TGSI_TEXTURE_BUFFER; + } +} + +static INLINE enum pipe_texture_target +nv50_blit_reinterpret_pipe_texture_target(enum pipe_texture_target target) +{ + switch (target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return PIPE_TEXTURE_2D_ARRAY; + case PIPE_TEXTURE_RECT: + return PIPE_TEXTURE_2D; + default: + return target; + } +} + +static INLINE unsigned +nv50_blit_get_filter(const struct pipe_blit_info *info) +{ + if (info->dst.resource->nr_samples < info->src.resource->nr_samples) + return util_format_is_depth_or_stencil(info->src.format) ? 0 : 1; + + if (info->filter != PIPE_TEX_FILTER_LINEAR) + return 0; + + if ((info->dst.box.width == info->src.box.width || + info->dst.box.width == -info->src.box.width) && + (info->dst.box.height == info->src.box.height || + info->dst.box.height == -info->src.box.height)) + return 0; + + return 1; +} + +/* Since shaders cannot export stencil, we cannot copy stencil values when + * rendering to ZETA, so we attach the ZS surface to a colour render target. 
+ */ +static INLINE enum pipe_format +nv50_blit_zeta_to_colour_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return PIPE_FORMAT_R16_UNORM; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + return PIPE_FORMAT_R8G8B8A8_UNORM; + case PIPE_FORMAT_Z32_FLOAT: + return PIPE_FORMAT_R32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return PIPE_FORMAT_R32G32_FLOAT; + default: + assert(0); + return PIPE_FORMAT_NONE; + } +} + + +static INLINE uint16_t +nv50_blit_derive_color_mask(const struct pipe_blit_info *info) +{ + const unsigned mask = info->mask; + + uint16_t color_mask = 0; + + switch (info->dst.format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + if (mask & PIPE_MASK_S) + color_mask |= 0x1000; + /* fall through */ + case PIPE_FORMAT_Z24X8_UNORM: + if (mask & PIPE_MASK_Z) + color_mask |= 0x0111; + break; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + if (mask & PIPE_MASK_Z) + color_mask |= 0x1110; + if (mask & PIPE_MASK_S) + color_mask |= 0x0001; + break; + default: + if (mask & (PIPE_MASK_R | PIPE_MASK_Z)) color_mask |= 0x0001; + if (mask & (PIPE_MASK_G | PIPE_MASK_S)) color_mask |= 0x0010; + if (mask & PIPE_MASK_B) color_mask |= 0x0100; + if (mask & PIPE_MASK_A) color_mask |= 0x1000; + break; + } + + return color_mask; +} + +static INLINE uint32_t +nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info) +{ + uint32_t mask = 0; + + switch (info->dst.format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + if (info->mask & PIPE_MASK_Z) mask |= 0x00ffffff; + if (info->mask & PIPE_MASK_S) mask |= 0xff000000; + break; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + if (info->mask & PIPE_MASK_Z) mask |= 0xffffff00; + if (info->mask & PIPE_MASK_S) mask |= 0x000000ff; + break; + case PIPE_FORMAT_X8Z24_UNORM: + if (info->mask & PIPE_MASK_Z) mask = 0x00ffffff; + break; + default: + mask = 0xffffffff; + break; + } + return mask; +} + +#if NOUVEAU_DRIVER == 0xc0 +# define nv50_format_table 
nvc0_format_table +#endif + +/* return TRUE for formats that can be converted among each other by NVC0_2D */ +static INLINE boolean +nv50_2d_dst_format_faithful(enum pipe_format format) +{ + const uint64_t mask = + NV50_ENG2D_SUPPORTED_FORMATS & + ~NV50_ENG2D_NOCONVERT_FORMATS; + uint8_t id = nv50_format_table[format].rt; + return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0))); +} +static INLINE boolean +nv50_2d_src_format_faithful(enum pipe_format format) +{ + const uint64_t mask = + NV50_ENG2D_SUPPORTED_FORMATS & + ~(NV50_ENG2D_LUMINANCE_FORMATS | NV50_ENG2D_INTENSITY_FORMATS); + uint8_t id = nv50_format_table[format].rt; + return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0))); +} + +static INLINE boolean +nv50_2d_format_supported(enum pipe_format format) +{ + uint8_t id = nv50_format_table[format].rt; + return (id >= 0xc0) && + (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))); +} + +static INLINE boolean +nv50_2d_dst_format_ops_supported(enum pipe_format format) +{ + uint8_t id = nv50_format_table[format].rt; + return (id >= 0xc0) && + (NV50_ENG2D_OPERATION_FORMATS & (1ULL << (id - 0xc0))); +} + +#endif /* __NV50_BLIT_H__ */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c new file mode 100644 index 00000000000..b6bdf79b389 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -0,0 +1,317 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all 
copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" +#include "util/u_framebuffer.h" + +#ifdef NV50_WITH_DRAW_MODULE +#include "draw/draw_context.h" +#endif + +#include "nv50/nv50_context.h" +#include "nv50/nv50_screen.h" +#include "nv50/nv50_resource.h" + +static void +nv50_flush(struct pipe_context *pipe, + struct pipe_fence_handle **fence, + unsigned flags) +{ + struct nouveau_screen *screen = nouveau_screen(pipe->screen); + + if (fence) + nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence); + + PUSH_KICK(screen->pushbuf); + + nouveau_context_update_frame_stats(nouveau_context(pipe)); +} + +static void +nv50_texture_barrier(struct pipe_context *pipe) +{ + struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf; + + BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, 0x20); +} + +void +nv50_default_kick_notify(struct nouveau_pushbuf *push) +{ + struct nv50_screen *screen = push->user_priv; + + if (screen) { + nouveau_fence_next(&screen->base); + nouveau_fence_update(&screen->base, TRUE); + if (screen->cur_ctx) + screen->cur_ctx->state.flushed = TRUE; + } +} + +static void +nv50_context_unreference_resources(struct nv50_context *nv50) +{ + unsigned s, i; + + nouveau_bufctx_del(&nv50->bufctx_3d); + nouveau_bufctx_del(&nv50->bufctx); + + util_unreference_framebuffer_state(&nv50->framebuffer); + + for (i = 0; i < nv50->num_vtxbufs; ++i) 
+ pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL); + + pipe_resource_reference(&nv50->idxbuf.buffer, NULL); + + for (s = 0; s < 3; ++s) { + for (i = 0; i < nv50->num_textures[s]; ++i) + pipe_sampler_view_reference(&nv50->textures[s][i], NULL); + + for (i = 0; i < NV50_MAX_PIPE_CONSTBUFS; ++i) + if (!nv50->constbuf[s][i].user) + pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL); + } +} + +static void +nv50_destroy(struct pipe_context *pipe) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + if (nv50_context_screen(nv50)->cur_ctx == nv50) { + nv50->base.pushbuf->kick_notify = NULL; + nv50_context_screen(nv50)->cur_ctx = NULL; + nouveau_pushbuf_bufctx(nv50->base.pushbuf, NULL); + } + /* need to flush before destroying the bufctx */ + nouveau_pushbuf_kick(nv50->base.pushbuf, nv50->base.pushbuf->channel); + + nv50_context_unreference_resources(nv50); + +#ifdef NV50_WITH_DRAW_MODULE + draw_destroy(nv50->draw); +#endif + + nouveau_context_destroy(&nv50->base); +} + +static int +nv50_invalidate_resource_storage(struct nouveau_context *ctx, + struct pipe_resource *res, + int ref) +{ + struct nv50_context *nv50 = nv50_context(&ctx->pipe); + unsigned s, i; + + if (res->bind & PIPE_BIND_RENDER_TARGET) { + for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) { + if (nv50->framebuffer.cbufs[i] && + nv50->framebuffer.cbufs[i]->texture == res) { + nv50->dirty |= NV50_NEW_FRAMEBUFFER; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + if (!--ref) + return ref; + } + } + } + if (res->bind & PIPE_BIND_DEPTH_STENCIL) { + if (nv50->framebuffer.zsbuf && + nv50->framebuffer.zsbuf->texture == res) { + nv50->dirty |= NV50_NEW_FRAMEBUFFER; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + if (!--ref) + return ref; + } + } + + if (res->bind & PIPE_BIND_VERTEX_BUFFER) { + for (i = 0; i < nv50->num_vtxbufs; ++i) { + if (nv50->vtxbuf[i].buffer == res) { + nv50->dirty |= NV50_NEW_ARRAYS; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX); + if (!--ref) + 
return ref; + } + } + } + if (res->bind & PIPE_BIND_INDEX_BUFFER) { + if (nv50->idxbuf.buffer == res) + if (!--ref) + return ref; + } + + if (res->bind & PIPE_BIND_SAMPLER_VIEW) { + for (s = 0; s < 5; ++s) { + for (i = 0; i < nv50->num_textures[s]; ++i) { + if (nv50->textures[s][i] && + nv50->textures[s][i]->texture == res) { + nv50->dirty |= NV50_NEW_TEXTURES; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + if (!--ref) + return ref; + } + } + } + } + + if (res->bind & PIPE_BIND_CONSTANT_BUFFER) { + for (s = 0; s < 5; ++s) { + for (i = 0; i < nv50->num_vtxbufs; ++i) { + if (!nv50->constbuf[s][i].user && + nv50->constbuf[s][i].u.buf == res) { + nv50->dirty |= NV50_NEW_CONSTBUF; + nv50->constbuf_dirty[s] |= 1 << i; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i)); + if (!--ref) + return ref; + } + } + } + } + + return ref; +} + +struct pipe_context * +nv50_create(struct pipe_screen *pscreen, void *priv) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct nv50_context *nv50; + struct pipe_context *pipe; + int ret; + uint32_t flags; + + nv50 = CALLOC_STRUCT(nv50_context); + if (!nv50) + return NULL; + pipe = &nv50->base.pipe; + + if (!nv50_blitctx_create(nv50)) + goto out_err; + + nv50->base.pushbuf = screen->base.pushbuf; + nv50->base.client = screen->base.client; + + ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT, + &nv50->bufctx_3d); + if (!ret) + ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx); + if (ret) + goto out_err; + + nv50->base.screen = &screen->base; + nv50->base.copy_data = nv50_m2mf_copy_linear; + nv50->base.push_data = nv50_sifc_linear_u8; + nv50->base.push_cb = nv50_cb_push; + + nv50->screen = screen; + pipe->screen = pscreen; + pipe->priv = priv; + + pipe->destroy = nv50_destroy; + + pipe->draw_vbo = nv50_draw_vbo; + pipe->clear = nv50_clear; + + pipe->flush = nv50_flush; + pipe->texture_barrier = nv50_texture_barrier; + + if (!screen->cur_ctx) { + screen->cur_ctx = nv50; + 
nouveau_pushbuf_bufctx(screen->base.pushbuf, nv50->bufctx); + } + nv50->base.pushbuf->kick_notify = nv50_default_kick_notify; + + nv50_init_query_functions(nv50); + nv50_init_surface_functions(nv50); + nv50_init_state_functions(nv50); + nv50_init_resource_functions(pipe); + + nv50->base.invalidate_resource_storage = nv50_invalidate_resource_storage; + +#ifdef NV50_WITH_DRAW_MODULE + /* no software fallbacks implemented */ + nv50->draw = draw_create(pipe); + assert(nv50->draw); + draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); +#endif + + if (screen->base.device->chipset < 0x84 || + debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) { + /* PMPEG */ + nouveau_context_init_vdec(&nv50->base); + } else if (screen->base.device->chipset < 0x98 || + screen->base.device->chipset == 0xa0) { + /* VP2 */ + pipe->create_video_codec = nv84_create_decoder; + pipe->create_video_buffer = nv84_video_buffer_create; + } else { + /* VP3/4 */ + pipe->create_video_codec = nv98_create_decoder; + pipe->create_video_buffer = nv98_video_buffer_create; + } + + flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + + BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->code); + BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms); + BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc); + BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo); + + flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; + + BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo); + BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo); + + nv50->base.scratch.bo_size = 2 << 20; + + return pipe; + +out_err: + if (nv50) { + if (nv50->bufctx_3d) + nouveau_bufctx_del(&nv50->bufctx_3d); + if (nv50->bufctx) + nouveau_bufctx_del(&nv50->bufctx); + if (nv50->blit) + FREE(nv50->blit); + FREE(nv50); + } + return NULL; +} + +void +nv50_bufctx_fence(struct nouveau_bufctx *bufctx, boolean on_flush) +{ + struct nouveau_list *list = on_flush ? 
&bufctx->current : &bufctx->pending; + struct nouveau_list *it; + + for (it = list->next; it != list; it = it->next) { + struct nouveau_bufref *ref = (struct nouveau_bufref *)it; + struct nv04_resource *res = ref->priv; + if (res) + nv50_resource_validate(res, (unsigned)ref->priv_data); + } +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h new file mode 100644 index 00000000000..ee6eb0ef715 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -0,0 +1,322 @@ +#ifndef __NV50_CONTEXT_H__ +#define __NV50_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_inlines.h" +#include "util/u_dynarray.h" + +#ifdef NV50_WITH_DRAW_MODULE +#include "draw/draw_vertex.h" +#endif + +#include "nv50/nv50_debug.h" +#include "nv50/nv50_winsys.h" +#include "nv50/nv50_stateobj.h" +#include "nv50/nv50_screen.h" +#include "nv50/nv50_program.h" +#include "nv50/nv50_resource.h" +#include "nv50/nv50_transfer.h" + +#include "nouveau_context.h" +#include "nv_object.xml.h" +#include "nv_m2mf.xml.h" +#include "nv50/nv50_3ddefs.xml.h" +#include "nv50/nv50_3d.xml.h" +#include "nv50/nv50_2d.xml.h" + +#define NV50_NEW_BLEND (1 << 0) +#define NV50_NEW_RASTERIZER (1 << 1) +#define NV50_NEW_ZSA (1 << 2) +#define NV50_NEW_VERTPROG (1 << 3) +#define NV50_NEW_GMTYPROG (1 << 6) +#define NV50_NEW_FRAGPROG (1 << 7) +#define NV50_NEW_BLEND_COLOUR (1 << 8) +#define NV50_NEW_STENCIL_REF (1 << 9) +#define NV50_NEW_CLIP (1 << 10) +#define NV50_NEW_SAMPLE_MASK (1 << 11) +#define NV50_NEW_FRAMEBUFFER (1 << 12) +#define NV50_NEW_STIPPLE (1 << 13) +#define NV50_NEW_SCISSOR (1 << 14) +#define NV50_NEW_VIEWPORT (1 << 15) +#define NV50_NEW_ARRAYS (1 << 16) +#define NV50_NEW_VERTEX (1 << 17) +#define NV50_NEW_CONSTBUF (1 << 18) +#define NV50_NEW_TEXTURES (1 << 19) +#define NV50_NEW_SAMPLERS (1 << 20) +#define 
NV50_NEW_STRMOUT (1 << 21) +#define NV50_NEW_CONTEXT (1 << 31) + +#define NV50_BIND_FB 0 +#define NV50_BIND_VERTEX 1 +#define NV50_BIND_VERTEX_TMP 2 +#define NV50_BIND_INDEX 3 +#define NV50_BIND_TEXTURES 4 +#define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i)) +#define NV50_BIND_SO 53 +#define NV50_BIND_SCREEN 54 +#define NV50_BIND_TLS 55 +#define NV50_BIND_COUNT 56 +#define NV50_BIND_2D 0 +#define NV50_BIND_M2MF 0 +#define NV50_BIND_FENCE 1 + +#define NV50_CB_TMP 123 +/* fixed constant buffer binding points - low indices for user's constbufs */ +#define NV50_CB_PVP 124 +#define NV50_CB_PGP 126 +#define NV50_CB_PFP 125 +#define NV50_CB_AUX 127 + + +struct nv50_blitctx; + +boolean nv50_blitctx_create(struct nv50_context *); + +struct nv50_context { + struct nouveau_context base; + + struct nv50_screen *screen; + + struct nouveau_bufctx *bufctx_3d; + struct nouveau_bufctx *bufctx; + + uint32_t dirty; + + struct { + uint32_t instance_elts; /* bitmask of per-instance elements */ + uint32_t instance_base; + uint32_t interpolant_ctrl; + uint32_t semantic_color; + uint32_t semantic_psize; + int32_t index_bias; + boolean uniform_buffer_bound[3]; + boolean prim_restart; + boolean point_sprite; + boolean rt_serialize; + boolean flushed; + boolean rasterizer_discard; + uint8_t tls_required; + boolean new_tls_space; + uint8_t num_vtxbufs; + uint8_t num_vtxelts; + uint8_t num_textures[3]; + uint8_t num_samplers[3]; + uint8_t prim_size; + uint16_t scissor; + } state; + + struct nv50_blend_stateobj *blend; + struct nv50_rasterizer_stateobj *rast; + struct nv50_zsa_stateobj *zsa; + struct nv50_vertex_stateobj *vertex; + + struct nv50_program *vertprog; + struct nv50_program *gmtyprog; + struct nv50_program *fragprog; + + struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS]; + uint16_t constbuf_dirty[3]; + uint16_t constbuf_valid[3]; + + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned num_vtxbufs; + struct pipe_index_buffer idxbuf; + uint32_t vbo_fifo; /* bitmask of 
vertex elements to be pushed to FIFO */ + uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ + uint32_t vbo_constant; /* bitmask of user buffers with stride 0 */ + uint32_t vb_elt_first; /* from pipe_draw_info, for vertex upload */ + uint32_t vb_elt_limit; /* max - min element (count - 1) */ + uint32_t instance_off; /* base vertex for instanced arrays */ + uint32_t instance_max; /* max instance for current draw call */ + + struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS]; + unsigned num_textures[3]; + struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS]; + unsigned num_samplers[3]; + + uint8_t num_so_targets; + uint8_t so_targets_dirty; + struct pipe_stream_output_target *so_target[4]; + + struct pipe_framebuffer_state framebuffer; + struct pipe_blend_color blend_colour; + struct pipe_stencil_ref stencil_ref; + struct pipe_poly_stipple stipple; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + struct pipe_clip_state clip; + + unsigned sample_mask; + + boolean vbo_push_hint; + + struct pipe_query *cond_query; + boolean cond_cond; + uint cond_mode; + + struct nv50_blitctx *blit; + +#ifdef NV50_WITH_DRAW_MODULE + struct draw_context *draw; +#endif +}; + +static INLINE struct nv50_context * +nv50_context(struct pipe_context *pipe) +{ + return (struct nv50_context *)pipe; +} + +static INLINE struct nv50_screen * +nv50_context_screen(struct nv50_context *nv50) +{ + return nv50_screen(&nv50->base.screen->base); +} + +/* return index used in nv50_context arrays for a specific shader type */ +static INLINE unsigned +nv50_context_shader_stage(unsigned pipe) +{ + switch (pipe) { + case PIPE_SHADER_VERTEX: return 0; + case PIPE_SHADER_FRAGMENT: return 1; + case PIPE_SHADER_GEOMETRY: return 2; + case PIPE_SHADER_COMPUTE: return 3; + default: + assert(!"invalid/unhandled shader type"); + return 0; + } +} + +/* nv50_context.c */ +struct pipe_context *nv50_create(struct pipe_screen *, void *); + +void 
nv50_bufctx_fence(struct nouveau_bufctx *, boolean on_flush); + +void nv50_default_kick_notify(struct nouveau_pushbuf *); + +/* nv50_draw.c */ +extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); + +/* nv50_query.c */ +void nv50_init_query_functions(struct nv50_context *); +void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, + struct pipe_query *, unsigned result_offset); +void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *); +void nva0_so_target_save_offset(struct pipe_context *, + struct pipe_stream_output_target *, + unsigned index, boolean seralize); + +#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) + +/* nv50_shader_state.c */ +void nv50_vertprog_validate(struct nv50_context *); +void nv50_gmtyprog_validate(struct nv50_context *); +void nv50_fragprog_validate(struct nv50_context *); +void nv50_fp_linkage_validate(struct nv50_context *); +void nv50_gp_linkage_validate(struct nv50_context *); +void nv50_constbufs_validate(struct nv50_context *); +void nv50_validate_derived_rs(struct nv50_context *); +void nv50_stream_output_validate(struct nv50_context *); + +/* nv50_state.c */ +extern void nv50_init_state_functions(struct nv50_context *); + +/* nv50_state_validate.c */ +/* @words: check for space before emitting relocs */ +extern boolean nv50_state_validate(struct nv50_context *, uint32_t state_mask, + unsigned space_words); + +/* nv50_surface.c */ +extern void nv50_clear(struct pipe_context *, unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil); +extern void nv50_init_surface_functions(struct nv50_context *); + +/* nv50_tex.c */ +void nv50_validate_textures(struct nv50_context *); +void nv50_validate_samplers(struct nv50_context *); + +struct pipe_sampler_view * +nv50_create_texture_view(struct pipe_context *, + struct pipe_resource *, + const struct pipe_sampler_view *, + uint32_t flags, + enum pipe_texture_target); +struct pipe_sampler_view * 
+nv50_create_sampler_view(struct pipe_context *, + struct pipe_resource *, + const struct pipe_sampler_view *); + +/* nv50_transfer.c */ +void +nv50_m2mf_transfer_rect(struct nv50_context *, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy); +void +nv50_sifc_linear_u8(struct nouveau_context *pipe, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data); +void +nv50_m2mf_copy_linear(struct nouveau_context *pipe, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size); +void +nv50_cb_push(struct nouveau_context *nv, + struct nouveau_bo *bo, unsigned domain, + unsigned base, unsigned size, + unsigned offset, unsigned words, const uint32_t *data); + +/* nv50_vbo.c */ +void nv50_draw_vbo(struct pipe_context *, const struct pipe_draw_info *); + +void * +nv50_vertex_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements); +void +nv50_vertex_state_delete(struct pipe_context *pipe, void *hwcso); + +void nv50_vertex_arrays_validate(struct nv50_context *nv50); + +/* nv50_push.c */ +void nv50_push_vbo(struct nv50_context *, const struct pipe_draw_info *); + +/* nv84_video.c */ +struct pipe_video_codec * +nv84_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ); + +struct pipe_video_buffer * +nv84_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *template); + +int +nv84_screen_get_video_param(struct pipe_screen *pscreen, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint, + enum pipe_video_cap param); + +boolean +nv84_screen_video_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint); + +/* nv98_video.c */ +struct pipe_video_codec * +nv98_create_decoder(struct pipe_context 
*context, + const struct pipe_video_codec *templ); + +struct pipe_video_buffer * +nv98_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *template); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_debug.h b/src/gallium/drivers/nouveau/nv50/nv50_debug.h new file mode 100644 index 00000000000..f3dee621519 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_debug.h @@ -0,0 +1,25 @@ + +#ifndef __NV50_DEBUG_H__ +#define __NV50_DEBUG_H__ + +#include <stdio.h> + +#include "util/u_debug.h" + +#define NV50_DEBUG_MISC 0x0001 +#define NV50_DEBUG_SHADER 0x0100 +#define NV50_DEBUG_PROG_IR 0x0200 +#define NV50_DEBUG_PROG_RA 0x0400 +#define NV50_DEBUG_PROG_CFLOW 0x0800 +#define NV50_DEBUG_PROG_ALL 0x1f00 + +#define NV50_DEBUG 0 + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args) + +#define NV50_DBGMSG(ch, args...) \ + if ((NV50_DEBUG) & (NV50_DEBUG_##ch)) \ + debug_printf(args) + +#endif /* __NV50_DEBUG_H__ */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h new file mode 100644 index 00000000000..2e42843fa56 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h @@ -0,0 +1,200 @@ +#ifndef NV50_DEFS_XML +#define NV50_DEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- rnndb/nv50_defs.xml ( 7783 bytes, from 2013-02-14 13:56:25) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nvchipsets.xml ( 3704 bytes, from 2012-08-18 12:48:55) + +Copyright (C) 2006-2013 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice 
(including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#define NV50_VSTATUS_IDLE 0x00000000 +#define NV50_VSTATUS_BUSY 0x00000001 +#define NV50_VSTATUS_UNK2 0x00000002 +#define NV50_VSTATUS_WAITING 0x00000003 +#define NV50_VSTATUS_BLOCKED 0x00000005 +#define NV50_VSTATUS_FAULTED 0x00000006 +#define NV50_VSTATUS_PAUSED 0x00000007 +#define NV50_SURFACE_FORMAT_BITMAP 0x0000001c +#define NV50_SURFACE_FORMAT_UNK1D 0x0000001d +#define NV50_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0 +#define NV50_SURFACE_FORMAT_RGBA32_SINT 0x000000c1 +#define NV50_SURFACE_FORMAT_RGBA32_UINT 0x000000c2 +#define NV50_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3 +#define NV50_SURFACE_FORMAT_RGBX32_SINT 0x000000c4 +#define NV50_SURFACE_FORMAT_RGBX32_UINT 0x000000c5 +#define NV50_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6 +#define NV50_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7 +#define NV50_SURFACE_FORMAT_RGBA16_SINT 0x000000c8 +#define NV50_SURFACE_FORMAT_RGBA16_UINT 0x000000c9 +#define NV50_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca +#define NV50_SURFACE_FORMAT_RG32_FLOAT 0x000000cb +#define NV50_SURFACE_FORMAT_RG32_SINT 0x000000cc +#define NV50_SURFACE_FORMAT_RG32_UINT 0x000000cd +#define NV50_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce +#define NV50_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf +#define NV50_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0 +#define NV50_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1 +#define NV50_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2 +#define NV50_SURFACE_FORMAT_RGBA8_UNORM 
0x000000d5 +#define NV50_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6 +#define NV50_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7 +#define NV50_SURFACE_FORMAT_RGBA8_SINT 0x000000d8 +#define NV50_SURFACE_FORMAT_RGBA8_UINT 0x000000d9 +#define NV50_SURFACE_FORMAT_RG16_UNORM 0x000000da +#define NV50_SURFACE_FORMAT_RG16_SNORM 0x000000db +#define NV50_SURFACE_FORMAT_RG16_SINT 0x000000dc +#define NV50_SURFACE_FORMAT_RG16_UINT 0x000000dd +#define NV50_SURFACE_FORMAT_RG16_FLOAT 0x000000de +#define NV50_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df +#define NV50_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0 +#define NV50_SURFACE_FORMAT_R32_SINT 0x000000e3 +#define NV50_SURFACE_FORMAT_R32_UINT 0x000000e4 +#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5 +#define NV50_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6 +#define NV50_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7 +#define NV50_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8 +#define NV50_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9 +#define NV50_SURFACE_FORMAT_RG8_UNORM 0x000000ea +#define NV50_SURFACE_FORMAT_RG8_SNORM 0x000000eb +#define NV50_SURFACE_FORMAT_RG8_SINT 0x000000ec +#define NV50_SURFACE_FORMAT_RG8_UINT 0x000000ed +#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee +#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef +#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0 +#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1 +#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2 +#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3 +#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4 +#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5 +#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6 +#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7 +#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8 +#define NV50_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9 +#define NV50_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa +#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb +#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc +#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd +#define 
NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe +#define NV50_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff +#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a +#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013 +#define NV50_ZETA_FORMAT_S8_Z24_UNORM 0x00000014 +#define NV50_ZETA_FORMAT_Z24_X8_UNORM 0x00000015 +#define NV50_ZETA_FORMAT_Z24_S8_UNORM 0x00000016 +#define NV50_ZETA_FORMAT_Z24_C8_UNORM 0x00000018 +#define NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019 +#define NV50_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d +#define NV50_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e +#define NV50_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f +#define NVE4_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002 +#define NVE4_IMAGE_FORMAT_RGBA32_SINT 0x00000003 +#define NVE4_IMAGE_FORMAT_RGBA32_UINT 0x00000004 +#define NVE4_IMAGE_FORMAT_RGBA16_UNORM 0x00000008 +#define NVE4_IMAGE_FORMAT_RGBA16_SNORM 0x00000009 +#define NVE4_IMAGE_FORMAT_RGBA16_SINT 0x0000000a +#define NVE4_IMAGE_FORMAT_RGBA16_UINT 0x0000000b +#define NVE4_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c +#define NVE4_IMAGE_FORMAT_RG32_FLOAT 0x0000000d +#define NVE4_IMAGE_FORMAT_RG32_SINT 0x0000000e +#define NVE4_IMAGE_FORMAT_RG32_UINT 0x0000000f +#define NVE4_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013 +#define NVE4_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015 +#define NVE4_IMAGE_FORMAT_RGBA8_UNORM 0x00000018 +#define NVE4_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a +#define NVE4_IMAGE_FORMAT_RGBA8_SINT 0x0000001b +#define NVE4_IMAGE_FORMAT_RGBA8_UINT 0x0000001c +#define NVE4_IMAGE_FORMAT_RG16_UNORM 0x0000001d +#define NVE4_IMAGE_FORMAT_RG16_SNORM 0x0000001e +#define NVE4_IMAGE_FORMAT_RG16_SINT 0x0000001f +#define NVE4_IMAGE_FORMAT_RG16_UINT 0x00000020 +#define NVE4_IMAGE_FORMAT_RG16_FLOAT 0x00000021 +#define NVE4_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024 +#define NVE4_IMAGE_FORMAT_R32_SINT 0x00000027 +#define NVE4_IMAGE_FORMAT_R32_UINT 0x00000028 +#define NVE4_IMAGE_FORMAT_R32_FLOAT 0x00000029 +#define NVE4_IMAGE_FORMAT_RG8_UNORM 0x0000002e +#define 
NVE4_IMAGE_FORMAT_RG8_SNORM 0x0000002f +#define NVE4_IMAGE_FORMAT_RG8_SINT 0x00000030 +#define NVE4_IMAGE_FORMAT_RG8_UINT 0x00000031 +#define NVE4_IMAGE_FORMAT_R16_UNORM 0x00000032 +#define NVE4_IMAGE_FORMAT_R16_SNORM 0x00000033 +#define NVE4_IMAGE_FORMAT_R16_SINT 0x00000034 +#define NVE4_IMAGE_FORMAT_R16_UINT 0x00000035 +#define NVE4_IMAGE_FORMAT_R16_FLOAT 0x00000036 +#define NVE4_IMAGE_FORMAT_R8_UNORM 0x00000037 +#define NVE4_IMAGE_FORMAT_R8_SNORM 0x00000038 +#define NVE4_IMAGE_FORMAT_R8_SINT 0x00000039 +#define NVE4_IMAGE_FORMAT_R8_UINT 0x0000003a +#define NV50_QUERY__SIZE 0x00000010 +#define NV50_QUERY_COUNTER 0x00000000 + +#define NV50_QUERY_RES 0x00000004 + +#define NV50_QUERY_TIME 0x00000008 + + +#endif /* NV50_DEFS_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_draw.c b/src/gallium/drivers/nouveau/nv50/nv50_draw.c new file mode 100644 index 00000000000..fa68cd8ee6a --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_draw.c @@ -0,0 +1,88 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "draw/draw_pipe.h" + +#include "nv50/nv50_context.h" + +struct nv50_render_stage { + struct draw_stage stage; + struct nv50_context *nv50; +}; + +static INLINE struct nv50_render_stage * +nv50_render_stage(struct draw_stage *stage) +{ + return (struct nv50_render_stage *)stage; +} + +static void +nv50_render_point(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_line(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_tri(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_flush(struct draw_stage *stage, unsigned flags) +{ +} + +static void +nv50_render_reset_stipple_counter(struct draw_stage *stage) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_destroy(struct draw_stage *stage) +{ + FREE(stage); +} + +struct draw_stage * +nv50_draw_render_stage(struct nv50_context *nv50) +{ + struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage); + + rs->nv50 = nv50; + rs->stage.draw = nv50->draw; + rs->stage.destroy = nv50_render_destroy; + rs->stage.point = nv50_render_point; + rs->stage.line = nv50_render_line; + rs->stage.tri = nv50_render_tri; + rs->stage.flush = nv50_render_flush; + rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter; + + return &rs->stage; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c new file mode 100644 index 00000000000..0a7e812ba13 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c @@ -0,0 +1,504 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free 
of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#if NOUVEAU_DRIVER == 0xc0 +# include "nvc0/nvc0_screen.h" +# include "nvc0/nvc0_3d.xml.h" +#else +# include "nv50/nv50_screen.h" +# include "nv50/nv50_3d.xml.h" +#endif +#include "nv50/nv50_texture.xml.h" +#include "nv50/nv50_defs.xml.h" + +#include "pipe/p_defines.h" + +/* Abbreviated usage masks: + * T: texturing + * R: render target + * B: render target, blendable + * C: render target (color), blendable only on nvc0 + * D: scanout/display target, blendable + * Z: depth/stencil + * V: vertex fetch + * I: image / surface, implies T + */ +#define U_V PIPE_BIND_VERTEX_BUFFER +#define U_T PIPE_BIND_SAMPLER_VIEW +#define U_I PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE +#define U_TR PIPE_BIND_RENDER_TARGET | U_T +#define U_IR U_TR | U_I +#define U_TB PIPE_BIND_BLENDABLE | U_TR +#define U_IB PIPE_BIND_BLENDABLE | U_IR +#define U_TD PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB +#define U_TZ PIPE_BIND_DEPTH_STENCIL | U_T +#define U_TV U_V | U_T +#define U_TRV U_V | U_TR +#define U_IRV U_V | U_IR +#define U_TBV U_V | U_TB +#define U_IBV U_V | U_IB +#define U_TDV U_V | U_TD +#if NOUVEAU_DRIVER == 0xc0 +# define U_TC U_TB +# define U_IC U_IB +# define U_TCV U_TBV +# define U_ICV U_IBV +# define U_tV U_TV +#else +# define U_TC U_TR +# define U_IC U_IR +# define U_TCV U_TRV +# define U_ICV U_IRV +# define U_tV U_V +#endif + +#define NV50_SURFACE_FORMAT_NONE 0 +#define NV50_ZETA_FORMAT_NONE 0 + +/* for vertex buffers: */ +#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8 +#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16 +#define NV50_TIC_0_FMT_32_32_32 NVC0_TIC_0_FMT_32_32_32 + +#if NOUVEAU_DRIVER == 0xc0 +# define NVXX_3D_VAF_SIZE(s) NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##s +# define NVXX_3D_VAF_TYPE(t) NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t +#else +# define NVXX_3D_VAF_SIZE(s) NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_##s +# define NVXX_3D_VAF_TYPE(t) NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_##t +#endif + +#define TBLENT_A_(pf, sf, r, g, b, a, t0, t1, t2, 
t3, sz, u, br) \ + [PIPE_FORMAT_##pf] = { \ + sf, \ + (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \ + (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \ + (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \ + (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \ + (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ + (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ + (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ + (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ + NV50_TIC_0_FMT_##sz, \ + NVXX_3D_VAF_SIZE(sz) | \ + NVXX_3D_VAF_TYPE(t0) | (br << 31), \ + U_##u \ + } + +#define TBLENT_B_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \ + [PIPE_FORMAT_##pf] = { \ + sf, \ + (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \ + (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \ + (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \ + (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \ + (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ + (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ + (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ + (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ + NV50_TIC_0_FMT_##sz, 0, U_##u \ + } + +#define C4A(p, n, r, g, b, a, t, s, u, br) \ + TBLENT_A_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u, br) +#define C4B(p, n, r, g, b, a, t, s, u) \ + TBLENT_B_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u) + +#define ZXB(p, n, r, g, b, a, t, s, u) \ + TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \ + r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u) +#define ZSB(p, n, r, g, b, a, t, s, u) \ + TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \ + r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u) +#define SZB(p, n, r, g, b, a, t, s, u) \ + TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \ + r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u) + +#define F3A(p, n, r, g, b, a, t, s, u) \ + C4A(p, n, r, g, b, ONE_FLOAT, t, s, u, 0) +#define I3A(p, n, r, g, b, a, t, s, u) \ + C4A(p, n, r, g, b, ONE_INT, t, s, u, 0) +#define F3B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, r, g, b, 
ONE_FLOAT, t, s, u) +#define I3B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, r, g, b, ONE_INT, t, s, u) + +#define F2A(p, n, r, g, b, a, t, s, u) \ + C4A(p, n, r, g, ZERO, ONE_FLOAT, t, s, u, 0) +#define I2A(p, n, r, g, b, a, t, s, u) \ + C4A(p, n, r, g, ZERO, ONE_INT, t, s, u, 0) +#define F2B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, r, g, ZERO, ONE_FLOAT, t, s, u) +#define I2B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, r, g, ZERO, ONE_INT, t, s, u) + +#define F1A(p, n, r, g, b, a, t, s, u) \ + C4A(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u, 0) +#define I1A(p, n, r, g, b, a, t, s, u) \ + C4A(p, n, r, ZERO, ZERO, ONE_INT, t, s, u, 0) +#define F1B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u) +#define I1B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, r, ZERO, ZERO, ONE_INT, t, s, u) + +#define A1B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, ZERO, ZERO, ZERO, a, t, s, u) + +#if NOUVEAU_DRIVER == 0xc0 +const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = +#else +const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = +#endif +{ + C4A(B8G8R8A8_UNORM, BGRA8_UNORM, C2, C1, C0, C3, UNORM, 8_8_8_8, TDV, 1), + F3A(B8G8R8X8_UNORM, BGRX8_UNORM, C2, C1, C0, xx, UNORM, 8_8_8_8, TD), + C4A(B8G8R8A8_SRGB, BGRA8_SRGB, C2, C1, C0, C3, UNORM, 8_8_8_8, TD, 1), + F3A(B8G8R8X8_SRGB, BGRX8_SRGB, C2, C1, C0, xx, UNORM, 8_8_8_8, TD), + C4A(R8G8B8A8_UNORM, RGBA8_UNORM, C0, C1, C2, C3, UNORM, 8_8_8_8, IBV, 0), + F3A(R8G8B8X8_UNORM, RGBX8_UNORM, C0, C1, C2, xx, UNORM, 8_8_8_8, TB), + C4A(R8G8B8A8_SRGB, RGBA8_SRGB, C0, C1, C2, C3, UNORM, 8_8_8_8, TB, 0), + F3B(R8G8B8X8_SRGB, RGBX8_SRGB, C0, C1, C2, xx, UNORM, 8_8_8_8, TB), + + ZXB(Z16_UNORM, Z16_UNORM, C0, C0, C0, xx, UNORM, Z16, TZ), + ZXB(Z32_FLOAT, Z32_FLOAT, C0, C0, C0, xx, FLOAT, Z32, TZ), + ZXB(Z24X8_UNORM, Z24_X8_UNORM, C0, C0, C0, xx, UNORM, Z24_X8, TZ), + ZSB(Z24_UNORM_S8_UINT, Z24_S8_UNORM, C0, C0, C0, xx, UNORM, Z24_S8, TZ), + ZSB(X24S8_UINT, NONE, C1, C1, C1, xx, UNORM, Z24_S8, T), + 
SZB(S8_UINT_Z24_UNORM, S8_Z24_UNORM, C1, C1, C1, xx, UNORM, S8_Z24, TZ), + SZB(S8X24_UINT, NONE, C0, C0, C0, xx, UNORM, S8_Z24, T), + ZSB(Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, C0, C0, C0, xx, FLOAT, + Z32_S8_X24, TZ), + ZSB(X32_S8X24_UINT, NONE, C1, C1, C1, xx, FLOAT, Z32_S8_X24, T), + + F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, T), + C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TB), + F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TB), + C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T), + F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T), + F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T), + + C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2, + IBV, 0), + C4A(B10G10R10A2_UNORM, BGR10_A2_UNORM, C2, C1, C0, C3, UNORM, 10_10_10_2, + TBV, 1), + C4A(R10G10B10A2_SNORM, NONE, C0, C1, C2, C3, SNORM, 10_10_10_2, TV, 0), + C4A(B10G10R10A2_SNORM, NONE, C2, C1, C0, C3, SNORM, 10_10_10_2, TV, 1), + + F3B(R11G11B10_FLOAT, R11G11B10_FLOAT, C0, C1, C2, xx, FLOAT, 11_11_10, IB), + + F3B(L8_UNORM, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB), + F3B(L8_SRGB, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB), + F3B(L8_SNORM, R8_SNORM, C0, C0, C0, xx, SNORM, 8, TC), + I3B(L8_SINT, R8_SINT, C0, C0, C0, xx, SINT, 8, TR), + I3B(L8_UINT, R8_UINT, C0, C0, C0, xx, UINT, 8, TR), + F3B(L16_UNORM, R16_UNORM, C0, C0, C0, xx, UNORM, 16, TC), + F3B(L16_SNORM, R16_SNORM, C0, C0, C0, xx, SNORM, 16, TC), + F3B(L16_FLOAT, R16_FLOAT, C0, C0, C0, xx, FLOAT, 16, TB), + I3B(L16_SINT, R16_SINT, C0, C0, C0, xx, SINT, 16, TR), + I3B(L16_UINT, R16_UINT, C0, C0, C0, xx, UINT, 16, TR), + F3B(L32_FLOAT, R32_FLOAT, C0, C0, C0, xx, FLOAT, 32, TB), + I3B(L32_SINT, R32_SINT, C0, C0, C0, xx, SINT, 32, TR), + I3B(L32_UINT, R32_UINT, C0, C0, C0, xx, UINT, 32, TR), + + C4B(I8_UNORM, R8_UNORM, C0, C0, C0, C0, UNORM, 8, TR), + C4B(I8_SNORM, R8_SNORM, C0, C0, C0, C0, SNORM, 8, TR), + C4B(I8_SINT, R8_SINT, C0, C0, 
C0, C0, SINT, 8, TR), + C4B(I8_UINT, R8_UINT, C0, C0, C0, C0, UINT, 8, TR), + C4B(I16_UNORM, R16_UNORM, C0, C0, C0, C0, UNORM, 16, TR), + C4B(I16_SNORM, R16_SNORM, C0, C0, C0, C0, SNORM, 16, TR), + C4B(I16_FLOAT, R16_FLOAT, C0, C0, C0, C0, FLOAT, 16, TR), + C4B(I16_SINT, R16_SINT, C0, C0, C0, C0, SINT, 16, TR), + C4B(I16_UINT, R16_UINT, C0, C0, C0, C0, UINT, 16, TR), + C4B(I32_FLOAT, R32_FLOAT, C0, C0, C0, C0, FLOAT, 32, TR), + C4B(I32_SINT, R32_SINT, C0, C0, C0, C0, SINT, 32, TR), + C4B(I32_UINT, R32_UINT, C0, C0, C0, C0, UINT, 32, TR), + + A1B(A8_UNORM, A8_UNORM, xx, xx, xx, C0, UNORM, 8, TB), + A1B(A8_SNORM, R8_SNORM, xx, xx, xx, C0, SNORM, 8, T), + A1B(A8_SINT, R8_SINT, xx, xx, xx, C0, SINT, 8, T), + A1B(A8_UINT, R8_UINT, xx, xx, xx, C0, UINT, 8, T), + A1B(A16_UNORM, R16_UNORM, xx, xx, xx, C0, UNORM, 16, T), + A1B(A16_SNORM, R16_SNORM, xx, xx, xx, C0, SNORM, 16, T), + A1B(A16_FLOAT, R16_FLOAT, xx, xx, xx, C0, FLOAT, 16, T), + A1B(A16_SINT, R16_SINT, xx, xx, xx, C0, SINT, 16, T), + A1B(A16_UINT, R16_UINT, xx, xx, xx, C0, UINT, 16, T), + A1B(A32_FLOAT, R32_FLOAT, xx, xx, xx, C0, FLOAT, 32, T), + A1B(A32_SINT, R32_SINT, xx, xx, xx, C0, SINT, 32, T), + A1B(A32_UINT, R32_UINT, xx, xx, xx, C0, UINT, 32, T), + + C4B(L4A4_UNORM, NONE, C0, C0, C0, C1, UNORM, 4_4, T), + C4B(L8A8_UNORM, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T), + C4B(L8A8_SNORM, RG8_SNORM, C0, C0, C0, C1, SNORM, 8_8, T), + C4B(L8A8_SRGB, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T), + C4B(L8A8_SINT, RG8_SINT, C0, C0, C0, C1, SINT, 8_8, T), + C4B(L8A8_UINT, RG8_UINT, C0, C0, C0, C1, UINT, 8_8, T), + C4B(L16A16_UNORM, RG16_UNORM, C0, C0, C0, C1, UNORM, 16_16, T), + C4B(L16A16_SNORM, RG16_SNORM, C0, C0, C0, C1, SNORM, 16_16, T), + C4B(L16A16_FLOAT, RG16_FLOAT, C0, C0, C0, C1, FLOAT, 16_16, T), + C4B(L16A16_SINT, RG16_SINT, C0, C0, C0, C1, SINT, 16_16, T), + C4B(L16A16_UINT, RG16_UINT, C0, C0, C0, C1, UINT, 16_16, T), + C4B(L32A32_FLOAT, RG32_FLOAT, C0, C0, C0, C1, FLOAT, 32_32, T), + C4B(L32A32_SINT, 
RG32_SINT, C0, C0, C0, C1, SINT, 32_32, T), + C4B(L32A32_UINT, RG32_UINT, C0, C0, C0, C1, UINT, 32_32, T), + + F3B(DXT1_RGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T), + F3B(DXT1_SRGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T), + C4B(DXT1_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T), + C4B(DXT1_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T), + C4B(DXT3_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T), + C4B(DXT3_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T), + C4B(DXT5_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T), + C4B(DXT5_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T), + + F1B(RGTC1_UNORM, NONE, C0, xx, xx, xx, UNORM, RGTC1, T), + F1B(RGTC1_SNORM, NONE, C0, xx, xx, xx, SNORM, RGTC1, T), + F2B(RGTC2_UNORM, NONE, C0, C1, xx, xx, UNORM, RGTC2, T), + F2B(RGTC2_SNORM, NONE, C0, C1, xx, xx, SNORM, RGTC2, T), + F3B(LATC1_UNORM, NONE, C0, C0, C0, xx, UNORM, RGTC1, T), + F3B(LATC1_SNORM, NONE, C0, C0, C0, xx, SNORM, RGTC1, T), + C4B(LATC2_UNORM, NONE, C0, C0, C0, C1, UNORM, RGTC2, T), + C4B(LATC2_SNORM, NONE, C0, C0, C0, C1, SNORM, RGTC2, T), + + C4A(R32G32B32A32_FLOAT, RGBA32_FLOAT, C0, C1, C2, C3, FLOAT, 32_32_32_32, + IBV, 0), + C4A(R32G32B32A32_UNORM, NONE, C0, C1, C2, C3, UNORM, 32_32_32_32, TV, 0), + C4A(R32G32B32A32_SNORM, NONE, C0, C1, C2, C3, SNORM, 32_32_32_32, TV, 0), + C4A(R32G32B32A32_SINT, RGBA32_SINT, C0, C1, C2, C3, SINT, 32_32_32_32, + IRV, 0), + C4A(R32G32B32A32_UINT, RGBA32_UINT, C0, C1, C2, C3, UINT, 32_32_32_32, + IRV, 0), + F3B(R32G32B32X32_FLOAT, RGBX32_FLOAT, C0, C1, C2, xx, FLOAT, 32_32_32_32, TB), + I3B(R32G32B32X32_SINT, RGBX32_SINT, C0, C1, C2, xx, SINT, 32_32_32_32, TR), + I3B(R32G32B32X32_UINT, RGBX32_UINT, C0, C1, C2, xx, UINT, 32_32_32_32, TR), + + F2A(R32G32_FLOAT, RG32_FLOAT, C0, C1, xx, xx, FLOAT, 32_32, IBV), + F2A(R32G32_UNORM, NONE, C0, C1, xx, xx, UNORM, 32_32, TV), + F2A(R32G32_SNORM, NONE, C0, C1, xx, xx, SNORM, 32_32, TV), + I2A(R32G32_SINT, RG32_SINT, C0, C1, xx, xx, SINT, 32_32, IRV), + I2A(R32G32_UINT, RG32_UINT, C0, C1, xx, xx, UINT, 32_32, 
IRV), + + F1A(R32_FLOAT, R32_FLOAT, C0, xx, xx, xx, FLOAT, 32, IBV), + F1A(R32_UNORM, NONE, C0, xx, xx, xx, UNORM, 32, TV), + F1A(R32_SNORM, NONE, C0, xx, xx, xx, SNORM, 32, TV), + I1A(R32_SINT, R32_SINT, C0, xx, xx, xx, SINT, 32, IRV), + I1A(R32_UINT, R32_UINT, C0, xx, xx, xx, UINT, 32, IRV), + + C4A(R16G16B16A16_FLOAT, RGBA16_FLOAT, C0, C1, C2, C3, FLOAT, 16_16_16_16, + IBV, 0), + C4A(R16G16B16A16_UNORM, RGBA16_UNORM, C0, C1, C2, C3, UNORM, 16_16_16_16, + ICV, 0), + C4A(R16G16B16A16_SNORM, RGBA16_SNORM, C0, C1, C2, C3, SNORM, 16_16_16_16, + ICV, 0), + C4A(R16G16B16A16_SINT, RGBA16_SINT, C0, C1, C2, C3, SINT, 16_16_16_16, + IRV, 0), + C4A(R16G16B16A16_UINT, RGBA16_UINT, C0, C1, C2, C3, UINT, 16_16_16_16, + IRV, 0), + F3B(R16G16B16X16_FLOAT, RGBX16_FLOAT, C0, C1, C2, xx, FLOAT, 16_16_16_16, TB), + F3B(R16G16B16X16_UNORM, RGBA16_UNORM, C0, C1, C2, xx, UNORM, 16_16_16_16, T), + F3B(R16G16B16X16_SNORM, RGBA16_SNORM, C0, C1, C2, xx, SNORM, 16_16_16_16, T), + I3B(R16G16B16X16_SINT, RGBA16_SINT, C0, C1, C2, xx, SINT, 16_16_16_16, T), + I3B(R16G16B16X16_UINT, RGBA16_UINT, C0, C1, C2, xx, UINT, 16_16_16_16, T), + + F2A(R16G16_FLOAT, RG16_FLOAT, C0, C1, xx, xx, FLOAT, 16_16, IBV), + F2A(R16G16_UNORM, RG16_UNORM, C0, C1, xx, xx, UNORM, 16_16, ICV), + F2A(R16G16_SNORM, RG16_SNORM, C0, C1, xx, xx, SNORM, 16_16, ICV), + I2A(R16G16_SINT, RG16_SINT, C0, C1, xx, xx, SINT, 16_16, IRV), + I2A(R16G16_UINT, RG16_UINT, C0, C1, xx, xx, UINT, 16_16, IRV), + + F1A(R16_FLOAT, R16_FLOAT, C0, xx, xx, xx, FLOAT, 16, IBV), + F1A(R16_UNORM, R16_UNORM, C0, xx, xx, xx, UNORM, 16, ICV), + F1A(R16_SNORM, R16_SNORM, C0, xx, xx, xx, SNORM, 16, ICV), + I1A(R16_SINT, R16_SINT, C0, xx, xx, xx, SINT, 16, IRV), + I1A(R16_UINT, R16_UINT, C0, xx, xx, xx, UINT, 16, IRV), + + C4A(R8G8B8A8_SNORM, RGBA8_SNORM, C0, C1, C2, C3, SNORM, 8_8_8_8, ICV, 0), + C4A(R8G8B8A8_SINT, RGBA8_SINT, C0, C1, C2, C3, SINT, 8_8_8_8, IRV, 0), + C4A(R8G8B8A8_UINT, RGBA8_UINT, C0, C1, C2, C3, UINT, 8_8_8_8, IRV, 0), + 
F3B(R8G8B8X8_SNORM, RGBA8_SNORM, C0, C1, C2, xx, SNORM, 8_8_8_8, T), + I3B(R8G8B8X8_SINT, RGBA8_SINT, C0, C1, C2, xx, SINT, 8_8_8_8, T), + I3B(R8G8B8X8_UINT, RGBA8_UINT, C0, C1, C2, xx, UINT, 8_8_8_8, T), + + F2A(R8G8_UNORM, RG8_UNORM, C0, C1, xx, xx, UNORM, 8_8, IBV), + F2A(R8G8_SNORM, RG8_SNORM, C0, C1, xx, xx, SNORM, 8_8, ICV), + I2A(R8G8_SINT, RG8_SINT, C0, C1, xx, xx, SINT, 8_8, IRV), + I2A(R8G8_UINT, RG8_UINT, C0, C1, xx, xx, UINT, 8_8, IRV), + + F1A(R8_UNORM, R8_UNORM, C0, xx, xx, xx, UNORM, 8, IBV), + F1A(R8_SNORM, R8_SNORM, C0, xx, xx, xx, SNORM, 8, ICV), + I1A(R8_SINT, R8_SINT, C0, xx, xx, xx, SINT, 8, IRV), + I1A(R8_UINT, R8_UINT, C0, xx, xx, xx, UINT, 8, IRV), + + F3B(R8G8_B8G8_UNORM, NONE, C0, C1, C2, xx, UNORM, U8_YA8_V8_YB8, T), + F3B(G8R8_B8R8_UNORM, NONE, C1, C0, C2, xx, UNORM, U8_YA8_V8_YB8, T), + F3B(G8R8_G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, YA8_U8_YB8_V8, T), + F3B(R8G8_R8B8_UNORM, NONE, C1, C0, C2, xx, UNORM, YA8_U8_YB8_V8, T), + + F1B(R1_UNORM, BITMAP, C0, xx, xx, xx, UNORM, BITMAP, T), + + C4B(R4A4_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 4_4, T), + C4B(R8A8_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 8_8, T), + C4B(A4R4_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 4_4, T), + C4B(A8R8_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 8_8, T), + + TBLENT_B_(R8SG8SB8UX8U_NORM, 0, + C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, T), + TBLENT_B_(R5SG5SB6U_NORM, 0, + C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 5_5_6, T), + + /* vertex-only formats: */ + + C4A(R32G32B32A32_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 32_32_32_32, V, 0), + C4A(R32G32B32A32_USCALED, NONE, C0, C1, C2, C3, USCALED, 32_32_32_32, V, 0), + F3A(R32G32B32_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, tV), + F3A(R32G32B32_UNORM, NONE, C0, C1, C2, xx, UNORM, 32_32_32, V), + F3A(R32G32B32_SNORM, NONE, C0, C1, C2, xx, SNORM, 32_32_32, V), + I3A(R32G32B32_SINT, NONE, C0, C1, C2, xx, SINT, 32_32_32, tV), + I3A(R32G32B32_UINT, NONE, C0, C1, C2, xx, UINT, 32_32_32, tV), + 
F3A(R32G32B32_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 32_32_32, V), + F3A(R32G32B32_USCALED, NONE, C0, C1, C2, xx, USCALED, 32_32_32, V), + F2A(R32G32_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 32_32, V), + F2A(R32G32_USCALED, NONE, C0, C1, xx, xx, USCALED, 32_32, V), + F1A(R32_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 32, V), + F1A(R32_USCALED, NONE, C0, xx, xx, xx, USCALED, 32, V), + + C4A(R16G16B16A16_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 16_16_16_16, V, 0), + C4A(R16G16B16A16_USCALED, NONE, C0, C1, C2, C3, USCALED, 16_16_16_16, V, 0), + F3A(R16G16B16_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 16_16_16, V), + F3A(R16G16B16_UNORM, NONE, C0, C1, C2, xx, UNORM, 16_16_16, V), + F3A(R16G16B16_SNORM, NONE, C0, C1, C2, xx, SNORM, 16_16_16, V), + I3A(R16G16B16_SINT, NONE, C0, C1, C2, xx, SINT, 16_16_16, V), + I3A(R16G16B16_UINT, NONE, C0, C1, C2, xx, UINT, 16_16_16, V), + F3A(R16G16B16_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 16_16_16, V), + F3A(R16G16B16_USCALED, NONE, C0, C1, C2, xx, USCALED, 16_16_16, V), + F2A(R16G16_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 16_16, V), + F2A(R16G16_USCALED, NONE, C0, C1, xx, xx, USCALED, 16_16, V), + F1A(R16_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 16, V), + F1A(R16_USCALED, NONE, C0, xx, xx, xx, USCALED, 16, V), + + C4A(R8G8B8A8_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 8_8_8_8, V, 0), + C4A(R8G8B8A8_USCALED, NONE, C0, C1, C2, C3, USCALED, 8_8_8_8, V, 0), + F3A(R8G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, 8_8_8, V), + F3A(R8G8B8_SNORM, NONE, C0, C1, C2, xx, SNORM, 8_8_8, V), + I2A(R8G8B8_SINT, NONE, C0, C1, C2, xx, SINT, 8_8_8, V), + I2A(R8G8B8_UINT, NONE, C0, C1, C2, xx, UINT, 8_8_8, V), + F3A(R8G8B8_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 8_8_8, V), + F3A(R8G8B8_USCALED, NONE, C0, C1, C2, xx, USCALED, 8_8_8, V), + F2A(R8G8_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 8_8, V), + F2A(R8G8_USCALED, NONE, C0, C1, xx, xx, USCALED, 8_8, V), + F1A(R8_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 8, V), + F1A(R8_USCALED, NONE, C0, xx, xx, xx, USCALED, 8, V), + + /* 
FIXED types: not supported natively, converted on VBO push */ + + C4B(R32G32B32A32_FIXED, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V), + F3B(R32G32B32_FIXED, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V), + F2B(R32G32_FIXED, NONE, C0, C1, xx, xx, FLOAT, 32_32, V), + F1B(R32_FIXED, NONE, C0, xx, xx, xx, FLOAT, 32, V), + + C4B(R64G64B64A64_FLOAT, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V), + F3B(R64G64B64_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V), + F2B(R64G64_FLOAT, NONE, C0, C1, xx, xx, FLOAT, 32_32, V), + F1B(R64_FLOAT, NONE, C0, xx, xx, xx, FLOAT, 32, V), +}; + +#if 0 +const uint8_t nv50_rt_format_map[PIPE_FORMAT_COUNT] = +{ + [PIPE_FORMAT_Z16_UNORM] = NV50_ZETA_FORMAT_Z16_UNORM, + [PIPE_FORMAT_Z24X8_UNORM] = NV50_ZETA_FORMAT_Z24_X8_UNORM, + [PIPE_FORMAT_Z24_UNORM_S8_UINT] = NV50_ZETA_FORMAT_Z24_S8_UNORM, + [PIPE_FORMAT_S8_UINT_Z24_UNORM] = NV50_ZETA_FORMAT_S8_Z24_UNORM, + [PIPE_FORMAT_Z32_FLOAT] = NV50_ZETA_FORMAT_Z32_FLOAT, + [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT, + + [PIPE_FORMAT_R1_UNORM] = NV50_SURFACE_FORMAT_BITMAP, + + [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV50_SURFACE_FORMAT_RGBA32_FLOAT, + [PIPE_FORMAT_R32G32B32X32_FLOAT] = NV50_SURFACE_FORMAT_RGBX32_FLOAT, + [PIPE_FORMAT_R32G32B32A32_SINT] = NV50_SURFACE_FORMAT_RGBA32_SINT, + [PIPE_FORMAT_R32G32B32X32_SINT] = NV50_SURFACE_FORMAT_RGBX32_SINT, + [PIPE_FORMAT_R32G32B32A32_UINT] = NV50_SURFACE_FORMAT_RGBA32_UINT, + [PIPE_FORMAT_R32G32B32X32_UINT] = NV50_SURFACE_FORMAT_RGBX32_UINT, + + [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV50_SURFACE_FORMAT_RGBA16_FLOAT, + [PIPE_FORMAT_R16G16B16X16_FLOAT] = NV50_SURFACE_FORMAT_RGBX16_FLOAT, + [PIPE_FORMAT_R16G16B16A16_UNORM] = NV50_SURFACE_FORMAT_RGBA16_UNORM, + [PIPE_FORMAT_R16G16B16A16_SNORM] = NV50_SURFACE_FORMAT_RGBA16_SNORM, + [PIPE_FORMAT_R16G16B16A16_SINT] = NV50_SURFACE_FORMAT_RGBA16_SINT, + [PIPE_FORMAT_R16G16B16A16_UINT] = NV50_SURFACE_FORMAT_RGBA16_UINT, + + [PIPE_FORMAT_B8G8R8A8_UNORM] = NV50_SURFACE_FORMAT_BGRA8_UNORM, + 
[PIPE_FORMAT_R8G8B8A8_UNORM] = NV50_SURFACE_FORMAT_RGBA8_UNORM, + [PIPE_FORMAT_B8G8R8X8_UNORM] = NV50_SURFACE_FORMAT_BGRX8_UNORM, + [PIPE_FORMAT_R8G8B8X8_UNORM] = NV50_SURFACE_FORMAT_RGBX8_UNORM, + [PIPE_FORMAT_B8G8R8A8_SRGB] = NV50_SURFACE_FORMAT_BGRA8_SRGB, + [PIPE_FORMAT_R8G8B8A8_SRGB] = NV50_SURFACE_FORMAT_RGBA8_SRGB, + [PIPE_FORMAT_B8G8R8X8_SRGB] = NV50_SURFACE_FORMAT_BGRX8_SRGB, + [PIPE_FORMAT_R8G8B8X8_SRGB] = NV50_SURFACE_FORMAT_RGBX8_SRGB, + [PIPE_FORMAT_R8G8B8A8_SNORM] = NV50_SURFACE_FORMAT_RGBA8_SNORM, + [PIPE_FORMAT_R8G8B8A8_SINT] = NV50_SURFACE_FORMAT_RGBA8_SINT, + [PIPE_FORMAT_R8G8B8A8_UINT] = NV50_SURFACE_FORMAT_RGBA8_UINT, + + [PIPE_FORMAT_R11G11B10_FLOAT] = NV50_SURFACE_FORMAT_R11G11B10_FLOAT, + + [PIPE_FORMAT_B10G10R10A2_UNORM] = NV50_SURFACE_FORMAT_BGR10_A2_UNORM, + [PIPE_FORMAT_R10G10B10A2_UNORM] = NV50_SURFACE_FORMAT_RGB10_A2_UNORM, + [PIPE_FORMAT_R10G10B10A2_UINT] = NV50_SURFACE_FORMAT_RGB10_A2_UINT, + + [PIPE_FORMAT_B5G6R5_UNORM] = NV50_SURFACE_FORMAT_B5G6R5_UNORM, + + [PIPE_FORMAT_B5G5R5A1_UNORM] = NV50_SURFACE_FORMAT_BGR5_A1_UNORM, + [PIPE_FORMAT_B5G5R5X1_UNORM] = NV50_SURFACE_FORMAT_BGR5_X1_UNORM, + + [PIPE_FORMAT_R32G32_FLOAT] = NV50_SURFACE_FORMAT_RG32_FLOAT, + [PIPE_FORMAT_R32G32_SINT] = NV50_SURFACE_FORMAT_RG32_SINT, + [PIPE_FORMAT_R32G32_UINT] = NV50_SURFACE_FORMAT_RG32_UINT, + + [PIPE_FORMAT_R16G16_FLOAT] = NV50_SURFACE_FORMAT_RG16_FLOAT, + [PIPE_FORMAT_R16G16_UNORM] = NV50_SURFACE_FORMAT_RG16_UNORM, + [PIPE_FORMAT_R16G16_SNORM] = NV50_SURFACE_FORMAT_RG16_SNORM, + [PIPE_FORMAT_R16G16_SINT] = NV50_SURFACE_FORMAT_RG16_SINT, + [PIPE_FORMAT_R16G16_UINT] = NV50_SURFACE_FORMAT_RG16_UINT, + + [PIPE_FORMAT_R8G8_UNORM] = NV50_SURFACE_FORMAT_RG8_UNORM, + [PIPE_FORMAT_R8G8_SNORM] = NV50_SURFACE_FORMAT_RG8_SNORM, + [PIPE_FORMAT_R8G8_SINT] = NV50_SURFACE_FORMAT_RG8_SINT, + [PIPE_FORMAT_R8G8_UINT] = NV50_SURFACE_FORMAT_RG8_UINT, + + [PIPE_FORMAT_R32_FLOAT] = NV50_SURFACE_FORMAT_R32_FLOAT, + [PIPE_FORMAT_R32_SINT] = NV50_SURFACE_FORMAT_R32_SINT, + 
[PIPE_FORMAT_R32_UINT] = NV50_SURFACE_FORMAT_R32_UINT, + + [PIPE_FORMAT_R16_FLOAT] = NV50_SURFACE_FORMAT_R16_FLOAT, + [PIPE_FORMAT_R16_UNORM] = NV50_SURFACE_FORMAT_R16_UNORM, + [PIPE_FORMAT_R16_SNORM] = NV50_SURFACE_FORMAT_R16_SNORM, + [PIPE_FORMAT_R16_SINT] = NV50_SURFACE_FORMAT_R16_SINT, + [PIPE_FORMAT_R16_UINT] = NV50_SURFACE_FORMAT_R16_UINT, + + [PIPE_FORMAT_R8_UNORM] = NV50_SURFACE_FORMAT_R8_UNORM, + [PIPE_FORMAT_R8_SNORM] = NV50_SURFACE_FORMAT_R8_SNORM, + [PIPE_FORMAT_R8_SINT] = NV50_SURFACE_FORMAT_R8_SINT, + [PIPE_FORMAT_R8_UINT] = NV50_SURFACE_FORMAT_R8_UINT, + + [PIPE_FORMAT_A8_UNORM] = NV50_SURFACE_FORMAT_A8_UNORM +}; +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c new file mode 100644 index 00000000000..513d8f96aac --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c @@ -0,0 +1,498 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "nv50/nv50_context.h" +#include "nv50/nv50_resource.h" + +uint32_t +nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz) +{ + uint32_t tile_mode = 0x000; + + if (ny > 64) tile_mode = 0x040; /* height 128 tiles */ + else + if (ny > 32) tile_mode = 0x030; /* height 64 tiles */ + else + if (ny > 16) tile_mode = 0x020; /* height 32 tiles */ + else + if (ny > 8) tile_mode = 0x010; /* height 16 tiles */ + + if (nz == 1) + return tile_mode; + else + if (tile_mode > 0x020) + tile_mode = 0x020; + + if (nz > 16 && tile_mode < 0x020) + return tile_mode | 0x500; /* depth 32 tiles */ + if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */ + if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */ + if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */ + + return tile_mode | 0x100; +} + +static uint32_t +nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz) +{ + return nv50_tex_choose_tile_dims_helper(nx, ny * 2, nz); +} + +static uint32_t +nv50_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed) +{ + const unsigned ms = mt->ms_x + mt->ms_y; + + uint32_t tile_flags; + + if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR)) + return 0; + if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR)) + return 0; + + switch (mt->base.base.format) { + case PIPE_FORMAT_Z16_UNORM: + tile_flags = 0x6c + ms; + break; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + tile_flags = 0x18 + ms; + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + tile_flags = 0x128 + ms; + break; + case PIPE_FORMAT_Z32_FLOAT: + 
tile_flags = 0x40 + ms; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + tile_flags = 0x60 + ms; + break; + default: + switch (util_format_get_blocksizebits(mt->base.base.format)) { + case 128: + assert(ms < 3); + tile_flags = 0x74; + break; + case 64: + switch (ms) { + case 2: tile_flags = 0xfc; break; + case 3: tile_flags = 0xfd; break; + default: + tile_flags = 0x70; + break; + } + break; + case 32: + if (mt->base.base.bind & PIPE_BIND_SCANOUT) { + assert(ms == 0); + tile_flags = 0x7a; + } else { + switch (ms) { + case 2: tile_flags = 0xf8; break; + case 3: tile_flags = 0xf9; break; + default: + tile_flags = 0x70; + break; + } + } + break; + case 16: + case 8: + tile_flags = 0x70; + break; + default: + return 0; + } + if (mt->base.base.bind & PIPE_BIND_CURSOR) + tile_flags = 0; + } + + if (!compressed) + tile_flags &= ~0x180; + + return tile_flags; +} + +void +nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) +{ + struct nv50_miptree *mt = nv50_miptree(pt); + + nouveau_bo_ref(NULL, &mt->base.bo); + + nouveau_fence_ref(NULL, &mt->base.fence); + nouveau_fence_ref(NULL, &mt->base.fence_wr); + + NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_count, -1); + NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_bytes, + -(uint64_t)mt->total_size); + + FREE(mt); +} + +boolean +nv50_miptree_get_handle(struct pipe_screen *pscreen, + struct pipe_resource *pt, + struct winsys_handle *whandle) +{ + struct nv50_miptree *mt = nv50_miptree(pt); + unsigned stride; + + if (!mt || !mt->base.bo) + return FALSE; + + stride = mt->level[0].pitch; + + return nouveau_screen_bo_get_handle(pscreen, + mt->base.bo, + stride, + whandle); +} + +const struct u_resource_vtbl nv50_miptree_vtbl = +{ + nv50_miptree_get_handle, /* get_handle */ + nv50_miptree_destroy, /* resource_destroy */ + nv50_miptree_transfer_map, /* transfer_map */ + u_default_transfer_flush_region, /* transfer_flush_region */ + nv50_miptree_transfer_unmap, /* transfer_unmap */ + 
u_default_transfer_inline_write /* transfer_inline_write */ +}; + +static INLINE boolean +nv50_miptree_init_ms_mode(struct nv50_miptree *mt) +{ + switch (mt->base.base.nr_samples) { + case 8: + mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS8; + mt->ms_x = 2; + mt->ms_y = 1; + break; + case 4: + mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS4; + mt->ms_x = 1; + mt->ms_y = 1; + break; + case 2: + mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS2; + mt->ms_x = 1; + break; + case 1: + case 0: + mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1; + break; + default: + NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples); + return FALSE; + } + return TRUE; +} + +boolean +nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align) +{ + struct pipe_resource *pt = &mt->base.base; + const unsigned blocksize = util_format_get_blocksize(pt->format); + unsigned h = pt->height0; + + if (util_format_is_depth_or_stencil(pt->format)) + return FALSE; + + if ((pt->last_level > 0) || (pt->depth0 > 1) || (pt->array_size > 1)) + return FALSE; + if (mt->ms_x | mt->ms_y) + return FALSE; + + mt->level[0].pitch = align(pt->width0 * blocksize, pitch_align); + + /* Account for very generous prefetch (allocate size as if tiled). */ + h = MAX2(h, 8); + h = util_next_power_of_two(h); + + mt->total_size = mt->level[0].pitch * h; + + return TRUE; +} + +static void +nv50_miptree_init_layout_video(struct nv50_miptree *mt) +{ + const struct pipe_resource *pt = &mt->base.base; + const unsigned blocksize = util_format_get_blocksize(pt->format); + + assert(pt->last_level == 0); + assert(mt->ms_x == 0 && mt->ms_y == 0); + assert(!util_format_is_compressed(pt->format)); + + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + + mt->level[0].tile_mode = 0x20; + mt->level[0].pitch = align(pt->width0 * blocksize, 64); + mt->total_size = align(pt->height0, 16) * mt->level[0].pitch * (mt->layout_3d ? 
pt->depth0 : 1); + + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, NV50_TILE_SIZE(0x20)); + mt->total_size = mt->layer_stride * pt->array_size; + } +} + +static void +nv50_miptree_init_layout_tiled(struct nv50_miptree *mt) +{ + struct pipe_resource *pt = &mt->base.base; + unsigned w, h, d, l; + const unsigned blocksize = util_format_get_blocksize(pt->format); + + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + + w = pt->width0 << mt->ms_x; + h = pt->height0 << mt->ms_y; + + /* For 3D textures, a mipmap is spanned by all the layers, for array + * textures and cube maps, each layer contains its own mipmaps. + */ + d = mt->layout_3d ? pt->depth0 : 1; + + for (l = 0; l <= pt->last_level; ++l) { + struct nv50_miptree_level *lvl = &mt->level[l]; + unsigned tsx, tsy, tsz; + unsigned nbx = util_format_get_nblocksx(pt->format, w); + unsigned nby = util_format_get_nblocksy(pt->format, h); + + lvl->offset = mt->total_size; + + lvl->tile_mode = nv50_tex_choose_tile_dims(nbx, nby, d); + + tsx = NV50_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */ + tsy = NV50_TILE_SIZE_Y(lvl->tile_mode); + tsz = NV50_TILE_SIZE_Z(lvl->tile_mode); + + lvl->pitch = align(nbx * blocksize, tsx); + + mt->total_size += lvl->pitch * align(nby, tsy) * align(d, tsz); + + w = u_minify(w, 1); + h = u_minify(h, 1); + d = u_minify(d, 1); + } + + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, + NV50_TILE_SIZE(mt->level[0].tile_mode)); + mt->total_size = mt->layer_stride * pt->array_size; + } +} + +struct pipe_resource * +nv50_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ) +{ + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); + struct pipe_resource *pt = &mt->base.base; + int ret; + union nouveau_bo_config bo_config; + uint32_t bo_flags; + + if (!mt) + return NULL; + + mt->base.vtbl = &nv50_miptree_vtbl; + *pt = *templ; + 
pipe_reference_init(&pt->reference, 1); + pt->screen = pscreen; + + if (pt->bind & PIPE_BIND_LINEAR) + pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR; + + bo_config.nv50.memtype = nv50_mt_choose_storage_type(mt, TRUE); + + if (!nv50_miptree_init_ms_mode(mt)) { + FREE(mt); + return NULL; + } + + if (unlikely(pt->flags & NV50_RESOURCE_FLAG_VIDEO)) { + nv50_miptree_init_layout_video(mt); + if (pt->flags & NV50_RESOURCE_FLAG_NOALLOC) { + /* BO allocation done by client */ + return pt; + } + } else + if (bo_config.nv50.memtype != 0) { + nv50_miptree_init_layout_tiled(mt); + } else + if (!nv50_miptree_init_layout_linear(mt, 64)) { + FREE(mt); + return NULL; + } + bo_config.nv50.tile_mode = mt->level[0].tile_mode; + + bo_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP; + if (mt->base.base.bind & (PIPE_BIND_CURSOR | PIPE_BIND_DISPLAY_TARGET)) + bo_flags |= NOUVEAU_BO_CONTIG; + + ret = nouveau_bo_new(dev, bo_flags, 4096, mt->total_size, &bo_config, + &mt->base.bo); + if (ret) { + FREE(mt); + return NULL; + } + mt->base.domain = NOUVEAU_BO_VRAM; + mt->base.address = mt->base.bo->offset; + + return pt; +} + +struct pipe_resource * +nv50_miptree_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) +{ + struct nv50_miptree *mt; + unsigned stride; + + /* only supports 2D, non-mipmapped textures for the moment */ + if ((templ->target != PIPE_TEXTURE_2D && + templ->target != PIPE_TEXTURE_RECT) || + templ->last_level != 0 || + templ->depth0 != 1 || + templ->array_size > 1) + return NULL; + + mt = CALLOC_STRUCT(nv50_miptree); + if (!mt) + return NULL; + + mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride); + if (mt->base.bo == NULL) { + FREE(mt); + return NULL; + } + mt->base.domain = NOUVEAU_BO_VRAM; + mt->base.address = mt->base.bo->offset; + + mt->base.base = *templ; + mt->base.vtbl = &nv50_miptree_vtbl; + pipe_reference_init(&mt->base.base.reference, 1); + mt->base.base.screen = pscreen; + mt->level[0].pitch = 
stride; + mt->level[0].offset = 0; + mt->level[0].tile_mode = mt->base.bo->config.nv50.tile_mode; + + /* no need to adjust bo reference count */ + return &mt->base.base; +} + + +/* Offset of zslice @z from start of level @l. */ +INLINE unsigned +nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z) +{ + const struct pipe_resource *pt = &mt->base.base; + + unsigned tds = NV50_TILE_SHIFT_Z(mt->level[l].tile_mode); + unsigned ths = NV50_TILE_SHIFT_Y(mt->level[l].tile_mode); + + unsigned nby = util_format_get_nblocksy(pt->format, + u_minify(pt->height0, l)); + + /* to next 2D tile slice within a 3D tile */ + unsigned stride_2d = NV50_TILE_SIZE_2D(mt->level[l].tile_mode); + + /* to slice in the next (in z direction) 3D tile */ + unsigned stride_3d = (align(nby, (1 << ths)) * mt->level[l].pitch) << tds; + + return (z & ((1 << tds) - 1)) * stride_2d + (z >> tds) * stride_3d; +} + +/* Surface functions. + */ + +struct nv50_surface * +nv50_surface_from_miptree(struct nv50_miptree *mt, + const struct pipe_surface *templ) +{ + struct pipe_surface *ps; + struct nv50_surface *ns = CALLOC_STRUCT(nv50_surface); + if (!ns) + return NULL; + ps = &ns->base; + + pipe_reference_init(&ps->reference, 1); + pipe_resource_reference(&ps->texture, &mt->base.base); + + ps->format = templ->format; + ps->writable = templ->writable; + ps->u.tex.level = templ->u.tex.level; + ps->u.tex.first_layer = templ->u.tex.first_layer; + ps->u.tex.last_layer = templ->u.tex.last_layer; + + ns->width = u_minify(mt->base.base.width0, ps->u.tex.level); + ns->height = u_minify(mt->base.base.height0, ps->u.tex.level); + ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1; + ns->offset = mt->level[templ->u.tex.level].offset; + + /* comment says there are going to be removed, but they're used by the st */ + ps->width = ns->width; + ps->height = ns->height; + + ns->width <<= mt->ms_x; + ns->height <<= mt->ms_y; + + return ns; +} + +struct pipe_surface * 
+nv50_miptree_surface_new(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *templ) +{ + struct nv50_miptree *mt = nv50_miptree(pt); + struct nv50_surface *ns = nv50_surface_from_miptree(mt, templ); + if (!ns) + return NULL; + ns->base.context = pipe; + + if (ns->base.u.tex.first_layer) { + const unsigned l = ns->base.u.tex.level; + const unsigned z = ns->base.u.tex.first_layer; + + if (mt->layout_3d) { + ns->offset += nv50_mt_zslice_offset(mt, l, z); + + /* TODO: switch to depth 1 tiles; but actually this shouldn't happen */ + if (ns->depth > 1 && + (z & (NV50_TILE_SIZE_Z(mt->level[l].tile_mode) - 1))) + NOUVEAU_ERR("Creating unsupported 3D surface !\n"); + } else { + ns->offset += mt->layer_stride * z; + } + } + + return &ns->base; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c new file mode 100644 index 00000000000..73df71c61e2 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -0,0 +1,445 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "nv50/nv50_program.h"
#include "nv50/nv50_context.h"

#include "codegen/nv50_ir_driver.h"

/* Population count of the low 4 bits of val (used to count enabled
 * components in a 4-bit write mask).
 */
static INLINE unsigned
bitcount4(const uint32_t val)
{
   static const uint8_t cnt[16]
   = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
   return cnt[val & 0xf];
}

/* Assign hardware input/output slots for a vertex program.
 * Inputs are packed component-wise in declaration order; outputs are packed
 * the same way while special semantics (PSIZE, CLIPDIST, EDGEFLAG, BCOLOR)
 * additionally record their location in prog->vp for state validation.
 * Called back from the code generator via info->assignSlots.
 */
static int
nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
{
   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
   unsigned i, n, c;

   /* inputs: one slot per enabled component, and mirror the component
    * masks into the VP_ATTR_EN bitfields (4 bits per attribute) */
   n = 0;
   for (i = 0; i < info->numInputs; ++i) {
      prog->in[i].id = i;
      prog->in[i].sn = info->in[i].sn;
      prog->in[i].si = info->in[i].si;
      prog->in[i].hw = n;
      prog->in[i].mask = info->in[i].mask;

      prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);

      for (c = 0; c < 4; ++c)
         if (info->in[i].mask & (1 << c))
            info->in[i].slot[c] = n++;
   }
   prog->in_nr = info->numInputs;

   /* system values only set enable bits here; their slots come below */
   for (i = 0; i < info->numSysVals; ++i) {
      switch (info->sv[i].sn) {
      case TGSI_SEMANTIC_INSTANCEID:
         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
         continue;
      case TGSI_SEMANTIC_VERTEXID:
         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12;
         continue;
      default:
         break;
      }
   }

   /*
    * Corner case: VP has no inputs, but we will still need to submit data to
    * draw it. HW will shout at us and won't draw anything if we don't enable
    * any input, so let's just pretend it's the first one.
    */
   if (prog->vp.attrs[0] == 0 &&
       prog->vp.attrs[1] == 0 &&
       prog->vp.attrs[2] == 0)
      prog->vp.attrs[0] |= 0xf;

   /* VertexID before InstanceID */
   if (info->io.vertexId < info->numSysVals)
      info->sv[info->io.vertexId].slot[0] = n++;
   if (info->io.instanceId < info->numSysVals)
      info->sv[info->io.instanceId].slot[0] = n++;

   /* outputs: pack components and remember special semantic locations */
   n = 0;
   for (i = 0; i < info->numOutputs; ++i) {
      switch (info->out[i].sn) {
      case TGSI_SEMANTIC_PSIZE:
         prog->vp.psiz = i;
         break;
      case TGSI_SEMANTIC_CLIPDIST:
         prog->vp.clpd[info->out[i].si] = n;
         break;
      case TGSI_SEMANTIC_EDGEFLAG:
         prog->vp.edgeflag = i;
         break;
      case TGSI_SEMANTIC_BCOLOR:
         prog->vp.bfc[info->out[i].si] = i;
         break;
      default:
         break;
      }
      prog->out[i].id = i;
      prog->out[i].sn = info->out[i].sn;
      prog->out[i].si = info->out[i].si;
      prog->out[i].hw = n;
      prog->out[i].mask = info->out[i].mask;

      for (c = 0; c < 4; ++c)
         if (info->out[i].mask & (1 << c))
            info->out[i].slot[c] = n++;
   }
   prog->out_nr = info->numOutputs;
   prog->max_out = n;

   /* translate PSIZE from an output index to its hw slot */
   if (prog->vp.psiz < info->numOutputs)
      prog->vp.psiz = prog->out[prog->vp.psiz].hw;

   return 0;
}

/* Assign hardware input/output slots for a fragment program.
 * nv50 wants flat-shaded FP inputs placed after the non-flat ones, and
 * POSITION/FACE do not go through the RESULT_MAP; the counts of varying
 * and total interpolants end up in prog->fp.interp.
 */
static int
nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
{
   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
   unsigned i, n, m, c;
   unsigned nvary;
   unsigned nflat;
   unsigned nintp = 0;

   /* count recorded non-flat inputs */
   for (m = 0, i = 0; i < info->numInputs; ++i) {
      switch (info->in[i].sn) {
      case TGSI_SEMANTIC_POSITION:
      case TGSI_SEMANTIC_FACE:
         continue;
      default:
         m += info->in[i].flat ? 0 : 1;
         break;
      }
   }
   /* careful: id may be != i in info->in[prog->in[i].id] */

   /* Fill prog->in[] so that non-flat inputs are first and
    * kick out special inputs that don't use the RESULT_MAP.
    */
   for (n = 0, i = 0; i < info->numInputs; ++i) {
      if (info->in[i].sn == TGSI_SEMANTIC_POSITION) {
         /* position components go into the high byte of fp.interp */
         prog->fp.interp |= info->in[i].mask << 24;
         for (c = 0; c < 4; ++c)
            if (info->in[i].mask & (1 << c))
               info->in[i].slot[c] = nintp++;
      } else
      if (info->in[i].sn == TGSI_SEMANTIC_FACE) {
         info->in[i].slot[0] = 255;
      } else {
         /* flat inputs are appended after the m non-flat ones */
         unsigned j = info->in[i].flat ? m++ : n++;

         if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
            prog->vp.bfc[info->in[i].si] = j;

         prog->in[j].id = i;
         prog->in[j].mask = info->in[i].mask;
         prog->in[j].sn = info->in[i].sn;
         prog->in[j].si = info->in[i].si;
         prog->in[j].linear = info->in[i].linear;

         prog->in_nr++;
      }
   }
   /* always count position.w (bit 3 of the position mask) */
   if (!(prog->fp.interp & (8 << 24))) {
      ++nintp;
      prog->fp.interp |= 8 << 24;
   }

   /* now that ordering is final, hand out contiguous hw slots */
   for (i = 0; i < prog->in_nr; ++i) {
      int j = prog->in[i].id;

      prog->in[i].hw = nintp;
      for (c = 0; c < 4; ++c)
         if (prog->in[i].mask & (1 << c))
            info->in[j].slot[c] = nintp++;
   }
   /* (n == m) if m never increased, i.e. no flat inputs */
   nflat = (n < m) ? (nintp - prog->in[n].hw) : 0;
   nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */
   nvary = nintp - nflat;

   prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
   prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;

   /* put front/back colors right after HPOS */
   prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT;
   for (i = 0; i < 2; ++i)
      if (prog->vp.bfc[i] < 0xff)
         prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16;

   /* FP outputs */

   if (info->prop.fp.numColourResults > 1)
      prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;

   for (i = 0; i < info->numOutputs; ++i) {
      prog->out[i].id = i;
      prog->out[i].sn = info->out[i].sn;
      prog->out[i].si = info->out[i].si;
      prog->out[i].mask = info->out[i].mask;

      /* depth / sample mask get their slots appended at the end below */
      if (i == info->io.fragDepth || i == info->io.sampleMask)
         continue;
      prog->out[i].hw = info->out[i].si * 4;

      for (c = 0; c < 4; ++c)
         info->out[i].slot[c] = prog->out[i].hw + c;

      prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
   }

   if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
      info->out[info->io.sampleMask].slot[0] = prog->max_out++;

   if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
      info->out[info->io.fragDepth].slot[2] = prog->max_out++;

   if (!prog->max_out)
      prog->max_out = 4;

   return 0;
}

/* Dispatch slot assignment by shader stage; geometry shaders share the
 * vertex-program path. Returns -1 for unknown shader types.
 */
static int
nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
{
   switch (info->type) {
   case PIPE_SHADER_VERTEX:
      return nv50_vertprog_assign_slots(info);
   case PIPE_SHADER_GEOMETRY:
      return nv50_vertprog_assign_slots(info);
   case PIPE_SHADER_FRAGMENT:
      return nv50_fragprog_assign_slots(info);
   default:
      return -1;
   }
}

/* Build the stream-output (transform feedback) mapping from the gallium
 * pipe_stream_output_info: per-buffer strides/attrib counts, the
 * STRMOUT_BUFFERS_CTRL word, and a map from packed buffer position to
 * output slot. Returns NULL on allocation failure; caller owns the result.
 */
static struct nv50_stream_output_state *
nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
                                  const struct pipe_stream_output_info *pso)
{
   struct nv50_stream_output_state *so;
   unsigned b, i, c;
   unsigned base[4];

   so = MALLOC_STRUCT(nv50_stream_output_state);
   if (!so)
      return NULL;
   memset(so->map, 0xff, sizeof(so->map)); /* 0xff = unused map entry */

   for (b = 0; b < 4; ++b)
      so->num_attribs[b] = 0;
   for (i = 0; i < pso->num_outputs; ++i) {
      unsigned end = pso->output[i].dst_offset + pso->output[i].num_components;
      b = pso->output[i].output_buffer;
      assert(b < 4);
      so->num_attribs[b] = MAX2(so->num_attribs[b], end);
   }

   so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED;

   so->stride[0] = pso->stride[0] * 4;
   base[0] = 0;
   for (b = 1; b < 4; ++b) {
      assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]);
      so->stride[b] = so->num_attribs[b] * 4;
      /* any use of buffer > 0 switches from interleaved to separate mode */
      if (so->num_attribs[b])
         so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT;
      base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4);
   }
   if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) {
      assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX);
      so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT;
   }

   so->map_size = base[3] + so->num_attribs[3];

   for (i = 0; i < pso->num_outputs; ++i) {
      const unsigned s = pso->output[i].start_component;
      const unsigned p = pso->output[i].dst_offset;
      const unsigned r = pso->output[i].register_index;
      b = pso->output[i].output_buffer;

      for (c = 0; c < pso->output[i].num_components; ++c)
         so->map[base[b] + p + c] = info->out[r].slot[s + c];
   }

   return so;
}

/* Translate the TGSI program to hardware code via the nv50_ir code
 * generator, filling in prog->code and related state.
 * (continues past this block)
 */
boolean
nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
{
   struct nv50_ir_prog_info *info;
   int ret;
   const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ?
0x40 : 0x80; + + info = CALLOC_STRUCT(nv50_ir_prog_info); + if (!info) + return FALSE; + + info->type = prog->type; + info->target = chipset; + info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; + info->bin.source = (void *)prog->pipe.tokens; + + info->io.ucpCBSlot = 15; + info->io.ucpBase = 0; + info->io.genUserClip = prog->vp.clpd_nr; + + info->assignSlots = nv50_program_assign_varying_slots; + + prog->vp.bfc[0] = 0xff; + prog->vp.bfc[1] = 0xff; + prog->vp.edgeflag = 0xff; + prog->vp.clpd[0] = map_undef; + prog->vp.clpd[1] = map_undef; + prog->vp.psiz = map_undef; + prog->gp.primid = 0x80; + + info->driverPriv = prog; + +#ifdef DEBUG + info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); + info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); +#else + info->optLevel = 3; +#endif + + ret = nv50_ir_generate_code(info); + if (ret) { + NOUVEAU_ERR("shader translation failed: %i\n", ret); + goto out; + } + FREE(info->bin.syms); + + prog->code = info->bin.code; + prog->code_size = info->bin.codeSize; + prog->fixups = info->bin.relocData; + prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); + prog->tls_space = info->bin.tlsSpace; + + if (prog->type == PIPE_SHADER_FRAGMENT) { + if (info->prop.fp.writesDepth) { + prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; + prog->fp.flags[1] = 0x11; + } + if (info->prop.fp.usesDiscard) + prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; + } + + if (prog->pipe.stream_output.num_outputs) + prog->so = nv50_program_create_strmout_state(info, + &prog->pipe.stream_output); + +out: + FREE(info); + return !ret; +} + +boolean +nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) +{ + struct nouveau_heap *heap; + int ret; + uint32_t size = align(prog->code_size, 0x40); + + switch (prog->type) { + case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; + case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break; + case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break; 
+ default: + assert(!"invalid program type"); + return FALSE; + } + + ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); + if (ret) { + /* Out of space: evict everything to compactify the code segment, hoping + * the working set is much smaller and drifts slowly. Improve me ! + */ + while (heap->next) { + struct nv50_program *evict = heap->next->priv; + if (evict) + nouveau_heap_free(&evict->mem); + } + debug_printf("WARNING: out of code space, evicting all shaders.\n"); + ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); + if (ret) { + NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size); + return FALSE; + } + } + prog->code_base = prog->mem->start; + + ret = nv50_tls_realloc(nv50->screen, prog->tls_space); + if (ret < 0) + return FALSE; + if (ret > 0) + nv50->state.new_tls_space = TRUE; + + if (prog->fixups) + nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0); + + nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, + (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, + NOUVEAU_BO_VRAM, prog->code_size, prog->code); + + BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1); + PUSH_DATA (nv50->base.pushbuf, 0); + + return TRUE; +} + +void +nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) +{ + const struct pipe_shader_state pipe = p->pipe; + const ubyte type = p->type; + + if (p->mem) + nouveau_heap_free(&p->mem); + + FREE(p->code); + + FREE(p->fixups); + + FREE(p->so); + + memset(p, 0, sizeof(*p)); + + p->pipe = pipe; + p->type = type; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h new file mode 100644 index 00000000000..13b9516a3e4 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -0,0 +1,106 @@ +/* + * Copyright 2010 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to 
deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef __NV50_PROG_H__
#define __NV50_PROG_H__

struct nv50_context;

#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"

/* Per-varying bookkeeping shared by the VP/GP/FP slot assignment code. */
struct nv50_varying {
   uint8_t id; /* tgsi index */
   uint8_t hw; /* hw index, nv50 wants flat FP inputs last */

   unsigned mask : 4;
   unsigned linear : 1;
   unsigned pad : 3;

   ubyte sn; /* semantic name */
   ubyte si; /* semantic index */
};

/* Stream-output (transform feedback) configuration built by
 * nv50_program_create_strmout_state in nv50_program.c.
 */
struct nv50_stream_output_state
{
   uint32_t ctrl;           /* STRMOUT_BUFFERS_CTRL word */
   uint16_t stride[4];      /* per-buffer stride in bytes */
   uint8_t num_attribs[4];  /* components recorded per buffer */
   uint8_t map_size;
   uint8_t map[128];        /* packed position -> output slot (0xff unused) */
};

/* Driver-side shader object wrapping the gallium pipe_shader_state. */
struct nv50_program {
   struct pipe_shader_state pipe;

   ubyte type;          /* PIPE_SHADER_* stage */
   boolean translated;

   uint32_t *code;
   unsigned code_size;
   unsigned code_base;  /* offset inside the per-stage code heap */
   uint32_t *immd;
   unsigned immd_size;
   unsigned parm_size; /* size limit of uniform buffer */
   uint32_t tls_space; /* required local memory per thread */

   ubyte max_gpr; /* REG_ALLOC_TEMP */
   ubyte max_out; /* REG_ALLOC_RESULT or FP_RESULT_COUNT */

   ubyte in_nr;
   ubyte out_nr;
   struct nv50_varying in[16];
   struct nv50_varying out[16];

   struct {
      uint32_t attrs[3]; /* VP_ATTR_EN_0,1 and VP_GP_BUILTIN_ATTR_EN */
      ubyte psiz; /* output slot of point size */
      ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */
      ubyte edgeflag;
      ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */
      ubyte clpd_nr;
   } vp;

   struct {
      uint32_t flags[2]; /* 0x19a8, 196c */
      uint32_t interp; /* 0x1988 */
      uint32_t colors; /* 0x1904 */
   } fp;

   struct {
      ubyte primid; /* primitive id output register */
      uint8_t vert_count;
      uint8_t prim_type; /* point, line strip or tri strip */
   } gp;

   void *fixups; /* relocation records */

   struct nouveau_heap *mem; /* allocation in the code heap, see upload_code */

   struct nv50_stream_output_state *so;
};

boolean nv50_program_translate(struct nv50_program *, uint16_t chipset);
boolean nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
void nv50_program_destroy(struct nv50_context *, struct nv50_program *);

#endif /* __NV50_PROG_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c
new file mode 100644
index 00000000000..3e9a4096cf0
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c
@@ -0,0 +1,309 @@

#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "translate/translate.h"

#include "nv50/nv50_context.h"
#include "nv50/nv50_resource.h"

#include "nv50/nv50_3d.xml.h"

/* State carried through a single immediate-mode (pushbuf) draw. */
struct push_context {
   struct nouveau_pushbuf *push;

   const void *idxbuf;           /* mapped index buffer, NULL for arrays */

   float edgeflag;
   int edgeflag_attr;

   uint32_t vertex_words;        /* size of one translated vertex in dwords */
   uint32_t packet_vertex_limit; /* max vertices per VERTEX_DATA packet */

   struct translate *translate;  /* vertex format conversion helper */

   boolean primitive_restart;
   uint32_t prim;
   uint32_t restart_index;
   uint32_t instance_id;
};

/* Return the position of the first occurrence of the restart index in the
 * next 'push' 8-bit indices, or 'push' if none is found.
 */
static INLINE unsigned
prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
{
   unsigned i;
   for (i = 0; i < push; ++i)
      if (elts[i] == index)
         break;
   return i;
}

/* Same as above for 16-bit indices (continues past this block). */
static INLINE unsigned
prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
{
   unsigned i;
   for (i = 0; i < push; ++i)
      if (elts[i] == index)
         break;
   return i;
}

/* Restart-index scan for 32-bit indices. */
static INLINE unsigned
prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
{
   unsigned i;
   for (i = 0; i < push; ++i)
      if (elts[i] == index)
         break;
   return i;
}

/* Emit 'count' vertices addressed by 8-bit indices as inline VERTEX_DATA,
 * splitting packets at the restart index (re-emitted via VB_ELEMENT_U32).
 */
static void
emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
{
   uint8_t *elts = (uint8_t *)ctx->idxbuf + start;

   while (count) {
      unsigned push = MIN2(count, ctx->packet_vertex_limit);
      unsigned size, nr;

      nr = push;
      if (ctx->primitive_restart)
         nr = prim_restart_search_i08(elts, push, ctx->restart_index);

      size = ctx->vertex_words * nr;

      BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);

      ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id,
                                ctx->push->cur);

      ctx->push->cur += size;
      count -= nr;
      elts += nr;

      if (nr != push) {
         /* hit the restart index: skip it and signal the restart to hw */
         count--;
         elts++;
         BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1);
         PUSH_DATA (ctx->push, ctx->restart_index);
      }
   }
}

/* As emit_vertices_i08, for 16-bit indices. */
static void
emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
{
   uint16_t *elts = (uint16_t *)ctx->idxbuf + start;

   while (count) {
      unsigned push = MIN2(count, ctx->packet_vertex_limit);
      unsigned size, nr;

      nr = push;
      if (ctx->primitive_restart)
         nr = prim_restart_search_i16(elts, push, ctx->restart_index);

      size = ctx->vertex_words * nr;

      BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);

      ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id,
                                 ctx->push->cur);

      ctx->push->cur += size;
      count -= nr;
      elts += nr;

      if (nr != push) {
         count--;
         elts++;
         BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1);
         PUSH_DATA (ctx->push, ctx->restart_index);
      }
   }
}

/* As emit_vertices_i08, for 32-bit indices. */
static void
emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
{
   uint32_t *elts = (uint32_t *)ctx->idxbuf + start;

   while (count) {
      unsigned push = MIN2(count, ctx->packet_vertex_limit);
      unsigned size, nr;

      nr = push;
      if (ctx->primitive_restart)
         nr = prim_restart_search_i32(elts, push, ctx->restart_index);

      size = ctx->vertex_words * nr;

      BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);

      ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id,
                               ctx->push->cur);

      ctx->push->cur += size;
      count -= nr;
      elts += nr;

      if (nr != push) {
         count--;
         elts++;
         BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1);
         PUSH_DATA (ctx->push, ctx->restart_index);
      }
   }
}

/* Non-indexed path: emit 'count' sequential vertices starting at 'start'. */
static void
emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
{
   while (count) {
      unsigned push = MIN2(count, ctx->packet_vertex_limit);
      unsigned size = ctx->vertex_words * push;

      BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);

      ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id,
                          ctx->push->cur);
      ctx->push->cur += size;
      count -= push;
      start += push;
   }
}


#define NV50_PRIM_GL_CASE(n) \
   case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n

/* Map a PIPE_PRIM_* value to the hw VERTEX_BEGIN_GL primitive code;
 * unknown values fall back to POINTS.
 */
static INLINE unsigned
nv50_prim_gl(unsigned prim)
{
   switch (prim) {
   NV50_PRIM_GL_CASE(POINTS);
   NV50_PRIM_GL_CASE(LINES);
   NV50_PRIM_GL_CASE(LINE_LOOP);
   NV50_PRIM_GL_CASE(LINE_STRIP);
   NV50_PRIM_GL_CASE(TRIANGLES);
   NV50_PRIM_GL_CASE(TRIANGLE_STRIP);
   NV50_PRIM_GL_CASE(TRIANGLE_FAN);
   NV50_PRIM_GL_CASE(QUADS);
   NV50_PRIM_GL_CASE(QUAD_STRIP);
   NV50_PRIM_GL_CASE(POLYGON);
   NV50_PRIM_GL_CASE(LINES_ADJACENCY);
   NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
   NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
   NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
   /*
   NV50_PRIM_GL_CASE(PATCHES); */
   default:
      return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
      break;
   }
}

/* Draw by pushing translated vertex data inline through the pushbuf instead
 * of pointing the hw at vertex buffers (used e.g. when buffers are not
 * GPU-accessible). Handles indexed and sequential draws, instancing, and
 * primitive restart.
 */
void
nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
{
   struct push_context ctx;
   unsigned i, index_size;
   unsigned inst_count = info->instance_count;
   unsigned vert_count = info->count;
   boolean apply_bias = info->indexed && info->index_bias;

   ctx.push = nv50->base.pushbuf;
   ctx.translate = nv50->vertex->translate;
   ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit;
   ctx.vertex_words = nv50->vertex->vertex_size;

   /* point the translate object at each (possibly biased) vertex buffer */
   for (i = 0; i < nv50->num_vtxbufs; ++i) {
      const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
      const uint8_t *data;

      if (unlikely(vb->buffer))
         data = nouveau_resource_map_offset(&nv50->base,
            nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD);
      else
         data = vb->user_buffer;

      /* index bias is folded into the buffer pointer, except for
       * per-instance buffers */
      if (apply_bias && likely(!(nv50->vertex->instance_bufs & (1 << i))))
         data += (ptrdiff_t)info->index_bias * vb->stride;

      ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
   }

   if (info->indexed) {
      if (nv50->idxbuf.buffer) {
         ctx.idxbuf = nouveau_resource_map_offset(&nv50->base,
            nv04_resource(nv50->idxbuf.buffer), nv50->idxbuf.offset,
            NOUVEAU_BO_RD);
      } else {
         ctx.idxbuf = nv50->idxbuf.user_buffer;
      }
      if (!ctx.idxbuf)
         return;
      index_size = nv50->idxbuf.index_size;
      ctx.primitive_restart = info->primitive_restart;
      ctx.restart_index = info->restart_index;
   } else {
      if (unlikely(info->count_from_stream_output)) {
         /* derive the vertex count from the stream-output query result */
         struct pipe_context *pipe = &nv50->base.pipe;
         struct nv50_so_target *targ;
         targ = nv50_so_target(info->count_from_stream_output);
         if (!targ->pq) {
            NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
            return;
         }
         pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
         vert_count /= targ->stride;
      }
      ctx.idxbuf = NULL;
      index_size = 0;
      ctx.primitive_restart = FALSE;
      ctx.restart_index = 0;
   }

   ctx.instance_id = info->start_instance;
   ctx.prim = nv50_prim_gl(info->mode);

   if (info->primitive_restart) {
      BEGIN_NV04(ctx.push, NV50_3D(PRIM_RESTART_ENABLE), 2);
      PUSH_DATA (ctx.push, 1);
      PUSH_DATA (ctx.push, info->restart_index);
   } else
   if (nv50->state.prim_restart) {
      BEGIN_NV04(ctx.push, NV50_3D(PRIM_RESTART_ENABLE), 1);
      PUSH_DATA (ctx.push, 0);
   }
   nv50->state.prim_restart = info->primitive_restart;

   while (inst_count--) {
      BEGIN_NV04(ctx.push, NV50_3D(VERTEX_BEGIN_GL), 1);
      PUSH_DATA (ctx.push, ctx.prim);
      switch (index_size) {
      case 0:
         emit_vertices_seq(&ctx, info->start, vert_count);
         break;
      case 1:
         emit_vertices_i08(&ctx, info->start, vert_count);
         break;
      case 2:
         emit_vertices_i16(&ctx, info->start, vert_count);
         break;
      case 4:
         emit_vertices_i32(&ctx, info->start, vert_count);
         break;
      default:
         assert(0);
         break;
      }
      BEGIN_NV04(ctx.push, NV50_3D(VERTEX_END_GL), 1);
      PUSH_DATA (ctx.push, 0);

      ctx.instance_id++;
      ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
   }
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
new file mode 100644
index 00000000000..6f25a0822c4
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -0,0 +1,399 @@
/*
 * Copyright 2011 Nouveau Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Christoph Bumiller
 */

#define NV50_PUSH_EXPLICIT_SPACE_CHECKING

#include "nv50/nv50_context.h"
#include "nv_object.xml.h"

/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
 * (since we use only a single GPU channel per screen) will not work properly.
 *
 * The first is not that big of an issue because OpenGL does not allow nested
 * queries anyway.
 */

struct nv50_query {
   uint32_t *data;     /* CPU mapping of the query report buffer */
   uint16_t type;      /* PIPE_QUERY_* (or NVA0_QUERY_*) */
   uint16_t index;     /* stream-output buffer index, see save_offset below */
   uint32_t sequence;  /* fence value the GPU writes on completion */
   struct nouveau_bo *bo;
   uint32_t base;
   uint32_t offset; /* base + i * 16 */
   boolean ready;
   boolean flushed;
   boolean is64bit;
   struct nouveau_mm_allocation *mm;
};

#define NV50_QUERY_ALLOC_SPACE 128

static INLINE struct nv50_query *
nv50_query(struct pipe_query *pipe)
{
   return (struct nv50_query *)pipe;
}

/* (Re)allocate the query's report buffer in GART; size == 0 only frees.
 * A freed buffer is released through a fence when the query was still
 * pending so the GPU cannot write to recycled memory.
 */
static boolean
nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
{
   struct nv50_screen *screen = nv50->screen;
   int ret;

   if (q->bo) {
      nouveau_bo_ref(NULL, &q->bo);
      if (q->mm) {
         if (q->ready)
            nouveau_mm_free(q->mm);
         else
            nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work,
                               q->mm);
      }
   }
   if (size) {
      q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
      if (!q->bo)
         return FALSE;
      q->offset = q->base;

      ret = nouveau_bo_map(q->bo, 0, screen->base.client);
      if (ret) {
         nv50_query_allocate(nv50, q, 0);
         return FALSE;
      }
      q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
   }
   return TRUE;
}

static void
nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
{
   nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
   FREE(nv50_query(pq));
}

/* pipe_context::create_query implementation. */
static struct pipe_query *
nv50_query_create(struct pipe_context *pipe, unsigned type)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nv50_query *q;

   q = CALLOC_STRUCT(nv50_query);
   if (!q)
      return NULL;

   if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) {
      FREE(q);
      return NULL;
   }

   q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
                 type == PIPE_QUERY_PRIMITIVES_EMITTED ||
                 type == PIPE_QUERY_SO_STATISTICS);
   q->type = type;

   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
      q->offset -= 16;
      q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
   }

   return (struct pipe_query *)q;
}

/* Emit a QUERY_GET: the GPU writes a report (value of type 'get' plus the
 * sequence number) at q->offset + offset in the query buffer.
 */
static void
nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
               unsigned offset, uint32_t get)
{
   offset += q->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, get);
}

/* pipe_context::begin_query — record the starting counter values (the
 * magic 'get' codes select hardware report types; values taken as-is
 * from the code, semantics per hw docs).
 */
static void
nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_query *q = nv50_query(pq);

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to FALSE even *after* we re-
    * initialized it to TRUE.
    */
   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
      q->offset += 16;
      q->data += 16 / sizeof(*q->data);
      if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE)
         nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
      q->data[1] = 1; /* initial render condition = TRUE */
   }
   if (!q->is64bit)
      q->data[0] = q->sequence++; /* the previously used one */

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      PUSH_SPACE(push, 4);
      BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
      PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
      BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
      PUSH_DATA (push, 1);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_query_get(push, q, 0x10, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_query_get(push, q, 0x20, 0x05805002);
      nv50_query_get(push, q, 0x30, 0x06805002);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_query_get(push, q, 0x10, 0x00005002);
      break;
   default:
      break;
   }
   q->ready = FALSE;
}

/* pipe_context::end_query — record the final counter values; results are
 * later computed as end minus start (see nv50_query_result).
 */
static void
nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_query *q = nv50_query(pq);

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      nv50_query_get(push, q, 0, 0x0100f002);
      PUSH_SPACE(push, 2);
      BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
      PUSH_DATA (push, 0);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_query_get(push, q, 0, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_query_get(push, q, 0, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_query_get(push, q, 0x00, 0x05805002);
      nv50_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_TIMESTAMP:
      q->sequence++;
      /* fall through */
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      q->sequence++;
      nv50_query_get(push, q, 0, 0x1000f010);
      break;
   case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* no hw report needed, result is constant */
      break;
   default:
      assert(0);
      break;
   }
   q->ready = q->flushed = FALSE;
}

/* A query is ready when the GPU has written back the expected sequence. */
static INLINE boolean
nv50_query_ready(struct nv50_query *q)
{
   return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
}

/* pipe_context::get_query_result — poll or wait for the report and convert
 * the raw report words into the pipe_query_result layout for q->type.
 */
static boolean
nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nv50_query *q = nv50_query(pq);
   uint64_t *res64 = (uint64_t *)result;
   uint32_t *res32 = (uint32_t *)result;
   boolean *res8 = (boolean *)result;
   uint64_t *data64 = (uint64_t *)q->data;

   if (!q->ready) /* update ? */
      q->ready = nv50_query_ready(q);
   if (!q->ready) {
      if (!wait) {
         /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
         if (!q->flushed) {
            q->flushed = TRUE;
            PUSH_KICK(nv50->base.pushbuf);
         }
         return FALSE;
      }
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
         return FALSE;
   }
   q->ready = TRUE;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      res64[0] = 1000000000;
      res8[8] = FALSE;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      res32[0] = q->data[1];
      break;
   default:
      return FALSE;
   }

   return TRUE;
}

/* Make the GPU FIFO wait (semaphore acquire) until the query's sequence
 * value has been written (NV84+ semaphore methods).
 */
void
nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
{
   struct nv50_query *q = nv50_query(pq);
   unsigned offset = q->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}

/* pipe_context::render_condition — make subsequent rendering conditional on
 * a query result (NULL query disables conditional rendering).
 * NOTE(review): 'condition' is stored but the hw mode emitted is always
 * RES_NON_ZERO — inverted conditions appear unhandled here.
 */
static void
nv50_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq,
                      boolean condition, uint mode)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_query *q;

   nv50->cond_query = pq;
   nv50->cond_cond = condition;
   nv50->cond_mode = mode;

   PUSH_SPACE(push, 6);

   if (!pq) {
      BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
      PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
      return;
   }
   q = nv50_query(pq);

   if (mode == PIPE_RENDER_COND_WAIT ||
       mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
      PUSH_DATA (push, 0);
   }

   BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
   PUSH_DATA (push, NV50_3D_COND_MODE_RES_NON_ZERO);
}

/* Feed a query result directly into the pushbuf as an IB entry (used for
 * e.g. draw parameters sourced from query buffers).
 */
void
nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
                          struct pipe_query *pq, unsigned result_offset)
{
   struct nv50_query *q = nv50_query(pq);

   /* XXX: does this exist ? */
#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))

   nouveau_pushbuf_space(push, 0, 0, 1);
   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
                        NV50_IB_ENTRY_1_NO_PREFETCH);
}

/* Record the current offset of a stream-output buffer into its query,
 * optionally serializing the 3D engine first.
 */
void
nva0_so_target_save_offset(struct pipe_context *pipe,
                           struct pipe_stream_output_target *ptarg,
                           unsigned index, boolean serialize)
{
   struct nv50_so_target *targ = nv50_so_target(ptarg);

   if (serialize) {
      struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
      PUSH_SPACE(push, 2);
      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
      PUSH_DATA (push, 0);
   }

   nv50_query(targ->pq)->index = index;
   nv50_query_end(pipe, targ->pq);
}

/* Hook the query entry points into the context's pipe vtable. */
void
nv50_init_query_functions(struct nv50_context *nv50)
{
   struct pipe_context *pipe = &nv50->base.pipe;

   pipe->create_query = nv50_query_create;
   pipe->destroy_query = nv50_query_destroy;
   pipe->begin_query = nv50_query_begin;
   pipe->end_query = nv50_query_end;
   pipe->get_query_result = nv50_query_result;
   pipe->render_condition = nv50_render_condition;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.c b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
new file mode 100644
index 00000000000..7fbb0a92bf6
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
@@ -0,0 +1,104 @@

#include "pipe/p_context.h"
#include "util/u_inlines.h"
#include "util/u_format.h"

#include "nouveau_screen.h"

#include "nv50/nv50_resource.h"

/* pipe_screen::resource_create — buffers go to the shared nouveau buffer
 * path, everything else becomes a miptree.
 */
static struct pipe_resource *
nv50_resource_create(struct pipe_screen *screen,
                     const struct pipe_resource *templ)
{
   switch (templ->target) {
   case PIPE_BUFFER:
      return nouveau_buffer_create(screen, templ);
   default:
      return nv50_miptree_create(screen, templ);
   }
}

/* pipe_screen::resource_from_handle — only textures can be shared. */
static struct pipe_resource *
nv50_resource_from_handle(struct pipe_screen * screen,
                          const struct pipe_resource *templ,
                          struct winsys_handle *whandle)
{
   if (templ->target == PIPE_BUFFER)
      return NULL;
   else
      return nv50_miptree_from_handle(screen,
templ, whandle); +} + +struct pipe_surface * +nv50_surface_from_buffer(struct pipe_context *pipe, + struct pipe_resource *pbuf, + const struct pipe_surface *templ) +{ + struct nv50_surface *sf = CALLOC_STRUCT(nv50_surface); + if (!sf) + return NULL; + + pipe_reference_init(&sf->base.reference, 1); + pipe_resource_reference(&sf->base.texture, pbuf); + + sf->base.format = templ->format; + sf->base.writable = templ->writable; + sf->base.u.buf.first_element = templ->u.buf.first_element; + sf->base.u.buf.last_element = templ->u.buf.last_element; + + sf->offset = + templ->u.buf.first_element * util_format_get_blocksize(sf->base.format); + + sf->offset &= ~0x7f; /* FIXME: RT_ADDRESS requires 128 byte alignment */ + + sf->width = templ->u.buf.last_element - templ->u.buf.first_element + 1; + sf->height = 1; + sf->depth = 1; + + sf->base.width = sf->width; + sf->base.height = sf->height; + + sf->base.context = pipe; + return &sf->base; +} + +static struct pipe_surface * +nv50_surface_create(struct pipe_context *pipe, + struct pipe_resource *pres, + const struct pipe_surface *templ) +{ + if (unlikely(pres->target == PIPE_BUFFER)) + return nv50_surface_from_buffer(pipe, pres, templ); + return nv50_miptree_surface_new(pipe, pres, templ); +} + +void +nv50_surface_destroy(struct pipe_context *pipe, struct pipe_surface *ps) +{ + struct nv50_surface *s = nv50_surface(ps); + + pipe_resource_reference(&ps->texture, NULL); + + FREE(s); +} + +void +nv50_init_resource_functions(struct pipe_context *pcontext) +{ + pcontext->transfer_map = u_transfer_map_vtbl; + pcontext->transfer_flush_region = u_transfer_flush_region_vtbl; + pcontext->transfer_unmap = u_transfer_unmap_vtbl; + pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; + pcontext->create_surface = nv50_surface_create; + pcontext->surface_destroy = nv50_surface_destroy; +} + +void +nv50_screen_init_resource_functions(struct pipe_screen *pscreen) +{ + pscreen->resource_create = nv50_resource_create; + 
pscreen->resource_from_handle = nv50_resource_from_handle; + pscreen->resource_get_handle = u_resource_get_handle_vtbl; + pscreen->resource_destroy = u_resource_destroy_vtbl; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h new file mode 100644 index 00000000000..c06daa31c5d --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h @@ -0,0 +1,153 @@ + +#ifndef __NV50_RESOURCE_H__ +#define __NV50_RESOURCE_H__ + +#include "util/u_transfer.h" +#include "util/u_double_list.h" + +#include "nouveau_winsys.h" +#include "nouveau_buffer.h" + +#ifndef __NVC0_RESOURCE_H__ /* make sure we don't use these in nvc0: */ + +void +nv50_init_resource_functions(struct pipe_context *pcontext); + +void +nv50_screen_init_resource_functions(struct pipe_screen *pscreen); + +#define NV50_RESOURCE_FLAG_VIDEO (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 0) +#define NV50_RESOURCE_FLAG_NOALLOC (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 1) + +#define NV50_TILE_SHIFT_X(m) 6 +#define NV50_TILE_SHIFT_Y(m) ((((m) >> 4) & 0xf) + 2) +#define NV50_TILE_SHIFT_Z(m) ((((m) >> 8) & 0xf) + 0) + +#define NV50_TILE_SIZE_X(m) 64 +#define NV50_TILE_SIZE_Y(m) ( 4 << (((m) >> 4) & 0xf)) +#define NV50_TILE_SIZE_Z(m) ( 1 << (((m) >> 8) & 0xf)) + +#define NV50_TILE_SIZE_2D(m) (NV50_TILE_SIZE_X(m) << NV50_TILE_SHIFT_Y(m)) + +#define NV50_TILE_SIZE(m) (NV50_TILE_SIZE_2D(m) << NV50_TILE_SHIFT_Z(m)) + +#endif /* __NVC0_RESOURCE_H__ */ + +uint32_t +nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz); + +struct nv50_miptree_level { + uint32_t offset; + uint32_t pitch; + uint32_t tile_mode; +}; + +#define NV50_MAX_TEXTURE_LEVELS 16 + +struct nv50_miptree { + struct nv04_resource base; + struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS]; + uint32_t total_size; + uint32_t layer_stride; + boolean layout_3d; /* TRUE if layer count varies with mip level */ + uint8_t ms_x; /* log2 of number of samples in x/y dimension */ + uint8_t ms_y; + 
uint8_t ms_mode; +}; + +static INLINE struct nv50_miptree * +nv50_miptree(struct pipe_resource *pt) +{ + return (struct nv50_miptree *)pt; +} + + +#define NV50_TEXVIEW_SCALED_COORDS (1 << 0) +#define NV50_TEXVIEW_FILTER_MSAA8 (1 << 1) +#define NV50_TEXVIEW_ACCESS_RESOLVE (1 << 2) + + +/* Internal functions: + */ +boolean +nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align); + +struct pipe_resource * +nv50_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmp); + +void +nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt); + +struct pipe_resource * +nv50_miptree_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *template, + struct winsys_handle *whandle); + +boolean +nv50_miptree_get_handle(struct pipe_screen *pscreen, + struct pipe_resource *pt, + struct winsys_handle *whandle); + +struct nv50_surface { + struct pipe_surface base; + uint32_t offset; + uint32_t width; + uint16_t height; + uint16_t depth; +}; + +static INLINE struct nv50_surface * +nv50_surface(struct pipe_surface *ps) +{ + return (struct nv50_surface *)ps; +} + +static INLINE enum pipe_format +nv50_zs_to_s_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: return PIPE_FORMAT_X24S8_UINT; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: return PIPE_FORMAT_S8X24_UINT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return PIPE_FORMAT_X32_S8X24_UINT; + default: + return format; + } +} + +#ifndef __NVC0_RESOURCE_H__ + +unsigned +nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z); + +struct pipe_surface * +nv50_miptree_surface_new(struct pipe_context *, + struct pipe_resource *, + const struct pipe_surface *templ); + +void * +nv50_miptree_transfer_map(struct pipe_context *pctx, + struct pipe_resource *res, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer); +void +nv50_miptree_transfer_unmap(struct pipe_context 
*pcontext, + struct pipe_transfer *ptx); + +#endif /* __NVC0_RESOURCE_H__ */ + +struct nv50_surface * +nv50_surface_from_miptree(struct nv50_miptree *mt, + const struct pipe_surface *templ); + +struct pipe_surface * +nv50_surface_from_buffer(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *templ); + +void +nv50_surface_destroy(struct pipe_context *, struct pipe_surface *); + +#endif /* __NV50_RESOURCE_H__ */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c new file mode 100644 index 00000000000..f454ec77656 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -0,0 +1,845 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "util/u_format.h" +#include "util/u_format_s3tc.h" +#include "pipe/p_screen.h" + +#include "nv50/nv50_context.h" +#include "nv50/nv50_screen.h" + +#include "nouveau_vp3_video.h" + +#include "nv_object.xml.h" +#include <errno.h> + +#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS +# define NOUVEAU_GETPARAM_GRAPH_UNITS 13 +#endif + +/* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */ +#define LOCAL_WARPS_ALLOC 32 +/* affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP */ +#define STACK_WARPS_ALLOC 32 + +#define THREADS_IN_WARP 32 + +#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float)) + +static boolean +nv50_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bindings) +{ + if (sample_count > 8) + return FALSE; + if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */ + return FALSE; + if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128) + return FALSE; + + if (!util_format_is_supported(format, bindings)) + return FALSE; + + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS) + return FALSE; + break; + default: + break; + } + + /* transfers & shared are always supported */ + bindings &= ~(PIPE_BIND_TRANSFER_READ | + PIPE_BIND_TRANSFER_WRITE | + PIPE_BIND_SHARED); + + return (nv50_format_table[format].usage & bindings) == bindings; +} + +static int +nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; + + switch (param) { + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 64; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 14; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 12; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 14; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 512; + case PIPE_CAP_MIN_TEXEL_OFFSET: + return -8; + case PIPE_CAP_MAX_TEXEL_OFFSET: + return 7; + case 
PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_TEXTURE_SHADOW_MAP: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_SCALED_RESOLVE: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + return 1; + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return 65536; + case PIPE_CAP_SEAMLESS_CUBE_MAP: + return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS; + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + return 0; + case PIPE_CAP_CUBE_MAP_ARRAY: + return 0; + /* + return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS; + */ + case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_SM3: + return 1; + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 140; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 1; + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_VERTEX_COLOR_CLAMPED: + return 1; + case PIPE_CAP_QUERY_TIMESTAMP: + case PIPE_CAP_QUERY_TIME_ELAPSED: + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return 4; + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + return 64; + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + return (class_3d >= NVA0_3D_CLASS) ? 
1 : 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_INDEP_BLEND_ENABLE: + return 1; + case PIPE_CAP_INDEP_BLEND_FUNC: + return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 0; + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_TEXTURE_BARRIER: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_START_INSTANCE: + return 1; + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + return 0; /* state trackers will know better */ + case PIPE_CAP_USER_CONSTANT_BUFFERS: + case PIPE_CAP_USER_INDEX_BUFFERS: + case PIPE_CAP_USER_VERTEX_BUFFERS: + return 1; + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 256; + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return 1; /* 256 for binding as RT, but that's not possible in GL */ + case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + return NOUVEAU_MIN_BUFFER_MAP_ALIGN; + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_TGSI_TEXCOORD: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return 0; + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 1; + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + return 0; + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50; + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static int +nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, + enum 
pipe_shader_cap param) +{ + switch (shader) { + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_GEOMETRY: + case PIPE_SHADER_FRAGMENT: + break; + default: + return 0; + } + + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 4; + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_VERTEX) + return 32; + return 0x300 / 16; + case PIPE_SHADER_CAP_MAX_CONSTS: + return 65536 / 16; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return NV50_MAX_PIPE_CONSTBUFS; + case PIPE_SHADER_CAP_MAX_ADDRS: + return 1; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + return shader != PIPE_SHADER_FRAGMENT; + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; + case PIPE_SHADER_CAP_MAX_TEMPS: + return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 1; + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + return 0; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; /* please inline, or provide function declarations */ + case PIPE_SHADER_CAP_INTEGERS: + return 1; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + return 32; + default: + NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); + return 0; + } +} + +static float +nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 10.0f; + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 64.0f; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 4.0f; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0.0f; + } +} + +static void 
+nv50_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + + if (screen->base.fence.current) { + nouveau_fence_wait(screen->base.fence.current); + nouveau_fence_ref (NULL, &screen->base.fence.current); + } + if (screen->base.pushbuf) + screen->base.pushbuf->user_priv = NULL; + + if (screen->blitter) + nv50_blitter_destroy(screen); + + nouveau_bo_ref(NULL, &screen->code); + nouveau_bo_ref(NULL, &screen->tls_bo); + nouveau_bo_ref(NULL, &screen->stack_bo); + nouveau_bo_ref(NULL, &screen->txc); + nouveau_bo_ref(NULL, &screen->uniforms); + nouveau_bo_ref(NULL, &screen->fence.bo); + + nouveau_heap_destroy(&screen->vp_code_heap); + nouveau_heap_destroy(&screen->gp_code_heap); + nouveau_heap_destroy(&screen->fp_code_heap); + + FREE(screen->tic.entries); + + nouveau_object_del(&screen->tesla); + nouveau_object_del(&screen->eng2d); + nouveau_object_del(&screen->m2mf); + nouveau_object_del(&screen->sync); + + nouveau_screen_fini(&screen->base); + + FREE(screen); +} + +static void +nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct nouveau_pushbuf *push = screen->base.pushbuf; + + /* we need to do it after possible flush in MARK_RING */ + *sequence = ++screen->base.fence.sequence; + + PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4)); + PUSH_DATAh(push, screen->fence.bo->offset); + PUSH_DATA (push, screen->fence.bo->offset); + PUSH_DATA (push, *sequence); + PUSH_DATA (push, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 | + NV50_3D_QUERY_GET_UNK4 | + NV50_3D_QUERY_GET_UNIT_CROP | + NV50_3D_QUERY_GET_TYPE_QUERY | + NV50_3D_QUERY_GET_QUERY_SELECT_ZERO | + NV50_3D_QUERY_GET_SHORT); +} + +static u32 +nv50_screen_fence_update(struct pipe_screen *pscreen) +{ + return nv50_screen(pscreen)->fence.map[0]; +} + +static void +nv50_screen_init_hwctx(struct nv50_screen *screen) +{ + struct nouveau_pushbuf *push = screen->base.pushbuf; + struct nv04_fifo 
*fifo; + unsigned i; + + fifo = (struct nv04_fifo *)screen->base.channel->data; + + BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->m2mf->handle); + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3); + PUSH_DATA (push, screen->sync->handle); + PUSH_DATA (push, fifo->vram); + PUSH_DATA (push, fifo->vram); + + BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->eng2d->handle); + BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4); + PUSH_DATA (push, screen->sync->handle); + PUSH_DATA (push, fifo->vram); + PUSH_DATA (push, fifo->vram); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_2D(OPERATION), 1); + PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY); + BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, SUBC_2D(0x0888), 1); + PUSH_DATA (push, 1); + + BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->tesla->handle); + + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + + BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1); + PUSH_DATA (push, screen->sync->handle); + BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11); + for (i = 0; i < 11; ++i) + PUSH_DATA(push, fifo->vram); + BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN); + for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i) + PUSH_DATA(push, fifo->vram); + + BEGIN_NV04(push, NV50_3D(REG_MODE), 1); + PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED); + BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1); + PUSH_DATA (push, 0xf); + + if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) { + BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1); + PUSH_DATA (push, 0x18); + } + + BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); + PUSH_DATA (push, 1); + + BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, 
NV50_3D(MULTISAMPLE_MODE), 1); + PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1); + BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1); + PUSH_DATA (push, 1); + + if (screen->tesla->oclass >= NVA0_3D_CLASS) { + BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1); + PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); + } + + BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1); + PUSH_DATA (push, 0x3f); + + BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2)); + PUSH_DATA (push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2)); + + BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2)); + PUSH_DATA (push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2)); + + BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2)); + PUSH_DATA (push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2)); + + BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->tls_bo->offset); + PUSH_DATA (push, screen->tls_bo->offset); + PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8)); + + BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->stack_bo->offset); + PUSH_DATA (push, screen->stack_bo->offset); + PUSH_DATA (push, 4); + + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->uniforms->offset + (0 << 16)); + PUSH_DATA (push, screen->uniforms->offset + (0 << 16)); + PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000); + + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->uniforms->offset + (1 
<< 16)); + PUSH_DATA (push, screen->uniforms->offset + (1 << 16)); + PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000); + + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->uniforms->offset + (2 << 16)); + PUSH_DATA (push, screen->uniforms->offset + (2 << 16)); + PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000); + + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->uniforms->offset + (3 << 16)); + PUSH_DATA (push, screen->uniforms->offset + (3 << 16)); + PUSH_DATA (push, (NV50_CB_AUX << 16) | 0x0200); + + BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3); + PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01); + PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21); + PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31); + + /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */ + BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); + PUSH_DATA (push, ((1 << 9) << 6) | NV50_CB_AUX); + BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + (1 << 9)); + PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + (1 << 9)); + + /* max TIC (bits 4:8) & TSC bindings, per program type */ + for (i = 0; i < 3; ++i) { + BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1); + PUSH_DATA (push, 0x54); + } + + BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset); + PUSH_DATA (push, screen->txc->offset); + PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); + + BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset + 65536); + PUSH_DATA (push, screen->txc->offset + 65536); + PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); + + BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1); + 
PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY); + BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2); + for (i = 0; i < 8 * 2; ++i) + PUSH_DATA(push, 0); + BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(0)), 2); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 1.0f); + + BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1); +#ifdef NV50_SCISSORS_CLIPPING + PUSH_DATA (push, 0x0000); +#else + PUSH_DATA (push, 0x1080); +#endif + + BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1); + PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT); + + /* We use scissors instead of exact view volume clipping, + * so they're always enabled. + */ + BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 3); + PUSH_DATA (push, 1); + PUSH_DATA (push, 8192 << 16); + PUSH_DATA (push, 8192 << 16); + + BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1); + PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL); + BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1); + PUSH_DATA (push, 0x11111111); + BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1); + PUSH_DATA (push, 1); + + PUSH_KICK (push); +} + +static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space, + uint64_t *tls_size) +{ + struct nouveau_device *dev = screen->base.device; + int ret; + + screen->cur_tls_space = util_next_power_of_two(tls_space / ONE_TEMP_SIZE) * + ONE_TEMP_SIZE; + if (nouveau_mesa_debug) + debug_printf("allocating space for %u temps\n", + util_next_power_of_two(tls_space / ONE_TEMP_SIZE)); + *tls_size = screen->cur_tls_space * util_next_power_of_two(screen->TPs) * + screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, + *tls_size, NULL, &screen->tls_bo); + if (ret) { + NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret); + return ret; + } + + return 0; +} + 
+int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space) +{ + struct nouveau_pushbuf *push = screen->base.pushbuf; + int ret; + uint64_t tls_size; + + if (tls_space < screen->cur_tls_space) + return 0; + if (tls_space > screen->max_tls_space) { + /* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC / + * LOCAL_WARPS_NO_CLAMP) */ + NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n", + (unsigned)(tls_space / ONE_TEMP_SIZE), + (unsigned)(screen->max_tls_space / ONE_TEMP_SIZE)); + return -ENOMEM; + } + + nouveau_bo_ref(NULL, &screen->tls_bo); + ret = nv50_tls_alloc(screen, tls_space, &tls_size); + if (ret) + return ret; + + BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->tls_bo->offset); + PUSH_DATA (push, screen->tls_bo->offset); + PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8)); + + return 1; +} + +struct pipe_screen * +nv50_screen_create(struct nouveau_device *dev) +{ + struct nv50_screen *screen; + struct pipe_screen *pscreen; + struct nouveau_object *chan; + uint64_t value; + uint32_t tesla_class; + unsigned stack_size; + int ret; + + screen = CALLOC_STRUCT(nv50_screen); + if (!screen) + return NULL; + pscreen = &screen->base.base; + + ret = nouveau_screen_init(&screen->base, dev); + if (ret) { + NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret); + goto fail; + } + + /* TODO: Prevent FIFO prefetch before transfer of index buffers and + * admit them to VRAM. 
+ */ + screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER | + PIPE_BIND_VERTEX_BUFFER; + screen->base.sysmem_bindings |= + PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER; + + screen->base.pushbuf->user_priv = screen; + screen->base.pushbuf->rsvd_kick = 5; + + chan = screen->base.channel; + + pscreen->destroy = nv50_screen_destroy; + pscreen->context_create = nv50_create; + pscreen->is_format_supported = nv50_screen_is_format_supported; + pscreen->get_param = nv50_screen_get_param; + pscreen->get_shader_param = nv50_screen_get_shader_param; + pscreen->get_paramf = nv50_screen_get_paramf; + + nv50_screen_init_resource_functions(pscreen); + + if (screen->base.device->chipset < 0x84 || + debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) { + /* PMPEG */ + nouveau_screen_init_vdec(&screen->base); + } else if (screen->base.device->chipset < 0x98 || + screen->base.device->chipset == 0xa0) { + /* VP2 */ + screen->base.base.get_video_param = nv84_screen_get_video_param; + screen->base.base.is_video_format_supported = nv84_screen_video_supported; + } else { + /* VP3/4 */ + screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param; + screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported; + } + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, + NULL, &screen->fence.bo); + if (ret) { + NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret); + goto fail; + } + + nouveau_bo_map(screen->fence.bo, 0, NULL); + screen->fence.map = screen->fence.bo->map; + screen->base.fence.emit = nv50_screen_fence_emit; + screen->base.fence.update = nv50_screen_fence_update; + + ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS, + &(struct nv04_notify){ .length = 32 }, + sizeof(struct nv04_notify), &screen->sync); + if (ret) { + NOUVEAU_ERR("Failed to allocate notifier: %d\n", ret); + goto fail; + } + + ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS, + NULL, 0, &screen->m2mf); + if (ret) { + 
NOUVEAU_ERR("Failed to allocate PGRAPH context for M2MF: %d\n", ret); + goto fail; + } + + ret = nouveau_object_new(chan, 0xbeef502d, NV50_2D_CLASS, + NULL, 0, &screen->eng2d); + if (ret) { + NOUVEAU_ERR("Failed to allocate PGRAPH context for 2D: %d\n", ret); + goto fail; + } + + switch (dev->chipset & 0xf0) { + case 0x50: + tesla_class = NV50_3D_CLASS; + break; + case 0x80: + case 0x90: + tesla_class = NV84_3D_CLASS; + break; + case 0xa0: + switch (dev->chipset) { + case 0xa0: + case 0xaa: + case 0xac: + tesla_class = NVA0_3D_CLASS; + break; + case 0xaf: + tesla_class = NVAF_3D_CLASS; + break; + default: + tesla_class = NVA3_3D_CLASS; + break; + } + break; + default: + NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", dev->chipset); + goto fail; + } + screen->base.class_3d = tesla_class; + + ret = nouveau_object_new(chan, 0xbeef5097, tesla_class, + NULL, 0, &screen->tesla); + if (ret) { + NOUVEAU_ERR("Failed to allocate PGRAPH context for 3D: %d\n", ret); + goto fail; + } + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, + 3 << NV50_CODE_BO_SIZE_LOG2, NULL, &screen->code); + if (ret) { + NOUVEAU_ERR("Failed to allocate code bo: %d\n", ret); + goto fail; + } + + nouveau_heap_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); + nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); + nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); + + nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); + + screen->TPs = util_bitcount(value & 0xffff); + screen->MPsInTP = util_bitcount((value >> 24) & 0xf); + + stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP * + STACK_WARPS_ALLOC * 64 * 8; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL, + &screen->stack_bo); + if (ret) { + NOUVEAU_ERR("Failed to allocate stack bo: %d\n", ret); + goto fail; + } + + uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) * + screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP * + 
ONE_TEMP_SIZE; + screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE; + screen->max_tls_space /= 2; /* half of vram */ + + /* hw can address max 64 KiB */ + screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10); + + uint64_t tls_size; + unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE; + ret = nv50_tls_alloc(screen, tls_space, &tls_size); + if (ret) + goto fail; + + if (nouveau_mesa_debug) + debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n", + screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, NULL, + &screen->uniforms); + if (ret) { + NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret); + goto fail; + } + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, NULL, + &screen->txc); + if (ret) { + NOUVEAU_ERR("Failed to allocate TIC/TSC bo: %d\n", ret); + goto fail; + } + + screen->tic.entries = CALLOC(4096, sizeof(void *)); + screen->tsc.entries = screen->tic.entries + 2048; + + if (!nv50_blitter_create(screen)) + goto fail; + + nv50_screen_init_hwctx(screen); + + nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE); + + return pscreen; + +fail: + nv50_screen_destroy(pscreen); + return NULL; +} + +int +nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry) +{ + int i = screen->tic.next; + + while (screen->tic.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1); + + screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1); + + if (screen->tic.entries[i]) + nv50_tic_entry(screen->tic.entries[i])->id = -1; + + screen->tic.entries[i] = entry; + return i; +} + +int +nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry) +{ + int i = screen->tsc.next; + + while (screen->tsc.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1); + + screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1); + + if (screen->tsc.entries[i]) + 
nv50_tsc_entry(screen->tsc.entries[i])->id = -1; + + screen->tsc.entries[i] = entry; + return i; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h new file mode 100644 index 00000000000..091a3921a4b --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -0,0 +1,153 @@ +#ifndef __NV50_SCREEN_H__ +#define __NV50_SCREEN_H__ + +#include "nouveau_screen.h" +#include "nouveau_fence.h" +#include "nouveau_mm.h" +#include "nouveau_heap.h" + +#include "nv50/nv50_winsys.h" +#include "nv50/nv50_stateobj.h" + +#define NV50_TIC_MAX_ENTRIES 2048 +#define NV50_TSC_MAX_ENTRIES 2048 + +/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */ +#define NV50_MAX_PIPE_CONSTBUFS 14 + +struct nv50_context; + +#define NV50_CODE_BO_SIZE_LOG2 19 + +#define NV50_SCREEN_RESIDENT_BO_COUNT 5 + +struct nv50_blitter; + +struct nv50_screen { + struct nouveau_screen base; + + struct nv50_context *cur_ctx; + + struct nouveau_bo *code; + struct nouveau_bo *uniforms; + struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ + struct nouveau_bo *stack_bo; + struct nouveau_bo *tls_bo; + + unsigned TPs; + unsigned MPsInTP; + unsigned max_tls_space; + unsigned cur_tls_space; + + struct nouveau_heap *vp_code_heap; + struct nouveau_heap *gp_code_heap; + struct nouveau_heap *fp_code_heap; + + struct nv50_blitter *blitter; + + struct { + void **entries; + int next; + uint32_t lock[NV50_TIC_MAX_ENTRIES / 32]; + } tic; + + struct { + void **entries; + int next; + uint32_t lock[NV50_TSC_MAX_ENTRIES / 32]; + } tsc; + + struct { + uint32_t *map; + struct nouveau_bo *bo; + } fence; + + struct nouveau_object *sync; + + struct nouveau_object *tesla; + struct nouveau_object *eng2d; + struct nouveau_object *m2mf; +}; + +static INLINE struct nv50_screen * +nv50_screen(struct pipe_screen *screen) +{ + return (struct nv50_screen *)screen; +} + +boolean nv50_blitter_create(struct nv50_screen *); +void 
nv50_blitter_destroy(struct nv50_screen *); + +int nv50_screen_tic_alloc(struct nv50_screen *, void *); +int nv50_screen_tsc_alloc(struct nv50_screen *, void *); + +static INLINE void +nv50_resource_fence(struct nv04_resource *res, uint32_t flags) +{ + struct nv50_screen *screen = nv50_screen(res->base.screen); + + if (res->mm) { + nouveau_fence_ref(screen->base.fence.current, &res->fence); + if (flags & NOUVEAU_BO_WR) + nouveau_fence_ref(screen->base.fence.current, &res->fence_wr); + } +} + +static INLINE void +nv50_resource_validate(struct nv04_resource *res, uint32_t flags) +{ + if (likely(res->bo)) { + if (flags & NOUVEAU_BO_WR) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING | + NOUVEAU_BUFFER_STATUS_DIRTY; + if (flags & NOUVEAU_BO_RD) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nv50_resource_fence(res, flags); + } +} + +struct nv50_format { + uint32_t rt; + uint32_t tic; + uint32_t vtx; + uint32_t usage; +}; + +extern const struct nv50_format nv50_format_table[]; + +static INLINE void +nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic) +{ + if (tic->id >= 0) + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); +} + +static INLINE void +nv50_screen_tsc_unlock(struct nv50_screen *screen, struct nv50_tsc_entry *tsc) +{ + if (tsc->id >= 0) + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); +} + +static INLINE void +nv50_screen_tic_free(struct nv50_screen *screen, struct nv50_tic_entry *tic) +{ + if (tic->id >= 0) { + screen->tic.entries[tic->id] = NULL; + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); + } +} + +static INLINE void +nv50_screen_tsc_free(struct nv50_screen *screen, struct nv50_tsc_entry *tsc) +{ + if (tsc->id >= 0) { + screen->tsc.entries[tsc->id] = NULL; + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); + } +} + +extern int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c 
b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c new file mode 100644 index 00000000000..9144fc48d95 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -0,0 +1,623 @@ +/* + * Copyright 2008 Ben Skeggs + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" + +#include "nv50/nv50_context.h" + +void +nv50_constbufs_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + unsigned s; + + for (s = 0; s < 3; ++s) { + unsigned p; + + if (s == PIPE_SHADER_FRAGMENT) + p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT; + else + if (s == PIPE_SHADER_GEOMETRY) + p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY; + else + p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX; + + while (nv50->constbuf_dirty[s]) { + const int i = ffs(nv50->constbuf_dirty[s]) - 1; + nv50->constbuf_dirty[s] &= ~(1 << i); + + if (nv50->constbuf[s][i].user) { + const unsigned b = NV50_CB_PVP + s; + unsigned start = 0; + unsigned words = nv50->constbuf[s][0].size / 4; + if (i) { + NOUVEAU_ERR("user constbufs only supported in slot 0\n"); + continue; + } + if (!nv50->state.uniform_buffer_bound[s]) { + nv50->state.uniform_buffer_bound[s] = TRUE; + BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); + PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); + } + while (words) { + unsigned nr; + + if (!PUSH_SPACE(push, 16)) + break; + nr = PUSH_AVAIL(push); + assert(nr >= 16); + nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN); + + BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); + PUSH_DATA (push, (start << 8) | b); + BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr); + PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr); + + start += nr; + words -= nr; + } + } else { + struct nv04_resource *res = + nv04_resource(nv50->constbuf[s][i].u.buf); + if (res) { + /* TODO: allocate persistent bindings */ + const unsigned b = s * 16 + i; + + assert(nouveau_resource_mapped_by_gpu(&res->base)); + + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset); + PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset); + PUSH_DATA (push, (b << 16) | + (nv50->constbuf[s][i].size & 0xffff)); + 
BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); + PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); + + BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD); + } else { + BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); + PUSH_DATA (push, (i << 8) | p | 0); + } + if (i == 0) + nv50->state.uniform_buffer_bound[s] = FALSE; + } + } + } +} + +static boolean +nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog) +{ + if (!prog->translated) { + prog->translated = nv50_program_translate( + prog, nv50->screen->base.device->chipset); + if (!prog->translated) + return FALSE; + } else + if (prog->mem) + return TRUE; + + return nv50_program_upload_code(nv50, prog); +} + +static INLINE void +nv50_program_update_context_state(struct nv50_context *nv50, + struct nv50_program *prog, int stage) +{ + const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR; + + if (prog && prog->tls_space) { + if (nv50->state.new_tls_space) + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS); + if (!nv50->state.tls_required || nv50->state.new_tls_space) + BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo); + nv50->state.new_tls_space = FALSE; + nv50->state.tls_required |= 1 << stage; + } else { + if (nv50->state.tls_required == (1 << stage)) + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS); + nv50->state.tls_required &= ~(1 << stage); + } +} + +void +nv50_vertprog_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_program *vp = nv50->vertprog; + + if (!nv50_program_validate(nv50, vp)) + return; + nv50_program_update_context_state(nv50, vp, 0); + + BEGIN_NV04(push, NV50_3D(VP_ATTR_EN(0)), 2); + PUSH_DATA (push, vp->vp.attrs[0]); + PUSH_DATA (push, vp->vp.attrs[1]); + BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_RESULT), 1); + PUSH_DATA (push, vp->max_out); + BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_TEMP), 1); + PUSH_DATA (push, vp->max_gpr); + BEGIN_NV04(push, NV50_3D(VP_START_ID), 1); + PUSH_DATA (push, vp->code_base); +} + 
+void +nv50_fragprog_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_program *fp = nv50->fragprog; + + if (!nv50_program_validate(nv50, fp)) + return; + nv50_program_update_context_state(nv50, fp, 1); + + BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1); + PUSH_DATA (push, fp->max_gpr); + BEGIN_NV04(push, NV50_3D(FP_RESULT_COUNT), 1); + PUSH_DATA (push, fp->max_out); + BEGIN_NV04(push, NV50_3D(FP_CONTROL), 1); + PUSH_DATA (push, fp->fp.flags[0]); + BEGIN_NV04(push, NV50_3D(FP_CTRL_UNK196C), 1); + PUSH_DATA (push, fp->fp.flags[1]); + BEGIN_NV04(push, NV50_3D(FP_START_ID), 1); + PUSH_DATA (push, fp->code_base); +} + +void +nv50_gmtyprog_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_program *gp = nv50->gmtyprog; + + if (gp) { + BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_TEMP), 1); + PUSH_DATA (push, gp->max_gpr); + BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_RESULT), 1); + PUSH_DATA (push, gp->max_out); + BEGIN_NV04(push, NV50_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1); + PUSH_DATA (push, gp->gp.prim_type); + BEGIN_NV04(push, NV50_3D(GP_VERTEX_OUTPUT_COUNT), 1); + PUSH_DATA (push, gp->gp.vert_count); + BEGIN_NV04(push, NV50_3D(GP_START_ID), 1); + PUSH_DATA (push, gp->code_base); + + nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */ + } + nv50_program_update_context_state(nv50, gp, 2); + + /* GP_ENABLE is updated in linkage validation */ +} + +static void +nv50_sprite_coords_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + uint32_t pntc[8], mode; + struct nv50_program *fp = nv50->fragprog; + unsigned i, c; + unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff; + + if (!nv50->rast->pipe.point_quad_rasterization) { + if (nv50->state.point_sprite) { + BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8); + for (i = 0; i < 8; ++i) + PUSH_DATA(push, 0); + + nv50->state.point_sprite = FALSE; + } + return; 
+ } else { + nv50->state.point_sprite = TRUE; + } + + memset(pntc, 0, sizeof(pntc)); + + for (i = 0; i < fp->in_nr; i++) { + unsigned n = util_bitcount(fp->in[i].mask); + + if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) { + m += n; + continue; + } + if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) { + m += n; + continue; + } + + for (c = 0; c < 4; ++c) { + if (fp->in[i].mask & (1 << c)) { + pntc[m / 8] |= (c + 1) << ((m % 8) * 4); + ++m; + } + } + } + + if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) + mode = 0x00; + else + mode = 0x10; + + BEGIN_NV04(push, NV50_3D(POINT_SPRITE_CTRL), 1); + PUSH_DATA (push, mode); + + BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8); + PUSH_DATAp(push, pntc, 8); +} + +/* Validate state derived from shaders and the rasterizer cso. */ +void +nv50_validate_derived_rs(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + uint32_t color, psize; + + nv50_sprite_coords_validate(nv50); + + if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) { + nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard; + BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1); + PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard); + } + + if (nv50->dirty & NV50_NEW_FRAGPROG) + return; + psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK; + color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN; + + if (nv50->rast->pipe.clamp_vertex_color) + color |= NV50_3D_SEMANTIC_COLOR_CLMP_EN; + + if (color != nv50->state.semantic_color) { + nv50->state.semantic_color = color; + BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 1); + PUSH_DATA (push, color); + } + + if (nv50->rast->pipe.point_size_per_vertex) + psize |= NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK; + + if (psize != nv50->state.semantic_psize) { + nv50->state.semantic_psize = psize; + BEGIN_NV04(push, NV50_3D(SEMANTIC_PTSZ), 1); + PUSH_DATA (push, psize); + } +} + +static int 
+nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4], + struct nv50_varying *in, struct nv50_varying *out) +{ + int c; + uint8_t mv = out->mask, mf = in->mask, oid = out->hw; + + for (c = 0; c < 4; ++c) { + if (mf & 1) { + if (in->linear) + lin[mid / 32] |= 1 << (mid % 32); + if (mv & 1) + map[mid] = oid; + else + if (c == 3) + map[mid] |= 1; + ++mid; + } + + oid += mv & 1; + mf >>= 1; + mv >>= 1; + } + + return mid; +} + +void +nv50_fp_linkage_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog; + struct nv50_program *fp = nv50->fragprog; + struct nv50_varying dummy; + int i, n, c, m; + uint32_t primid = 0; + uint32_t psiz = 0x000; + uint32_t interp = fp->fp.interp; + uint32_t colors = fp->fp.colors; + uint32_t lin[4]; + uint8_t map[64]; + uint8_t so_map[64]; + + if (!(nv50->dirty & (NV50_NEW_VERTPROG | + NV50_NEW_FRAGPROG | + NV50_NEW_GMTYPROG))) { + uint8_t bfc, ffc; + ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK); + bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK) + >> 8; + if (nv50->rast->pipe.light_twoside == ((ffc == bfc) ? 0 : 1)) + return; + } + + memset(lin, 0x00, sizeof(lin)); + + /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx + * or is it the first byte ? + */ + memset(map, nv50->gmtyprog ? 
0x80 : 0x40, sizeof(map)); + + dummy.mask = 0xf; /* map all components of HPOS */ + dummy.linear = 0; + m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]); + + for (c = 0; c < vp->vp.clpd_nr; ++c) + map[m++] = vp->vp.clpd[c / 4] + (c % 4); + + colors |= m << 8; /* adjust BFC0 id */ + + dummy.mask = 0x0; + + /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */ + if (nv50->rast->pipe.light_twoside) { + for (i = 0; i < 2; ++i) { + n = vp->vp.bfc[i]; + if (fp->vp.bfc[i] >= fp->in_nr) + continue; + m = nv50_vec4_map(map, m, lin, &fp->in[fp->vp.bfc[i]], + (n < vp->out_nr) ? &vp->out[n] : &dummy); + } + } + colors += m - 4; /* adjust FFC0 id */ + interp |= m << 8; /* set map id where 'normal' FP inputs start */ + + for (i = 0; i < fp->in_nr; ++i) { + for (n = 0; n < vp->out_nr; ++n) + if (vp->out[n].sn == fp->in[i].sn && + vp->out[n].si == fp->in[i].si) + break; + m = nv50_vec4_map(map, m, lin, + &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy); + } + + /* PrimitiveID either is replaced by the system value, or + * written by the geometry shader into an output register + */ + if (fp->gp.primid < 0x80) { + primid = m; + map[m++] = vp->gp.primid; + } + + if (nv50->rast->pipe.point_size_per_vertex) { + psiz = (m << 4) | 1; + map[m++] = vp->vp.psiz; + } + + if (nv50->rast->pipe.clamp_vertex_color) + colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN; + + if (unlikely(vp->so)) { + /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP + * gets written. + * + * TODO: + * Inverting vp->so->map (output -> offset) would probably speed this up. 
+ */ + memset(so_map, 0, sizeof(so_map)); + for (i = 0; i < vp->so->map_size; ++i) { + if (vp->so->map[i] == 0xff) + continue; + for (c = 0; c < m; ++c) + if (map[c] == vp->so->map[i] && !so_map[c]) + break; + if (c == m) { + c = m; + map[m++] = vp->so->map[i]; + } + so_map[c] = 0x80 | i; + } + for (c = m; c & 3; ++c) + so_map[c] = 0; + } + + n = (m + 3) / 4; + assert(m <= 64); + + if (unlikely(nv50->gmtyprog)) { + BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP_SIZE), 1); + PUSH_DATA (push, m); + BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP(0)), n); + PUSH_DATAp(push, map, n); + } else { + BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1); + PUSH_DATA (push, vp->vp.attrs[2]); + + BEGIN_NV04(push, NV50_3D(SEMANTIC_PRIM_ID), 1); + PUSH_DATA (push, primid); + + BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1); + PUSH_DATA (push, m); + BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n); + PUSH_DATAp(push, map, n); + } + + BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 4); + PUSH_DATA (push, colors); + PUSH_DATA (push, (vp->vp.clpd_nr << 8) | 4); + PUSH_DATA (push, 0); + PUSH_DATA (push, psiz); + + BEGIN_NV04(push, NV50_3D(FP_INTERPOLANT_CTRL), 1); + PUSH_DATA (push, interp); + + nv50->state.interpolant_ctrl = interp; + + nv50->state.semantic_color = colors; + nv50->state.semantic_psize = psiz; + + BEGIN_NV04(push, NV50_3D(NOPERSPECTIVE_BITMAP(0)), 4); + PUSH_DATAp(push, lin, 4); + + BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1); + PUSH_DATA (push, nv50->gmtyprog ? 
1 : 0); + + if (vp->so) { + BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n); + PUSH_DATAp(push, so_map, n); + } +} + +static int +nv50_vp_gp_mapping(uint8_t *map, int m, + struct nv50_program *vp, struct nv50_program *gp) +{ + int i, j, c; + + for (i = 0; i < gp->in_nr; ++i) { + uint8_t oid = 0, mv = 0, mg = gp->in[i].mask; + + for (j = 0; j < vp->out_nr; ++j) { + if (vp->out[j].sn == gp->in[i].sn && + vp->out[j].si == gp->in[i].si) { + mv = vp->out[j].mask; + oid = vp->out[j].hw; + break; + } + } + + for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) { + if (mg & mv & 1) + map[m++] = oid; + else + if (mg & 1) + map[m++] = (c == 3) ? 0x41 : 0x40; + oid += mv & 1; + } + } + return m; +} + +void +nv50_gp_linkage_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_program *vp = nv50->vertprog; + struct nv50_program *gp = nv50->gmtyprog; + int m = 0; + int n; + uint8_t map[64]; + + if (!gp) + return; + memset(map, 0, sizeof(map)); + + m = nv50_vp_gp_mapping(map, m, vp, gp); + + n = (m + 3) / 4; + + BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1); + PUSH_DATA (push, vp->vp.attrs[2] | gp->vp.attrs[2]); + + BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1); + PUSH_DATA (push, m); + BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n); + PUSH_DATAp(push, map, n); +} + +void +nv50_stream_output_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_stream_output_state *so; + uint32_t ctrl; + unsigned i; + unsigned prims = ~0; + + so = nv50->gmtyprog ? 
nv50->gmtyprog->so : nv50->vertprog->so; + + BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1); + PUSH_DATA (push, 0); + if (!so || !nv50->num_so_targets) { + if (nv50->screen->base.class_3d < NVA0_3D_CLASS) { + BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1); + PUSH_DATA (push, 0); + } + BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1); + PUSH_DATA (push, 1); + return; + } + + /* previous TFB needs to complete */ + if (nv50->screen->base.class_3d < NVA0_3D_CLASS) { + BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + } + + ctrl = so->ctrl; + if (nv50->screen->base.class_3d >= NVA0_3D_CLASS) + ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET; + + BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1); + PUSH_DATA (push, ctrl); + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO); + + for (i = 0; i < nv50->num_so_targets; ++i) { + struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]); + struct nv04_resource *buf = nv04_resource(targ->pipe.buffer); + + const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 
4 : 3; + + if (n == 4 && !targ->clean) + nv84_query_fifo_wait(push, targ->pq); + BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n); + PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset); + PUSH_DATA (push, buf->address + targ->pipe.buffer_offset); + PUSH_DATA (push, so->num_attribs[i]); + if (n == 4) { + PUSH_DATA(push, targ->pipe.buffer_size); + + BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1); + if (!targ->clean) { + assert(targ->pq); + nv50_query_pushbuf_submit(push, targ->pq, 0x4); + } else { + PUSH_DATA(push, 0); + targ->clean = FALSE; + } + } else { + const unsigned limit = targ->pipe.buffer_size / + (so->stride[i] * nv50->state.prim_size); + prims = MIN2(prims, limit); + } + BCTX_REFN(nv50->bufctx_3d, SO, buf, WR); + } + if (prims != ~0) { + BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1); + PUSH_DATA (push, prims); + } + BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1); + PUSH_DATA (push, 1); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c new file mode 100644 index 00000000000..7dceb51c19e --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -0,0 +1,1110 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" +#include "util/u_helpers.h" +#include "util/u_inlines.h" +#include "util/u_transfer.h" +#include "util/u_format_srgb.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv50/nv50_stateobj.h" +#include "nv50/nv50_context.h" + +#include "nv50/nv50_3d.xml.h" +#include "nv50/nv50_texture.xml.h" + +#include "nouveau_gldefs.h" + +/* Caveats: + * ! pipe_sampler_state.normalized_coords is ignored - rectangle textures will + * use non-normalized coordinates, everything else won't + * (The relevant bit is in the TIC entry and not the TSC entry.) + * + * ! pipe_sampler_state.seamless_cube_map is ignored - seamless filtering is + * always activated on NVA0 + + * (Give me the global bit, otherwise it's not worth the CPU work.) + * + * ! pipe_sampler_state.border_color is not swizzled according to the texture + * swizzle in pipe_sampler_view + * (This will be ugly with indirect independent texture/sampler access, + * we'd have to emulate the logic in the shader. GL doesn't have that, + * D3D doesn't have swizzle, if we knew what we were implementing we'd be + * good.) + * + * ! pipe_rasterizer_state.line_last_pixel is ignored - it is never drawn + * + * ! pipe_rasterizer_state.flatshade_first also applies to QUADS + * (There's a GL query for that, forcing an exception is just ridiculous.) + * + * ! 
pipe_rasterizer_state.half_pixel_center is ignored - pixel centers + * are always at half integer coordinates and the top-left rule applies + * (There does not seem to be a hardware switch for this.) + * + * ! pipe_rasterizer_state.sprite_coord_enable is masked with 0xff on NVC0 + * (The hardware only has 8 slots meant for TexCoord and we have to assign + * in advance to maintain elegant separate shader objects.) + */ + +static INLINE uint32_t +nv50_colormask(unsigned mask) +{ + uint32_t ret = 0; + + if (mask & PIPE_MASK_R) + ret |= 0x0001; + if (mask & PIPE_MASK_G) + ret |= 0x0010; + if (mask & PIPE_MASK_B) + ret |= 0x0100; + if (mask & PIPE_MASK_A) + ret |= 0x1000; + + return ret; +} + +#define NV50_BLEND_FACTOR_CASE(a, b) \ + case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b + +static INLINE uint32_t +nv50_blend_fac(unsigned factor) +{ + switch (factor) { + NV50_BLEND_FACTOR_CASE(ONE, ONE); + NV50_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR); + NV50_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA); + NV50_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA); + NV50_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR); + NV50_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE); + NV50_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR); + NV50_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA); + NV50_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR); + NV50_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA); + NV50_BLEND_FACTOR_CASE(ZERO, ZERO); + NV50_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR); + NV50_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA); + NV50_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA); + NV50_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR); + NV50_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR); + NV50_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA); + NV50_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR); + NV50_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA); + default: + return NV50_3D_BLEND_FACTOR_ZERO; + } +} + +static 
void * +nv50_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj); + int i; + boolean emit_common_func = cso->rt[0].blend_enable; + uint32_t ms; + + if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) { + SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1); + SB_DATA (so, cso->independent_blend_enable); + } + + so->pipe = *cso; + + SB_BEGIN_3D(so, COLOR_MASK_COMMON, 1); + SB_DATA (so, !cso->independent_blend_enable); + + SB_BEGIN_3D(so, BLEND_ENABLE_COMMON, 1); + SB_DATA (so, !cso->independent_blend_enable); + + if (cso->independent_blend_enable) { + SB_BEGIN_3D(so, BLEND_ENABLE(0), 8); + for (i = 0; i < 8; ++i) { + SB_DATA(so, cso->rt[i].blend_enable); + if (cso->rt[i].blend_enable) + emit_common_func = TRUE; + } + + if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) { + emit_common_func = FALSE; + + for (i = 0; i < 8; ++i) { + if (!cso->rt[i].blend_enable) + continue; + SB_BEGIN_3D_(so, NVA3_3D_IBLEND_EQUATION_RGB(i), 6); + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func)); + SB_DATA (so, nv50_blend_fac(cso->rt[i].rgb_src_factor)); + SB_DATA (so, nv50_blend_fac(cso->rt[i].rgb_dst_factor)); + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func)); + SB_DATA (so, nv50_blend_fac(cso->rt[i].alpha_src_factor)); + SB_DATA (so, nv50_blend_fac(cso->rt[i].alpha_dst_factor)); + } + } + } else { + SB_BEGIN_3D(so, BLEND_ENABLE(0), 1); + SB_DATA (so, cso->rt[0].blend_enable); + } + + if (emit_common_func) { + SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5); + SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); + SB_DATA (so, nv50_blend_fac(cso->rt[0].rgb_src_factor)); + SB_DATA (so, nv50_blend_fac(cso->rt[0].rgb_dst_factor)); + SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func)); + SB_DATA (so, nv50_blend_fac(cso->rt[0].alpha_src_factor)); + SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1); + SB_DATA (so, nv50_blend_fac(cso->rt[0].alpha_dst_factor)); + } + + if (cso->logicop_enable) 
{ + SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2); + SB_DATA (so, 1); + SB_DATA (so, nvgl_logicop_func(cso->logicop_func)); + } else { + SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 1); + SB_DATA (so, 0); + } + + if (cso->independent_blend_enable) { + SB_BEGIN_3D(so, COLOR_MASK(0), 8); + for (i = 0; i < 8; ++i) + SB_DATA(so, nv50_colormask(cso->rt[i].colormask)); + } else { + SB_BEGIN_3D(so, COLOR_MASK(0), 1); + SB_DATA (so, nv50_colormask(cso->rt[0].colormask)); + } + + ms = 0; + if (cso->alpha_to_coverage) + ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE; + if (cso->alpha_to_one) + ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE; + + SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1); + SB_DATA (so, ms); + + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); + return so; +} + +static void +nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->blend = hwcso; + nv50->dirty |= NV50_NEW_BLEND; +} + +static void +nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +/* NOTE: ignoring line_last_pixel, using FALSE (set on screen init) */ +static void * +nv50_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv50_rasterizer_stateobj *so; + uint32_t reg; + + so = CALLOC_STRUCT(nv50_rasterizer_stateobj); + if (!so) + return NULL; + so->pipe = *cso; + +#ifndef NV50_SCISSORS_CLIPPING + SB_BEGIN_3D(so, SCISSOR_ENABLE(0), 1); + SB_DATA (so, cso->scissor); +#endif + + SB_BEGIN_3D(so, SHADE_MODEL, 1); + SB_DATA (so, cso->flatshade ? NV50_3D_SHADE_MODEL_FLAT : + NV50_3D_SHADE_MODEL_SMOOTH); + SB_BEGIN_3D(so, PROVOKING_VERTEX_LAST, 1); + SB_DATA (so, !cso->flatshade_first); + SB_BEGIN_3D(so, VERTEX_TWO_SIDE_ENABLE, 1); + SB_DATA (so, cso->light_twoside); + + SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1); + SB_DATA (so, cso->clamp_fragment_color ? 
0x11111111 : 0x00000000); + + SB_BEGIN_3D(so, MULTISAMPLE_ENABLE, 1); + SB_DATA (so, cso->multisample); + + SB_BEGIN_3D(so, LINE_WIDTH, 1); + SB_DATA (so, fui(cso->line_width)); + SB_BEGIN_3D(so, LINE_SMOOTH_ENABLE, 1); + SB_DATA (so, cso->line_smooth); + + SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1); + if (cso->line_stipple_enable) { + SB_DATA (so, 1); + SB_BEGIN_3D(so, LINE_STIPPLE, 1); + SB_DATA (so, (cso->line_stipple_pattern << 8) | + cso->line_stipple_factor); + } else { + SB_DATA (so, 0); + } + + if (!cso->point_size_per_vertex) { + SB_BEGIN_3D(so, POINT_SIZE, 1); + SB_DATA (so, fui(cso->point_size)); + } + SB_BEGIN_3D(so, POINT_SPRITE_ENABLE, 1); + SB_DATA (so, cso->point_quad_rasterization); + SB_BEGIN_3D(so, POINT_SMOOTH_ENABLE, 1); + SB_DATA (so, cso->point_smooth); + + SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 3); + SB_DATA (so, nvgl_polygon_mode(cso->fill_front)); + SB_DATA (so, nvgl_polygon_mode(cso->fill_back)); + SB_DATA (so, cso->poly_smooth); + + SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3); + SB_DATA (so, cso->cull_face != PIPE_FACE_NONE); + SB_DATA (so, cso->front_ccw ? 
NV50_3D_FRONT_FACE_CCW : + NV50_3D_FRONT_FACE_CW); + switch (cso->cull_face) { + case PIPE_FACE_FRONT_AND_BACK: + SB_DATA(so, NV50_3D_CULL_FACE_FRONT_AND_BACK); + break; + case PIPE_FACE_FRONT: + SB_DATA(so, NV50_3D_CULL_FACE_FRONT); + break; + case PIPE_FACE_BACK: + default: + SB_DATA(so, NV50_3D_CULL_FACE_BACK); + break; + } + + SB_BEGIN_3D(so, POLYGON_STIPPLE_ENABLE, 1); + SB_DATA (so, cso->poly_stipple_enable); + SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3); + SB_DATA (so, cso->offset_point); + SB_DATA (so, cso->offset_line); + SB_DATA (so, cso->offset_tri); + + if (cso->offset_point || cso->offset_line || cso->offset_tri) { + SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1); + SB_DATA (so, fui(cso->offset_scale)); + SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1); + SB_DATA (so, fui(cso->offset_units * 2.0f)); + SB_BEGIN_3D(so, POLYGON_OFFSET_CLAMP, 1); + SB_DATA (so, fui(cso->offset_clamp)); + } + + if (cso->depth_clip) { + reg = 0; + } else { + reg = + NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR | + NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR | + NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1; + } +#ifndef NV50_SCISSORS_CLIPPING + reg |= + NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 | + NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1; +#endif + SB_BEGIN_3D(so, VIEW_VOLUME_CLIP_CTRL, 1); + SB_DATA (so, reg); + + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); + return (void *)so; +} + +static void +nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->rast = hwcso; + nv50->dirty |= NV50_NEW_RASTERIZER; +} + +static void +nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nv50_zsa_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv50_zsa_stateobj *so = CALLOC_STRUCT(nv50_zsa_stateobj); + + so->pipe = *cso; + + SB_BEGIN_3D(so, DEPTH_WRITE_ENABLE, 1); + SB_DATA (so, cso->depth.writemask); 
+ SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1); + if (cso->depth.enabled) { + SB_DATA (so, 1); + SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1); + SB_DATA (so, nvgl_comparison_op(cso->depth.func)); + } else { + SB_DATA (so, 0); + } + + if (cso->stencil[0].enabled) { + SB_BEGIN_3D(so, STENCIL_ENABLE, 5); + SB_DATA (so, 1); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func)); + SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2); + SB_DATA (so, cso->stencil[0].writemask); + SB_DATA (so, cso->stencil[0].valuemask); + } else { + SB_BEGIN_3D(so, STENCIL_ENABLE, 1); + SB_DATA (so, 0); + } + + if (cso->stencil[1].enabled) { + assert(cso->stencil[0].enabled); + SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5); + SB_DATA (so, 1); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func)); + SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2); + SB_DATA (so, cso->stencil[1].writemask); + SB_DATA (so, cso->stencil[1].valuemask); + } else { + SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 1); + SB_DATA (so, 0); + } + + SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1); + if (cso->alpha.enabled) { + SB_DATA (so, 1); + SB_BEGIN_3D(so, ALPHA_TEST_REF, 2); + SB_DATA (so, fui(cso->alpha.ref_value)); + SB_DATA (so, nvgl_comparison_op(cso->alpha.func)); + } else { + SB_DATA (so, 0); + } + + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); + return (void *)so; +} + +static void +nv50_zsa_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->zsa = hwcso; + nv50->dirty |= NV50_NEW_ZSA; +} + +static void +nv50_zsa_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +/* ====================== SAMPLERS AND 
TEXTURES ================================ + */ + +#define NV50_TSC_WRAP_CASE(n) \ + case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n + +static INLINE unsigned +nv50_tsc_wrap_mode(unsigned wrap) +{ + switch (wrap) { + NV50_TSC_WRAP_CASE(REPEAT); + NV50_TSC_WRAP_CASE(MIRROR_REPEAT); + NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE); + NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER); + NV50_TSC_WRAP_CASE(CLAMP); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP); + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + return NV50_TSC_WRAP_REPEAT; + } +} + +void * +nv50_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv50_tsc_entry *so = MALLOC_STRUCT(nv50_tsc_entry); + float f[2]; + + so->id = -1; + + so->tsc[0] = (0x00026000 | + (nv50_tsc_wrap_mode(cso->wrap_s) << 0) | + (nv50_tsc_wrap_mode(cso->wrap_t) << 3) | + (nv50_tsc_wrap_mode(cso->wrap_r) << 6)); + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + so->tsc[1] = NV50_TSC_1_MAGF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + so->tsc[1] = NV50_TSC_1_MAGF_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + so->tsc[1] |= NV50_TSC_1_MINF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + so->tsc[1] |= NV50_TSC_1_MINF_NEAREST; + break; + } + + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_LINEAR: + so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + so->tsc[1] |= NV50_TSC_1_MIPF_NONE; + break; + } + + if (nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) { + if (cso->seamless_cube_map) + so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS; + if (!cso->normalized_coords) + so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS; + } + + if (cso->max_anisotropy >= 16) + so->tsc[0] |= (7 << 20); + else + if 
(cso->max_anisotropy >= 12) + so->tsc[0] |= (6 << 20); + else { + so->tsc[0] |= (cso->max_anisotropy >> 1) << 20; + + if (cso->max_anisotropy >= 4) + so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35; + else + if (cso->max_anisotropy >= 2) + so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15; + } + + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + /* NOTE: must be deactivated for non-shadow textures */ + so->tsc[0] |= (1 << 9); + so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10; + } + + f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f); + so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12; + + f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f); + f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f); + so->tsc[2] = + (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff); + + so->tsc[2] |= + util_format_linear_float_to_srgb_8unorm(cso->border_color.f[0]) << 24; + so->tsc[3] = + util_format_linear_float_to_srgb_8unorm(cso->border_color.f[1]) << 12; + so->tsc[3] |= + util_format_linear_float_to_srgb_8unorm(cso->border_color.f[2]) << 20; + + so->tsc[4] = fui(cso->border_color.f[0]); + so->tsc[5] = fui(cso->border_color.f[1]); + so->tsc[6] = fui(cso->border_color.f[2]); + so->tsc[7] = fui(cso->border_color.f[3]); + + return (void *)so; +} + +static void +nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + unsigned s, i; + + for (s = 0; s < 3; ++s) + for (i = 0; i < nv50_context(pipe)->num_samplers[s]; ++i) + if (nv50_context(pipe)->samplers[s][i] == hwcso) + nv50_context(pipe)->samplers[s][i] = NULL; + + nv50_screen_tsc_free(nv50_context(pipe)->screen, nv50_tsc_entry(hwcso)); + + FREE(hwcso); +} + +static INLINE void +nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s, + unsigned nr, void **hwcso) +{ + unsigned i; + + for (i = 0; i < nr; ++i) { + struct nv50_tsc_entry *old = nv50->samplers[s][i]; + + nv50->samplers[s][i] = nv50_tsc_entry(hwcso[i]); + if (old) + nv50_screen_tsc_unlock(nv50->screen, old); + } + for (; i < nv50->num_samplers[s]; 
++i) + if (nv50->samplers[s][i]) + nv50_screen_tsc_unlock(nv50->screen, nv50->samplers[s][i]); + + nv50->num_samplers[s] = nr; + + nv50->dirty |= NV50_NEW_SAMPLERS; +} + +static void +nv50_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nv50_stage_sampler_states_bind(nv50_context(pipe), 0, nr, s); +} + +static void +nv50_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nv50_stage_sampler_states_bind(nv50_context(pipe), 2, nr, s); +} + +static void +nv50_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nv50_stage_sampler_states_bind(nv50_context(pipe), 1, nr, s); +} + +/* NOTE: only called when not referenced anywhere, won't be bound */ +static void +nv50_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + + nv50_screen_tic_free(nv50_context(pipe)->screen, nv50_tic_entry(view)); + + FREE(nv50_tic_entry(view)); +} + +static INLINE void +nv50_stage_set_sampler_views(struct nv50_context *nv50, int s, + unsigned nr, + struct pipe_sampler_view **views) +{ + unsigned i; + + for (i = 0; i < nr; ++i) { + struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]); + if (old) + nv50_screen_tic_unlock(nv50->screen, old); + + pipe_sampler_view_reference(&nv50->textures[s][i], views[i]); + } + + for (i = nr; i < nv50->num_textures[s]; ++i) { + struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]); + if (!old) + continue; + nv50_screen_tic_unlock(nv50->screen, old); + + pipe_sampler_view_reference(&nv50->textures[s][i], NULL); + } + + nv50->num_textures[s] = nr; + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + + nv50->dirty |= NV50_NEW_TEXTURES; +} + +static void +nv50_vp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nv50_stage_set_sampler_views(nv50_context(pipe), 0, nr, views); +} + +static void 
+nv50_fp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nv50_stage_set_sampler_views(nv50_context(pipe), 2, nr, views); +} + +static void +nv50_gp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nv50_stage_set_sampler_views(nv50_context(pipe), 1, nr, views); +} + +/* ============================= SHADERS ======================================= + */ + +static void * +nv50_sp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso, unsigned type) +{ + struct nv50_program *prog; + + prog = CALLOC_STRUCT(nv50_program); + if (!prog) + return NULL; + + prog->type = type; + prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + if (cso->stream_output.num_outputs) + prog->pipe.stream_output = cso->stream_output; + + return (void *)prog; +} + +static void +nv50_sp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_program *prog = (struct nv50_program *)hwcso; + + nv50_program_destroy(nv50_context(pipe), prog); + + FREE((void *)prog->pipe.tokens); + FREE(prog); +} + +static void * +nv50_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nv50_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX); +} + +static void +nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->vertprog = hwcso; + nv50->dirty |= NV50_NEW_VERTPROG; +} + +static void * +nv50_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nv50_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT); +} + +static void +nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->fragprog = hwcso; + nv50->dirty |= NV50_NEW_FRAGPROG; +} + +static void * +nv50_gp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nv50_sp_state_create(pipe, 
cso, PIPE_SHADER_GEOMETRY); +} + +static void +nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->gmtyprog = hwcso; + nv50->dirty |= NV50_NEW_GMTYPROG; +} + +static void +nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + struct pipe_constant_buffer *cb) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct pipe_resource *res = cb ? cb->buffer : NULL; + const unsigned s = nv50_context_shader_stage(shader); + const unsigned i = index; + + if (shader == PIPE_SHADER_COMPUTE) + return; + + if (nv50->constbuf[s][i].user) + nv50->constbuf[s][i].u.buf = NULL; + else + if (nv50->constbuf[s][i].u.buf) + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i)); + + pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res); + + nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE; + if (nv50->constbuf[s][i].user) { + nv50->constbuf[s][i].u.data = cb->user_buffer; + nv50->constbuf[s][i].size = cb->buffer_size; + nv50->constbuf_valid[s] |= 1 << i; + } else + if (res) { + nv50->constbuf[s][i].offset = cb->buffer_offset; + nv50->constbuf[s][i].size = align(cb->buffer_size, 0x100); + nv50->constbuf_valid[s] |= 1 << i; + } else { + nv50->constbuf_valid[s] &= ~(1 << i); + } + nv50->constbuf_dirty[s] |= 1 << i; + + nv50->dirty |= NV50_NEW_CONSTBUF; +} + +/* ============================================================================= + */ + +static void +nv50_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->blend_colour = *bcol; + nv50->dirty |= NV50_NEW_BLEND_COLOUR; +} + +static void +nv50_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *sr) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->stencil_ref = *sr; + nv50->dirty |= NV50_NEW_STENCIL_REF; +} + +static void +nv50_set_clip_state(struct pipe_context *pipe, + const struct 
pipe_clip_state *clip) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + memcpy(nv50->clip.ucp, clip->ucp, sizeof(clip->ucp)); + + nv50->dirty |= NV50_NEW_CLIP; +} + +static void +nv50_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->sample_mask = sample_mask; + nv50->dirty |= NV50_NEW_SAMPLE_MASK; +} + + +static void +nv50_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv50_context *nv50 = nv50_context(pipe); + unsigned i; + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + + for (i = 0; i < fb->nr_cbufs; ++i) + pipe_surface_reference(&nv50->framebuffer.cbufs[i], fb->cbufs[i]); + for (; i < nv50->framebuffer.nr_cbufs; ++i) + pipe_surface_reference(&nv50->framebuffer.cbufs[i], NULL); + + nv50->framebuffer.nr_cbufs = fb->nr_cbufs; + + nv50->framebuffer.width = fb->width; + nv50->framebuffer.height = fb->height; + + pipe_surface_reference(&nv50->framebuffer.zsbuf, fb->zsbuf); + + nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +static void +nv50_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->stipple = *stipple; + nv50->dirty |= NV50_NEW_STIPPLE; +} + +static void +nv50_set_scissor_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissor) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->scissor = *scissor; + nv50->dirty |= NV50_NEW_SCISSOR; +} + +static void +nv50_set_viewport_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *vpt) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->viewport = *vpt; + nv50->dirty |= NV50_NEW_VIEWPORT; +} + +static void +nv50_set_vertex_buffers(struct pipe_context *pipe, + unsigned start_slot, unsigned count, + const struct 
pipe_vertex_buffer *vb) +{ + struct nv50_context *nv50 = nv50_context(pipe); + unsigned i; + + util_set_vertex_buffers_count(nv50->vtxbuf, &nv50->num_vtxbufs, vb, + start_slot, count); + + if (!vb) { + nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot); + nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot); + return; + } + + for (i = 0; i < count; ++i) { + unsigned dst_index = start_slot + i; + + if (!vb[i].buffer && vb[i].user_buffer) { + nv50->vbo_user |= 1 << dst_index; + if (!vb[i].stride) + nv50->vbo_constant |= 1 << dst_index; + else + nv50->vbo_constant &= ~(1 << dst_index); + } else { + nv50->vbo_user &= ~(1 << dst_index); + nv50->vbo_constant &= ~(1 << dst_index); + } + } + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX); + + nv50->dirty |= NV50_NEW_ARRAYS; +} + +static void +nv50_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + if (nv50->idxbuf.buffer) + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX); + + if (ib) { + pipe_resource_reference(&nv50->idxbuf.buffer, ib->buffer); + nv50->idxbuf.index_size = ib->index_size; + if (ib->buffer) { + nv50->idxbuf.offset = ib->offset; + BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(ib->buffer), RD); + } else { + nv50->idxbuf.user_buffer = ib->user_buffer; + } + } else { + pipe_resource_reference(&nv50->idxbuf.buffer, NULL); + } +} + +static void +nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->vertex = hwcso; + nv50->dirty |= NV50_NEW_VERTEX; +} + +static struct pipe_stream_output_target * +nv50_so_target_create(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size) +{ + struct nv50_so_target *targ = MALLOC_STRUCT(nv50_so_target); + if (!targ) + return NULL; + + if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) { + targ->pq = pipe->create_query(pipe, + 
NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET); + if (!targ->pq) { + FREE(targ); + return NULL; + } + } else { + targ->pq = NULL; + } + targ->clean = TRUE; + + targ->pipe.buffer_size = size; + targ->pipe.buffer_offset = offset; + targ->pipe.context = pipe; + targ->pipe.buffer = NULL; + pipe_resource_reference(&targ->pipe.buffer, res); + pipe_reference_init(&targ->pipe.reference, 1); + + return &targ->pipe; +} + +static void +nv50_so_target_destroy(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg) +{ + struct nv50_so_target *targ = nv50_so_target(ptarg); + if (targ->pq) + pipe->destroy_query(pipe, targ->pq); + pipe_resource_reference(&targ->pipe.buffer, NULL); + FREE(targ); +} + +static void +nv50_set_stream_output_targets(struct pipe_context *pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_mask) +{ + struct nv50_context *nv50 = nv50_context(pipe); + unsigned i; + boolean serialize = TRUE; + const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS; + + assert(num_targets <= 4); + + for (i = 0; i < num_targets; ++i) { + const boolean changed = nv50->so_target[i] != targets[i]; + if (!changed && (append_mask & (1 << i))) + continue; + nv50->so_targets_dirty |= 1 << i; + + if (can_resume && changed && nv50->so_target[i]) { + nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize); + serialize = FALSE; + } + + if (targets[i] && !(append_mask & (1 << i))) + nv50_so_target(targets[i])->clean = TRUE; + + pipe_so_target_reference(&nv50->so_target[i], targets[i]); + } + for (; i < nv50->num_so_targets; ++i) { + if (can_resume && nv50->so_target[i]) { + nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize); + serialize = FALSE; + } + pipe_so_target_reference(&nv50->so_target[i], NULL); + nv50->so_targets_dirty |= 1 << i; + } + nv50->num_so_targets = num_targets; + + if (nv50->so_targets_dirty) + nv50->dirty |= NV50_NEW_STRMOUT; +} + +void +nv50_init_state_functions(struct 
nv50_context *nv50) +{ + struct pipe_context *pipe = &nv50->base.pipe; + + pipe->create_blend_state = nv50_blend_state_create; + pipe->bind_blend_state = nv50_blend_state_bind; + pipe->delete_blend_state = nv50_blend_state_delete; + + pipe->create_rasterizer_state = nv50_rasterizer_state_create; + pipe->bind_rasterizer_state = nv50_rasterizer_state_bind; + pipe->delete_rasterizer_state = nv50_rasterizer_state_delete; + + pipe->create_depth_stencil_alpha_state = nv50_zsa_state_create; + pipe->bind_depth_stencil_alpha_state = nv50_zsa_state_bind; + pipe->delete_depth_stencil_alpha_state = nv50_zsa_state_delete; + + pipe->create_sampler_state = nv50_sampler_state_create; + pipe->delete_sampler_state = nv50_sampler_state_delete; + pipe->bind_vertex_sampler_states = nv50_vp_sampler_states_bind; + pipe->bind_fragment_sampler_states = nv50_fp_sampler_states_bind; + pipe->bind_geometry_sampler_states = nv50_gp_sampler_states_bind; + + pipe->create_sampler_view = nv50_create_sampler_view; + pipe->sampler_view_destroy = nv50_sampler_view_destroy; + pipe->set_vertex_sampler_views = nv50_vp_set_sampler_views; + pipe->set_fragment_sampler_views = nv50_fp_set_sampler_views; + pipe->set_geometry_sampler_views = nv50_gp_set_sampler_views; + + pipe->create_vs_state = nv50_vp_state_create; + pipe->create_fs_state = nv50_fp_state_create; + pipe->create_gs_state = nv50_gp_state_create; + pipe->bind_vs_state = nv50_vp_state_bind; + pipe->bind_fs_state = nv50_fp_state_bind; + pipe->bind_gs_state = nv50_gp_state_bind; + pipe->delete_vs_state = nv50_sp_state_delete; + pipe->delete_fs_state = nv50_sp_state_delete; + pipe->delete_gs_state = nv50_sp_state_delete; + + pipe->set_blend_color = nv50_set_blend_color; + pipe->set_stencil_ref = nv50_set_stencil_ref; + pipe->set_clip_state = nv50_set_clip_state; + pipe->set_sample_mask = nv50_set_sample_mask; + pipe->set_constant_buffer = nv50_set_constant_buffer; + pipe->set_framebuffer_state = nv50_set_framebuffer_state; + 
pipe->set_polygon_stipple = nv50_set_polygon_stipple; + pipe->set_scissor_states = nv50_set_scissor_states; + pipe->set_viewport_states = nv50_set_viewport_states; + + pipe->create_vertex_elements_state = nv50_vertex_state_create; + pipe->delete_vertex_elements_state = nv50_vertex_state_delete; + pipe->bind_vertex_elements_state = nv50_vertex_state_bind; + + pipe->set_vertex_buffers = nv50_set_vertex_buffers; + pipe->set_index_buffer = nv50_set_index_buffer; + + pipe->create_stream_output_target = nv50_so_target_create; + pipe->stream_output_target_destroy = nv50_so_target_destroy; + pipe->set_stream_output_targets = nv50_set_stream_output_targets; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c new file mode 100644 index 00000000000..866829ca22d --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -0,0 +1,414 @@ + +#include "nv50/nv50_context.h" +#include "os/os_time.h" + +static void +nv50_validate_fb(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + unsigned i; + unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1; + uint32_t array_size = 0xffff, array_mode = 0; + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + + BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); + PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs); + BEGIN_NV04(push, NV50_3D(SCREEN_SCISSOR_HORIZ), 2); + PUSH_DATA (push, fb->width << 16); + PUSH_DATA (push, fb->height << 16); + + for (i = 0; i < fb->nr_cbufs; ++i) { + struct nv50_miptree *mt = nv50_miptree(fb->cbufs[i]->texture); + struct nv50_surface *sf = nv50_surface(fb->cbufs[i]); + struct nouveau_bo *bo = mt->base.bo; + + array_size = MIN2(array_size, sf->depth); + if (mt->layout_3d) + array_mode = NV50_3D_RT_ARRAY_MODE_MODE_3D; /* 1 << 16 */ + + /* can't mix 3D with ARRAY or have RTs of different depth/array_size */ + assert(mt->layout_3d || 
!array_mode || array_size == 1); + + BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 5); + PUSH_DATAh(push, bo->offset + sf->offset); + PUSH_DATA (push, bo->offset + sf->offset); + PUSH_DATA (push, nv50_format_table[sf->base.format].rt); + if (likely(nouveau_bo_memtype(bo))) { + PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, mt->layer_stride >> 2); + BEGIN_NV04(push, NV50_3D(RT_HORIZ(i)), 2); + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1); + PUSH_DATA (push, array_mode | array_size); + } else { + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(RT_HORIZ(i)), 2); + PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | mt->level[0].pitch); + PUSH_DATA (push, sf->height); + BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1); + PUSH_DATA (push, 0); + + assert(!fb->zsbuf); + assert(!mt->ms_mode); + } + + ms_mode = mt->ms_mode; + + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) + nv50->state.rt_serialize = TRUE; + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= NOUVEAU_BUFFER_STATUS_GPU_READING; + + /* only register for writing, otherwise we'd always serialize here */ + BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR); + } + + if (fb->zsbuf) { + struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture); + struct nv50_surface *sf = nv50_surface(fb->zsbuf); + struct nouveau_bo *bo = mt->base.bo; + int unk = mt->base.base.target == PIPE_TEXTURE_3D || sf->depth == 1; + + BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5); + PUSH_DATAh(push, bo->offset + sf->offset); + PUSH_DATA (push, bo->offset + sf->offset); + PUSH_DATA (push, nv50_format_table[fb->zsbuf->format].rt); + PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, mt->layer_stride >> 2); + BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(ZETA_HORIZ), 3); + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + 
PUSH_DATA (push, (unk << 16) | sf->depth); + + ms_mode = mt->ms_mode; + + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) + nv50->state.rt_serialize = TRUE; + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= NOUVEAU_BUFFER_STATUS_GPU_READING; + + BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR); + } else { + BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 0); + } + + BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1); + PUSH_DATA (push, ms_mode); + + BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, fb->width << 16); + PUSH_DATA (push, fb->height << 16); +} + +static void +nv50_validate_blend_colour(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + + BEGIN_NV04(push, NV50_3D(BLEND_COLOR(0)), 4); + PUSH_DATAf(push, nv50->blend_colour.color[0]); + PUSH_DATAf(push, nv50->blend_colour.color[1]); + PUSH_DATAf(push, nv50->blend_colour.color[2]); + PUSH_DATAf(push, nv50->blend_colour.color[3]); +} + +static void +nv50_validate_stencil_ref(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + + BEGIN_NV04(push, NV50_3D(STENCIL_FRONT_FUNC_REF), 1); + PUSH_DATA (push, nv50->stencil_ref.ref_value[0]); + BEGIN_NV04(push, NV50_3D(STENCIL_BACK_FUNC_REF), 1); + PUSH_DATA (push, nv50->stencil_ref.ref_value[1]); +} + +static void +nv50_validate_stipple(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + unsigned i; + + BEGIN_NV04(push, NV50_3D(POLYGON_STIPPLE_PATTERN(0)), 32); + for (i = 0; i < 32; ++i) + PUSH_DATA(push, util_bswap32(nv50->stipple.stipple[i])); +} + +static void +nv50_validate_scissor(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct pipe_scissor_state *s = &nv50->scissor; +#ifdef NV50_SCISSORS_CLIPPING + struct pipe_viewport_state *vp = &nv50->viewport; + int minx, maxx, miny, maxy; + + if (!(nv50->dirty & + (NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | 
NV50_NEW_FRAMEBUFFER)) && + nv50->state.scissor == nv50->rast->pipe.scissor) + return; + nv50->state.scissor = nv50->rast->pipe.scissor; + + if (nv50->state.scissor) { + minx = s->minx; + maxx = s->maxx; + miny = s->miny; + maxy = s->maxy; + } else { + minx = 0; + maxx = nv50->framebuffer.width; + miny = 0; + maxy = nv50->framebuffer.height; + } + + minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0]))); + maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0]))); + miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1]))); + maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1]))); + + BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2); + PUSH_DATA (push, (maxx << 16) | minx); + PUSH_DATA (push, (maxy << 16) | miny); +#else + BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2); + PUSH_DATA (push, (s->maxx << 16) | s->minx); + PUSH_DATA (push, (s->maxy << 16) | s->miny); +#endif +} + +static void +nv50_validate_viewport(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + float zmin, zmax; + + BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSLATE_X(0)), 3); + PUSH_DATAf(push, nv50->viewport.translate[0]); + PUSH_DATAf(push, nv50->viewport.translate[1]); + PUSH_DATAf(push, nv50->viewport.translate[2]); + BEGIN_NV04(push, NV50_3D(VIEWPORT_SCALE_X(0)), 3); + PUSH_DATAf(push, nv50->viewport.scale[0]); + PUSH_DATAf(push, nv50->viewport.scale[1]); + PUSH_DATAf(push, nv50->viewport.scale[2]); + + zmin = nv50->viewport.translate[2] - fabsf(nv50->viewport.scale[2]); + zmax = nv50->viewport.translate[2] + fabsf(nv50->viewport.scale[2]); + +#ifdef NV50_SCISSORS_CLIPPING + BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(0)), 2); + PUSH_DATAf(push, zmin); + PUSH_DATAf(push, zmax); +#endif +} + +static INLINE void +nv50_check_program_ucps(struct nv50_context *nv50, + struct nv50_program *vp, uint8_t mask) +{ + const unsigned n = util_logbase2(mask) + 1; + + if (vp->vp.clpd_nr >= n) + return; + nv50_program_destroy(nv50, vp); + + 
vp->vp.clpd_nr = n; + if (likely(vp == nv50->vertprog)) { + nv50->dirty |= NV50_NEW_VERTPROG; + nv50_vertprog_validate(nv50); + } else { + nv50->dirty |= NV50_NEW_GMTYPROG; + nv50_gmtyprog_validate(nv50); + } + nv50_fp_linkage_validate(nv50); +} + +static void +nv50_validate_clip(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_program *vp; + uint8_t clip_enable; + + if (nv50->dirty & NV50_NEW_CLIP) { + BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); + PUSH_DATA (push, (0 << 8) | NV50_CB_AUX); + BEGIN_NI04(push, NV50_3D(CB_DATA(0)), PIPE_MAX_CLIP_PLANES * 4); + PUSH_DATAp(push, &nv50->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4); + } + + vp = nv50->gmtyprog; + if (likely(!vp)) + vp = nv50->vertprog; + + clip_enable = nv50->rast->pipe.clip_plane_enable; + + BEGIN_NV04(push, NV50_3D(CLIP_DISTANCE_ENABLE), 1); + PUSH_DATA (push, clip_enable); + + if (clip_enable) + nv50_check_program_ucps(nv50, vp, clip_enable); +} + +static void +nv50_validate_blend(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + + PUSH_SPACE(push, nv50->blend->size); + PUSH_DATAp(push, nv50->blend->state, nv50->blend->size); +} + +static void +nv50_validate_zsa(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + + PUSH_SPACE(push, nv50->zsa->size); + PUSH_DATAp(push, nv50->zsa->state, nv50->zsa->size); +} + +static void +nv50_validate_rasterizer(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + + PUSH_SPACE(push, nv50->rast->size); + PUSH_DATAp(push, nv50->rast->state, nv50->rast->size); +} + +static void +nv50_validate_sample_mask(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + + unsigned mask[4] = + { + nv50->sample_mask & 0xffff, + nv50->sample_mask & 0xffff, + nv50->sample_mask & 0xffff, + nv50->sample_mask & 0xffff + }; + + BEGIN_NV04(push, NV50_3D(MSAA_MASK(0)), 4); + PUSH_DATA (push, mask[0]); + PUSH_DATA (push, 
mask[1]); + PUSH_DATA (push, mask[2]); + PUSH_DATA (push, mask[3]); +} + +static void +nv50_switch_pipe_context(struct nv50_context *ctx_to) +{ + struct nv50_context *ctx_from = ctx_to->screen->cur_ctx; + + if (ctx_from) + ctx_to->state = ctx_from->state; + + ctx_to->dirty = ~0; + + if (!ctx_to->vertex) + ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS); + + if (!ctx_to->vertprog) + ctx_to->dirty &= ~NV50_NEW_VERTPROG; + if (!ctx_to->fragprog) + ctx_to->dirty &= ~NV50_NEW_FRAGPROG; + + if (!ctx_to->blend) + ctx_to->dirty &= ~NV50_NEW_BLEND; + if (!ctx_to->rast) +#ifdef NV50_SCISSORS_CLIPPING + ctx_to->dirty &= ~(NV50_NEW_RASTERIZER | NV50_NEW_SCISSOR); +#else + ctx_to->dirty &= ~NV50_NEW_RASTERIZER; +#endif + if (!ctx_to->zsa) + ctx_to->dirty &= ~NV50_NEW_ZSA; + + ctx_to->screen->cur_ctx = ctx_to; +} + +static struct state_validate { + void (*func)(struct nv50_context *); + uint32_t states; +} validate_list[] = { + { nv50_validate_fb, NV50_NEW_FRAMEBUFFER }, + { nv50_validate_blend, NV50_NEW_BLEND }, + { nv50_validate_zsa, NV50_NEW_ZSA }, + { nv50_validate_sample_mask, NV50_NEW_SAMPLE_MASK }, + { nv50_validate_rasterizer, NV50_NEW_RASTERIZER }, + { nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR }, + { nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF }, + { nv50_validate_stipple, NV50_NEW_STIPPLE }, +#ifdef NV50_SCISSORS_CLIPPING + { nv50_validate_scissor, NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | + NV50_NEW_RASTERIZER | + NV50_NEW_FRAMEBUFFER }, +#else + { nv50_validate_scissor, NV50_NEW_SCISSOR }, +#endif + { nv50_validate_viewport, NV50_NEW_VIEWPORT }, + { nv50_vertprog_validate, NV50_NEW_VERTPROG }, + { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG }, + { nv50_fragprog_validate, NV50_NEW_FRAGPROG }, + { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | + NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER }, + { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG }, + { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | + 
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, + { nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, + { nv50_constbufs_validate, NV50_NEW_CONSTBUF }, + { nv50_validate_textures, NV50_NEW_TEXTURES }, + { nv50_validate_samplers, NV50_NEW_SAMPLERS }, + { nv50_stream_output_validate, NV50_NEW_STRMOUT | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, + { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS } +}; +#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) + +boolean +nv50_state_validate(struct nv50_context *nv50, uint32_t mask, unsigned words) +{ + uint32_t state_mask; + int ret; + unsigned i; + + if (nv50->screen->cur_ctx != nv50) + nv50_switch_pipe_context(nv50); + + state_mask = nv50->dirty & mask; + + if (state_mask) { + for (i = 0; i < validate_list_len; ++i) { + struct state_validate *validate = &validate_list[i]; + + if (state_mask & validate->states) + validate->func(nv50); + } + nv50->dirty &= ~state_mask; + + if (nv50->state.rt_serialize) { + nv50->state.rt_serialize = FALSE; + BEGIN_NV04(nv50->base.pushbuf, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (nv50->base.pushbuf, 0); + } + + nv50_bufctx_fence(nv50->bufctx_3d, FALSE); + } + nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d); + ret = nouveau_pushbuf_validate(nv50->base.pushbuf); + + if (unlikely(nv50->state.flushed)) { + nv50->state.flushed = FALSE; + nv50_bufctx_fence(nv50->bufctx_3d, TRUE); + } + return !ret; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h new file mode 100644 index 00000000000..238951733cf --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h @@ -0,0 +1,78 @@ + +#ifndef __NV50_STATEOBJ_H__ +#define __NV50_STATEOBJ_H__ + +#include "pipe/p_state.h" + +#define NV50_SCISSORS_CLIPPING + +#define SB_BEGIN_3D(so, m, s) \ + (so)->state[(so)->size++] = NV50_FIFO_PKHDR(NV50_3D(m), s) + +#define SB_BEGIN_3D_(so, m, s) 
\ + (so)->state[(so)->size++] = NV50_FIFO_PKHDR(SUBC_3D(m), s) + +#define SB_DATA(so, u) (so)->state[(so)->size++] = (u) + +#include "nv50/nv50_stateobj_tex.h" + +struct nv50_blend_stateobj { + struct pipe_blend_state pipe; + int size; + uint32_t state[84]; // TODO: allocate less if !independent_blend_enable +}; + +struct nv50_rasterizer_stateobj { + struct pipe_rasterizer_state pipe; + int size; + uint32_t state[48]; +}; + +struct nv50_zsa_stateobj { + struct pipe_depth_stencil_alpha_state pipe; + int size; + uint32_t state[29]; +}; + +struct nv50_constbuf { + union { + struct pipe_resource *buf; + const uint8_t *data; + } u; + uint32_t size; /* max 65536 */ + uint32_t offset; + boolean user; /* should only be TRUE if u.data is valid and non-NULL */ +}; + +struct nv50_vertex_element { + struct pipe_vertex_element pipe; + uint32_t state; +}; + +struct nv50_vertex_stateobj { + uint32_t min_instance_div[PIPE_MAX_ATTRIBS]; + uint16_t vb_access_size[PIPE_MAX_ATTRIBS]; + struct translate *translate; + unsigned num_elements; + uint32_t instance_elts; + uint32_t instance_bufs; + boolean need_conversion; + unsigned vertex_size; + unsigned packet_vertex_limit; + struct nv50_vertex_element element[0]; +}; + +struct nv50_so_target { + struct pipe_stream_output_target pipe; + struct pipe_query *pq; + unsigned stride; + boolean clean; +}; + +static INLINE struct nv50_so_target * +nv50_so_target(struct pipe_stream_output_target *ptarg) +{ + return (struct nv50_so_target *)ptarg; +} + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h new file mode 100644 index 00000000000..99548cbdb42 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h @@ -0,0 +1,34 @@ + +#ifndef __NV50_STATEOBJ_TEX_H__ +#define __NV50_STATEOBJ_TEX_H__ + +#include "pipe/p_state.h" + +struct nv50_tsc_entry { + int id; + uint32_t tsc[8]; +}; + +static INLINE struct nv50_tsc_entry * +nv50_tsc_entry(void *hwcso) +{ + 
return (struct nv50_tsc_entry *)hwcso; +} + +struct nv50_tic_entry { + struct pipe_sampler_view pipe; + int id; + uint32_t tic[8]; +}; + +static INLINE struct nv50_tic_entry * +nv50_tic_entry(struct pipe_sampler_view *view) +{ + return (struct nv50_tic_entry *)view; +} + +extern void * +nv50_sampler_state_create(struct pipe_context *, + const struct pipe_sampler_state *); + +#endif /* __NV50_STATEOBJ_TEX_H__ */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c new file mode 100644 index 00000000000..dcc1fce41c5 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -0,0 +1,1353 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <stdint.h> + +#include "pipe/p_defines.h" + +#include "util/u_inlines.h" +#include "util/u_pack_color.h" +#include "util/u_format.h" +#include "util/u_surface.h" + +#include "tgsi/tgsi_ureg.h" + +#include "os/os_thread.h" + +#include "nv50/nv50_context.h" +#include "nv50/nv50_resource.h" + +#include "nv50/nv50_defs.xml.h" +#include "nv50/nv50_texture.xml.h" + +/* these are used in nv50_blit.h */ +#define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL +#define NV50_ENG2D_NOCONVERT_FORMATS 0x0008402000000000ULL +#define NV50_ENG2D_LUMINANCE_FORMATS 0x0008402000000000ULL +#define NV50_ENG2D_INTENSITY_FORMATS 0x0000000000000000ULL +#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000608000ULL + +#define NOUVEAU_DRIVER 0x50 +#include "nv50/nv50_blit.h" + +static INLINE uint8_t +nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal) +{ + uint8_t id = nv50_format_table[format].rt; + + /* Hardware values for color formats range from 0xc0 to 0xff, + * but the 2D engine doesn't support all of them. + */ + if ((id >= 0xc0) && (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)))) + return id; + assert(dst_src_equal); + + switch (util_format_get_blocksize(format)) { + case 1: + return NV50_SURFACE_FORMAT_R8_UNORM; + case 2: + return NV50_SURFACE_FORMAT_R16_UNORM; + case 4: + return NV50_SURFACE_FORMAT_BGRA8_UNORM; + default: + return 0; + } +} + +static int +nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst, + struct nv50_miptree *mt, unsigned level, unsigned layer, + enum pipe_format pformat, boolean dst_src_pformat_equal) +{ + struct nouveau_bo *bo = mt->base.bo; + uint32_t width, height, depth; + uint32_t format; + uint32_t mthd = dst ? 
NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; + uint32_t offset = mt->level[level].offset; + + format = nv50_2d_format(pformat, dst, dst_src_pformat_equal); + if (!format) { + NOUVEAU_ERR("invalid/unsupported surface format: %s\n", + util_format_name(pformat)); + return 1; + } + + width = u_minify(mt->base.base.width0, level) << mt->ms_x; + height = u_minify(mt->base.base.height0, level) << mt->ms_y; + depth = u_minify(mt->base.base.depth0, level); + + offset = mt->level[level].offset; + if (!mt->layout_3d) { + offset += mt->layer_stride * layer; + depth = 1; + layer = 0; + } else + if (!dst) { + offset += nv50_mt_zslice_offset(mt, level, layer); + layer = 0; + } + + if (!nouveau_bo_memtype(bo)) { + BEGIN_NV04(push, SUBC_2D(mthd), 2); + PUSH_DATA (push, format); + PUSH_DATA (push, 1); + BEGIN_NV04(push, SUBC_2D(mthd + 0x14), 5); + PUSH_DATA (push, mt->level[level].pitch); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + PUSH_DATAh(push, bo->offset + offset); + PUSH_DATA (push, bo->offset + offset); + } else { + BEGIN_NV04(push, SUBC_2D(mthd), 5); + PUSH_DATA (push, format); + PUSH_DATA (push, 0); + PUSH_DATA (push, mt->level[level].tile_mode); + PUSH_DATA (push, depth); + PUSH_DATA (push, layer); + BEGIN_NV04(push, SUBC_2D(mthd + 0x18), 4); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + PUSH_DATAh(push, bo->offset + offset); + PUSH_DATA (push, bo->offset + offset); + } + +#if 0 + if (dst) { + BEGIN_NV04(push, SUBC_2D(NV50_2D_CLIP_X), 4); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + } +#endif + return 0; +} + +static int +nv50_2d_texture_do_copy(struct nouveau_pushbuf *push, + struct nv50_miptree *dst, unsigned dst_level, + unsigned dx, unsigned dy, unsigned dz, + struct nv50_miptree *src, unsigned src_level, + unsigned sx, unsigned sy, unsigned sz, + unsigned w, unsigned h) +{ + const enum pipe_format dfmt = dst->base.base.format; + const enum pipe_format sfmt = src->base.base.format; + int ret; 
+ boolean eqfmt = dfmt == sfmt; + + if (!PUSH_SPACE(push, 2 * 16 + 32)) + return PIPE_ERROR; + + ret = nv50_2d_texture_set(push, 1, dst, dst_level, dz, dfmt, eqfmt); + if (ret) + return ret; + + ret = nv50_2d_texture_set(push, 0, src, src_level, sz, sfmt, eqfmt); + if (ret) + return ret; + + BEGIN_NV04(push, NV50_2D(BLIT_CONTROL), 1); + PUSH_DATA (push, NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE); + BEGIN_NV04(push, NV50_2D(BLIT_DST_X), 4); + PUSH_DATA (push, dx << dst->ms_x); + PUSH_DATA (push, dy << dst->ms_y); + PUSH_DATA (push, w << dst->ms_x); + PUSH_DATA (push, h << dst->ms_y); + BEGIN_NV04(push, NV50_2D(BLIT_DU_DX_FRACT), 4); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_2D(BLIT_SRC_X_FRACT), 4); + PUSH_DATA (push, 0); + PUSH_DATA (push, sx << src->ms_x); + PUSH_DATA (push, 0); + PUSH_DATA (push, sy << src->ms_y); + + return 0; +} + +static void +nv50_resource_copy_region(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) +{ + struct nv50_context *nv50 = nv50_context(pipe); + int ret; + boolean m2mf; + unsigned dst_layer = dstz, src_layer = src_box->z; + + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + nouveau_copy_buffer(&nv50->base, + nv04_resource(dst), dstx, + nv04_resource(src), src_box->x, src_box->width); + return; + } + + /* 0 and 1 are equal, only supporting 0/1, 2, 4 and 8 */ + assert((src->nr_samples | 1) == (dst->nr_samples | 1)); + + m2mf = (src->format == dst->format) || + (util_format_get_blocksizebits(src->format) == + util_format_get_blocksizebits(dst->format)); + + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + + if (m2mf) { + struct nv50_m2mf_rect drect, srect; + unsigned i; + unsigned nx = util_format_get_nblocksx(src->format, src_box->width); + unsigned ny = 
util_format_get_nblocksy(src->format, src_box->height); + + nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz); + nv50_m2mf_rect_setup(&srect, src, src_level, + src_box->x, src_box->y, src_box->z); + + for (i = 0; i < src_box->depth; ++i) { + nv50_m2mf_transfer_rect(nv50, &drect, &srect, nx, ny); + + if (nv50_miptree(dst)->layout_3d) + drect.z++; + else + drect.base += nv50_miptree(dst)->layer_stride; + + if (nv50_miptree(src)->layout_3d) + srect.z++; + else + srect.base += nv50_miptree(src)->layer_stride; + } + return; + } + + assert((src->format == dst->format) || + (nv50_2d_src_format_faithful(src->format) && + nv50_2d_dst_format_faithful(dst->format))); + + BCTX_REFN(nv50->bufctx, 2D, nv04_resource(src), RD); + BCTX_REFN(nv50->bufctx, 2D, nv04_resource(dst), WR); + nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx); + nouveau_pushbuf_validate(nv50->base.pushbuf); + + for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) { + ret = nv50_2d_texture_do_copy(nv50->base.pushbuf, + nv50_miptree(dst), dst_level, + dstx, dsty, dst_layer, + nv50_miptree(src), src_level, + src_box->x, src_box->y, src_layer, + src_box->width, src_box->height); + if (ret) + break; + } + nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D); +} + +static void +nv50_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const union pipe_color_union *color, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nv50_surface *sf = nv50_surface(dst); + struct nouveau_bo *bo = mt->base.bo; + unsigned z; + + BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4); + PUSH_DATAf(push, color->f[0]); + PUSH_DATAf(push, color->f[1]); + PUSH_DATAf(push, color->f[2]); + PUSH_DATAf(push, color->f[3]); + + if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0)) + return; + + PUSH_REFN(push, 
bo, mt->base.domain | NOUVEAU_BO_WR); + + BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5); + PUSH_DATAh(push, bo->offset + sf->offset); + PUSH_DATA (push, bo->offset + sf->offset); + PUSH_DATA (push, nv50_format_table[dst->format].rt); + PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2); + if (nouveau_bo_memtype(bo)) + PUSH_DATA(push, sf->width); + else + PUSH_DATA(push, NV50_3D_RT_HORIZ_LINEAR | mt->level[0].pitch); + PUSH_DATA (push, sf->height); + BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1); + PUSH_DATA (push, 1); + + if (!nouveau_bo_memtype(bo)) { + BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 0); + } + + /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ + + BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, (width << 16) | dstx); + PUSH_DATA (push, (height << 16) | dsty); + + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth); + for (z = 0; z < sf->depth; ++z) { + PUSH_DATA (push, 0x3c | + (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT)); + } + + nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +static void +nv50_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nv50_surface *sf = nv50_surface(dst); + struct nouveau_bo *bo = mt->base.bo; + uint32_t mode = 0; + unsigned z; + + assert(nouveau_bo_memtype(bo)); /* ZETA cannot be linear */ + + if (clear_flags & PIPE_CLEAR_DEPTH) { + BEGIN_NV04(push, NV50_3D(CLEAR_DEPTH), 1); + PUSH_DATAf(push, depth); + mode |= NV50_3D_CLEAR_BUFFERS_Z; + } + + if (clear_flags & PIPE_CLEAR_STENCIL) { + BEGIN_NV04(push, 
NV50_3D(CLEAR_STENCIL), 1); + PUSH_DATA (push, stencil & 0xff); + mode |= NV50_3D_CLEAR_BUFFERS_S; + } + + if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0)) + return; + + PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR); + + BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5); + PUSH_DATAh(push, bo->offset + sf->offset); + PUSH_DATA (push, bo->offset + sf->offset); + PUSH_DATA (push, nv50_format_table[dst->format].rt); + PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(ZETA_HORIZ), 3); + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + PUSH_DATA (push, (1 << 16) | 1); + + BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, (width << 16) | dstx); + PUSH_DATA (push, (height << 16) | dsty); + + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth); + for (z = 0; z < sf->depth; ++z) { + PUSH_DATA (push, mode | + (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT)); + } + + nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +void +nv50_clear(struct pipe_context *pipe, unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + unsigned i; + uint32_t mode = 0; + + /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ + if (!nv50_state_validate(nv50, NV50_NEW_FRAMEBUFFER, 9 + (fb->nr_cbufs * 2))) + return; + + if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { + BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4); + PUSH_DATAf(push, color->f[0]); + PUSH_DATAf(push, color->f[1]); + PUSH_DATAf(push, color->f[2]); + PUSH_DATAf(push, color->f[3]); + mode = + NV50_3D_CLEAR_BUFFERS_R | NV50_3D_CLEAR_BUFFERS_G | + NV50_3D_CLEAR_BUFFERS_B | NV50_3D_CLEAR_BUFFERS_A; + } + + if (buffers & PIPE_CLEAR_DEPTH) { + BEGIN_NV04(push, 
NV50_3D(CLEAR_DEPTH), 1); + PUSH_DATA (push, fui(depth)); + mode |= NV50_3D_CLEAR_BUFFERS_Z; + } + + if (buffers & PIPE_CLEAR_STENCIL) { + BEGIN_NV04(push, NV50_3D(CLEAR_STENCIL), 1); + PUSH_DATA (push, stencil & 0xff); + mode |= NV50_3D_CLEAR_BUFFERS_S; + } + + BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1); + PUSH_DATA (push, mode); + + for (i = 1; i < fb->nr_cbufs; i++) { + BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1); + PUSH_DATA (push, (i << 6) | 0x3c); + } +} + + +/* =============================== BLIT CODE =================================== + */ + +struct nv50_blitter +{ + struct nv50_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES]; + struct nv50_program vp; + + struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */ + + pipe_mutex mutex; +}; + +struct nv50_blitctx +{ + struct nv50_context *nv50; + struct nv50_program *fp; + uint8_t mode; + uint16_t color_mask; + uint8_t filter; + enum pipe_texture_target target; + struct { + struct pipe_framebuffer_state fb; + struct nv50_rasterizer_stateobj *rast; + struct nv50_program *vp; + struct nv50_program *gp; + struct nv50_program *fp; + unsigned num_textures[3]; + unsigned num_samplers[3]; + struct pipe_sampler_view *texture[2]; + struct nv50_tsc_entry *sampler[2]; + uint32_t dirty; + } saved; + struct nv50_rasterizer_stateobj rast; +}; + +static void +nv50_blitter_make_vp(struct nv50_blitter *blit) +{ + static const uint32_t code[] = + { + 0x10000001, 0x0423c788, /* mov b32 o[0x00] s[0x00] */ /* HPOS.x */ + 0x10000205, 0x0423c788, /* mov b32 o[0x04] s[0x04] */ /* HPOS.y */ + 0x10000409, 0x0423c788, /* mov b32 o[0x08] s[0x08] */ /* TEXC.x */ + 0x1000060d, 0x0423c788, /* mov b32 o[0x0c] s[0x0c] */ /* TEXC.y */ + 0x10000811, 0x0423c789, /* mov b32 o[0x10] s[0x10] */ /* TEXC.z */ + }; + + blit->vp.type = PIPE_SHADER_VERTEX; + blit->vp.translated = TRUE; + blit->vp.code = (uint32_t *)code; /* const_cast */ + blit->vp.code_size = sizeof(code); + blit->vp.max_gpr = 4; + blit->vp.max_out = 5; + blit->vp.out_nr 
= 2; + blit->vp.out[0].mask = 0x3; + blit->vp.out[0].sn = TGSI_SEMANTIC_POSITION; + blit->vp.out[1].hw = 2; + blit->vp.out[1].mask = 0x7; + blit->vp.out[1].sn = TGSI_SEMANTIC_GENERIC; + blit->vp.out[1].si = 0; + blit->vp.vp.attrs[0] = 0x73; + blit->vp.vp.psiz = 0x40; + blit->vp.vp.edgeflag = 0x40; +} + +void * +nv50_blitter_make_fp(struct pipe_context *pipe, + unsigned mode, + enum pipe_texture_target ptarg) +{ + struct ureg_program *ureg; + struct ureg_src tc; + struct ureg_dst out; + struct ureg_dst data; + + const unsigned target = nv50_blit_get_tgsi_texture_target(ptarg); + + boolean tex_rgbaz = FALSE; + boolean tex_s = FALSE; + boolean cvt_un8 = FALSE; + + if (mode != NV50_BLIT_MODE_PASS && + mode != NV50_BLIT_MODE_Z24X8 && + mode != NV50_BLIT_MODE_X8Z24) + tex_s = TRUE; + + if (mode != NV50_BLIT_MODE_X24S8 && + mode != NV50_BLIT_MODE_S8X24 && + mode != NV50_BLIT_MODE_XS) + tex_rgbaz = TRUE; + + if (mode != NV50_BLIT_MODE_PASS && + mode != NV50_BLIT_MODE_ZS && + mode != NV50_BLIT_MODE_XS) + cvt_un8 = TRUE; + + ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!ureg) + return NULL; + + out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); + tc = ureg_DECL_fs_input( + ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_LINEAR); + + data = ureg_DECL_temporary(ureg); + + if (tex_s) { + ureg_TEX(ureg, ureg_writemask(data, TGSI_WRITEMASK_X), + target, tc, ureg_DECL_sampler(ureg, 1)); + ureg_MOV(ureg, ureg_writemask(data, TGSI_WRITEMASK_Y), + ureg_scalar(ureg_src(data), TGSI_SWIZZLE_X)); + } + if (tex_rgbaz) { + const unsigned mask = (mode == NV50_BLIT_MODE_PASS) ? 
+ TGSI_WRITEMASK_XYZW : TGSI_WRITEMASK_X; + ureg_TEX(ureg, ureg_writemask(data, mask), + target, tc, ureg_DECL_sampler(ureg, 0)); + } + + if (cvt_un8) { + struct ureg_src mask; + struct ureg_src scale; + struct ureg_dst outz; + struct ureg_dst outs; + struct ureg_dst zdst3 = ureg_writemask(data, TGSI_WRITEMASK_XYZ); + struct ureg_dst zdst = ureg_writemask(data, TGSI_WRITEMASK_X); + struct ureg_dst sdst = ureg_writemask(data, TGSI_WRITEMASK_Y); + struct ureg_src zsrc3 = ureg_src(data); + struct ureg_src zsrc = ureg_scalar(zsrc3, TGSI_SWIZZLE_X); + struct ureg_src ssrc = ureg_scalar(zsrc3, TGSI_SWIZZLE_Y); + struct ureg_src zshuf; + + mask = ureg_imm3u(ureg, 0x0000ff, 0x00ff00, 0xff0000); + scale = ureg_imm4f(ureg, + 1.0f / 0x0000ff, 1.0f / 0x00ff00, 1.0f / 0xff0000, + (1 << 24) - 1); + + if (mode == NV50_BLIT_MODE_Z24S8 || + mode == NV50_BLIT_MODE_X24S8 || + mode == NV50_BLIT_MODE_Z24X8) { + outz = ureg_writemask(out, TGSI_WRITEMASK_XYZ); + outs = ureg_writemask(out, TGSI_WRITEMASK_W); + zshuf = ureg_src(data); + } else { + outz = ureg_writemask(out, TGSI_WRITEMASK_YZW); + outs = ureg_writemask(out, TGSI_WRITEMASK_X); + zshuf = ureg_swizzle(zsrc3, TGSI_SWIZZLE_W, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z); + } + + if (tex_s) { + ureg_I2F(ureg, sdst, ssrc); + ureg_MUL(ureg, outs, ssrc, ureg_scalar(scale, TGSI_SWIZZLE_X)); + } + + if (tex_rgbaz) { + ureg_MUL(ureg, zdst, zsrc, ureg_scalar(scale, TGSI_SWIZZLE_W)); + ureg_F2I(ureg, zdst, zsrc); + ureg_AND(ureg, zdst3, zsrc, mask); + ureg_I2F(ureg, zdst3, zsrc3); + ureg_MUL(ureg, zdst3, zsrc3, scale); + ureg_MOV(ureg, outz, zshuf); + } + } else { + unsigned mask = TGSI_WRITEMASK_XYZW; + + if (mode != NV50_BLIT_MODE_PASS) { + mask &= ~TGSI_WRITEMASK_ZW; + if (!tex_s) + mask = TGSI_WRITEMASK_X; + if (!tex_rgbaz) + mask = TGSI_WRITEMASK_Y; + } + ureg_MOV(ureg, ureg_writemask(out, mask), ureg_src(data)); + } + ureg_END(ureg); + + return ureg_create_shader_and_destroy(ureg, pipe); +} + +static void 
+nv50_blitter_make_sampler(struct nv50_blitter *blit) +{ + /* clamp to edge, min/max lod = 0, nearest filtering */ + + blit->sampler[0].id = -1; + + blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED | + (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) | + (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) | + (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT); + blit->sampler[0].tsc[1] = + NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE; + + /* clamp to edge, min/max lod = 0, bilinear filtering */ + + blit->sampler[1].id = -1; + + blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0]; + blit->sampler[1].tsc[1] = + NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE; +} + +unsigned +nv50_blit_select_mode(const struct pipe_blit_info *info) +{ + const unsigned mask = info->mask; + + switch (info->dst.resource->format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_Z24X8_UNORM: + switch (mask & PIPE_MASK_ZS) { + case PIPE_MASK_ZS: return NV50_BLIT_MODE_Z24S8; + case PIPE_MASK_Z: return NV50_BLIT_MODE_Z24X8; + default: + return NV50_BLIT_MODE_X24S8; + } + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + switch (mask & PIPE_MASK_ZS) { + case PIPE_MASK_ZS: return NV50_BLIT_MODE_S8Z24; + case PIPE_MASK_Z: return NV50_BLIT_MODE_X8Z24; + default: + return NV50_BLIT_MODE_S8X24; + } + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + switch (mask & PIPE_MASK_ZS) { + case PIPE_MASK_ZS: return NV50_BLIT_MODE_ZS; + case PIPE_MASK_Z: return NV50_BLIT_MODE_PASS; + default: + return NV50_BLIT_MODE_XS; + } + default: + return NV50_BLIT_MODE_PASS; + } +} + +static void +nv50_blit_select_fp(struct nv50_blitctx *ctx, const struct pipe_blit_info *info) +{ + struct nv50_blitter *blitter = ctx->nv50->screen->blitter; + + const enum pipe_texture_target ptarg = + nv50_blit_reinterpret_pipe_texture_target(info->src.resource->target); + + const unsigned targ = nv50_blit_texture_type(ptarg); + const 
unsigned mode = ctx->mode; + + if (!blitter->fp[targ][mode]) { + pipe_mutex_lock(blitter->mutex); + if (!blitter->fp[targ][mode]) + blitter->fp[targ][mode] = + nv50_blitter_make_fp(&ctx->nv50->base.pipe, mode, ptarg); + pipe_mutex_unlock(blitter->mutex); + } + ctx->fp = blitter->fp[targ][mode]; +} + +static void +nv50_blit_set_dst(struct nv50_blitctx *ctx, + struct pipe_resource *res, unsigned level, unsigned layer, + enum pipe_format format) +{ + struct nv50_context *nv50 = ctx->nv50; + struct pipe_context *pipe = &nv50->base.pipe; + struct pipe_surface templ; + + if (util_format_is_depth_or_stencil(format)) + templ.format = nv50_blit_zeta_to_colour_format(format); + else + templ.format = format; + + templ.u.tex.level = level; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + + if (layer == -1) { + templ.u.tex.first_layer = 0; + templ.u.tex.last_layer = + (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1; + } + + nv50->framebuffer.cbufs[0] = nv50_miptree_surface_new(pipe, res, &templ); + nv50->framebuffer.nr_cbufs = 1; + nv50->framebuffer.zsbuf = NULL; + nv50->framebuffer.width = nv50->framebuffer.cbufs[0]->width; + nv50->framebuffer.height = nv50->framebuffer.cbufs[0]->height; +} + +static void +nv50_blit_set_src(struct nv50_blitctx *blit, + struct pipe_resource *res, unsigned level, unsigned layer, + enum pipe_format format, const uint8_t filter) +{ + struct nv50_context *nv50 = blit->nv50; + struct pipe_context *pipe = &nv50->base.pipe; + struct pipe_sampler_view templ; + uint32_t flags; + enum pipe_texture_target target; + + target = nv50_blit_reinterpret_pipe_texture_target(res->target); + + templ.format = format; + templ.u.tex.first_level = templ.u.tex.last_level = level; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + templ.swizzle_r = PIPE_SWIZZLE_RED; + templ.swizzle_g = PIPE_SWIZZLE_GREEN; + templ.swizzle_b = PIPE_SWIZZLE_BLUE; + templ.swizzle_a = PIPE_SWIZZLE_ALPHA; + + if (layer == -1) { + 
templ.u.tex.first_layer = 0; + templ.u.tex.last_layer = + (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1; + } + + flags = res->last_level ? 0 : NV50_TEXVIEW_SCALED_COORDS; + flags |= NV50_TEXVIEW_ACCESS_RESOLVE; + if (filter && res->nr_samples == 8) + flags |= NV50_TEXVIEW_FILTER_MSAA8; + + nv50->textures[2][0] = nv50_create_texture_view( + pipe, res, &templ, flags, target); + nv50->textures[2][1] = NULL; + + nv50->num_textures[0] = nv50->num_textures[1] = 0; + nv50->num_textures[2] = 1; + + templ.format = nv50_zs_to_s_format(format); + if (templ.format != res->format) { + nv50->textures[2][1] = nv50_create_texture_view( + pipe, res, &templ, flags, target); + nv50->num_textures[2] = 2; + } +} + +static void +nv50_blitctx_prepare_state(struct nv50_blitctx *blit) +{ + struct nouveau_pushbuf *push = blit->nv50->base.pushbuf; + + if (blit->nv50->cond_query) { + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + } + + /* blend state */ + BEGIN_NV04(push, NV50_3D(COLOR_MASK(0)), 1); + PUSH_DATA (push, blit->color_mask); + BEGIN_NV04(push, NV50_3D(BLEND_ENABLE(0)), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(LOGIC_OP_ENABLE), 1); + PUSH_DATA (push, 0); + + /* rasterizer state */ +#ifndef NV50_SCISSORS_CLIPPING + BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 1); + PUSH_DATA (push, 1); +#endif + BEGIN_NV04(push, NV50_3D(VERTEX_TWO_SIDE_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(MSAA_MASK(0)), 4); + PUSH_DATA (push, 0xffff); + PUSH_DATA (push, 0xffff); + PUSH_DATA (push, 0xffff); + PUSH_DATA (push, 0xffff); + BEGIN_NV04(push, NV50_3D(POLYGON_MODE_FRONT), 3); + PUSH_DATA (push, NV50_3D_POLYGON_MODE_FRONT_FILL); + PUSH_DATA (push, NV50_3D_POLYGON_MODE_BACK_FILL); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(CULL_FACE_ENABLE), 1); + 
PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(POLYGON_STIPPLE_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(POLYGON_OFFSET_FILL_ENABLE), 1); + PUSH_DATA (push, 0); + + /* zsa state */ + BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1); + PUSH_DATA (push, 0); +} + +static void +nv50_blitctx_pre_blit(struct nv50_blitctx *ctx) +{ + struct nv50_context *nv50 = ctx->nv50; + struct nv50_blitter *blitter = nv50->screen->blitter; + int s; + + ctx->saved.fb.width = nv50->framebuffer.width; + ctx->saved.fb.height = nv50->framebuffer.height; + ctx->saved.fb.nr_cbufs = nv50->framebuffer.nr_cbufs; + ctx->saved.fb.cbufs[0] = nv50->framebuffer.cbufs[0]; + ctx->saved.fb.zsbuf = nv50->framebuffer.zsbuf; + + ctx->saved.rast = nv50->rast; + + ctx->saved.vp = nv50->vertprog; + ctx->saved.gp = nv50->gmtyprog; + ctx->saved.fp = nv50->fragprog; + + nv50->rast = &ctx->rast; + + nv50->vertprog = &blitter->vp; + nv50->gmtyprog = NULL; + nv50->fragprog = ctx->fp; + + for (s = 0; s < 3; ++s) { + ctx->saved.num_textures[s] = nv50->num_textures[s]; + ctx->saved.num_samplers[s] = nv50->num_samplers[s]; + } + ctx->saved.texture[0] = nv50->textures[2][0]; + ctx->saved.texture[1] = nv50->textures[2][1]; + ctx->saved.sampler[0] = nv50->samplers[2][0]; + ctx->saved.sampler[1] = nv50->samplers[2][1]; + + nv50->samplers[2][0] = &blitter->sampler[ctx->filter]; + nv50->samplers[2][1] = &blitter->sampler[ctx->filter]; + + nv50->num_samplers[0] = nv50->num_samplers[1] = 0; + nv50->num_samplers[2] = 2; + + ctx->saved.dirty = nv50->dirty; + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + + nv50->dirty = + NV50_NEW_FRAMEBUFFER | + NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG | + NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS; +} + +static void +nv50_blitctx_post_blit(struct 
nv50_blitctx *blit) +{ + struct nv50_context *nv50 = blit->nv50; + int s; + + pipe_surface_reference(&nv50->framebuffer.cbufs[0], NULL); + + nv50->framebuffer.width = blit->saved.fb.width; + nv50->framebuffer.height = blit->saved.fb.height; + nv50->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs; + nv50->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0]; + nv50->framebuffer.zsbuf = blit->saved.fb.zsbuf; + + nv50->rast = blit->saved.rast; + + nv50->vertprog = blit->saved.vp; + nv50->gmtyprog = blit->saved.gp; + nv50->fragprog = blit->saved.fp; + + pipe_sampler_view_reference(&nv50->textures[2][0], NULL); + pipe_sampler_view_reference(&nv50->textures[2][1], NULL); + + for (s = 0; s < 3; ++s) { + nv50->num_textures[s] = blit->saved.num_textures[s]; + nv50->num_samplers[s] = blit->saved.num_samplers[s]; + } + nv50->textures[2][0] = blit->saved.texture[0]; + nv50->textures[2][1] = blit->saved.texture[1]; + nv50->samplers[2][0] = blit->saved.sampler[0]; + nv50->samplers[2][1] = blit->saved.sampler[1]; + + if (nv50->cond_query) + nv50->base.pipe.render_condition(&nv50->base.pipe, nv50->cond_query, + nv50->cond_cond, nv50->cond_mode); + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + + nv50->dirty = blit->saved.dirty | + (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK | + NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND | + NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG); +} + + +static void +nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info) +{ + struct nv50_blitctx *blit = nv50->blit; + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct pipe_resource *src = info->src.resource; + struct pipe_resource *dst = info->dst.resource; + int32_t minx, maxx, miny, maxy; + int32_t i; + float x0, x1, y0, y1, z; + float dz; + float x_range, y_range; + + blit->mode = nv50_blit_select_mode(info); + blit->color_mask = 
nv50_blit_derive_color_mask(info); + blit->filter = nv50_blit_get_filter(info); + + nv50_blit_select_fp(blit, info); + nv50_blitctx_pre_blit(blit); + + nv50_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format); + nv50_blit_set_src(blit, src, info->src.level, -1, info->src.format, + blit->filter); + + nv50_blitctx_prepare_state(blit); + + nv50_state_validate(nv50, ~0, 36); + + x_range = (float)info->src.box.width / (float)info->dst.box.width; + y_range = (float)info->src.box.height / (float)info->dst.box.height; + + x0 = (float)info->src.box.x - x_range * (float)info->dst.box.x; + y0 = (float)info->src.box.y - y_range * (float)info->dst.box.y; + + x1 = x0 + 16384.0f * x_range; + y1 = y0 + 16384.0f * y_range; + + x0 *= (float)(1 << nv50_miptree(src)->ms_x); + x1 *= (float)(1 << nv50_miptree(src)->ms_x); + y0 *= (float)(1 << nv50_miptree(src)->ms_y); + y1 *= (float)(1 << nv50_miptree(src)->ms_y); + + if (src->last_level > 0) { + /* If there are mip maps, GPU always assumes normalized coordinates. */ + const unsigned l = info->src.level; + const float fh = u_minify(src->width0 << nv50_miptree(src)->ms_x, l); + const float fv = u_minify(src->height0 << nv50_miptree(src)->ms_y, l); + x0 /= fh; + x1 /= fh; + y0 /= fv; + y1 /= fv; + } + + /* XXX: multiply by 6 for cube arrays ? */ + dz = (float)info->src.box.depth / (float)info->dst.box.depth; + z = (float)info->src.box.z; + if (nv50_miptree(src)->layout_3d) + z += 0.5f * dz; + + BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1); + PUSH_DATA (push, 0x1); + + /* Draw a large triangle in screen coordinates covering the whole + * render target, with scissors defining the destination region. + * The vertex is supplied with non-normalized texture coordinates + * arranged in a way to yield the desired offset and scale. 
+ */ + + minx = info->dst.box.x; + maxx = info->dst.box.x + info->dst.box.width; + miny = info->dst.box.y; + maxy = info->dst.box.y + info->dst.box.height; + if (info->scissor_enable) { + minx = MAX2(minx, info->scissor.minx); + maxx = MIN2(maxx, info->scissor.maxx); + miny = MAX2(miny, info->scissor.miny); + maxy = MIN2(maxy, info->scissor.maxy); + } + BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2); + PUSH_DATA (push, (maxx << 16) | minx); + PUSH_DATA (push, (maxy << 16) | miny); + + for (i = 0; i < info->dst.box.depth; ++i, z += dz) { + if (info->dst.box.z + i) { + BEGIN_NV04(push, NV50_3D(LAYER), 1); + PUSH_DATA (push, info->dst.box.z + i); + } + PUSH_SPACE(push, 32); + BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (push, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3); + PUSH_DATAf(push, x0); + PUSH_DATAf(push, y0); + PUSH_DATAf(push, z); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3); + PUSH_DATAf(push, x1); + PUSH_DATAf(push, y0); + PUSH_DATAf(push, z); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2); + PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_x); + PUSH_DATAf(push, 0.0f); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3); + PUSH_DATAf(push, x0); + PUSH_DATAf(push, y1); + PUSH_DATAf(push, z); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_y); + BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); + PUSH_DATA (push, 0); + } + if (info->dst.box.z + info->dst.box.depth - 1) { + BEGIN_NV04(push, NV50_3D(LAYER), 1); + PUSH_DATA (push, 0); + } + + /* re-enable normally constant state */ + + BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1); + PUSH_DATA (push, 1); + + nv50_blitctx_post_blit(blit); +} + +static void +nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info) +{ + struct nouveau_pushbuf *push 
= nv50->base.pushbuf; + struct nv50_miptree *dst = nv50_miptree(info->dst.resource); + struct nv50_miptree *src = nv50_miptree(info->src.resource); + const int32_t srcx_adj = info->src.box.width < 0 ? -1 : 0; + const int32_t srcy_adj = info->src.box.height < 0 ? -1 : 0; + const int32_t dz = info->dst.box.z; + const int32_t sz = info->src.box.z; + uint32_t dstw, dsth; + int32_t dstx, dsty; + int64_t srcx, srcy; + int64_t du_dx, dv_dy; + int i; + uint32_t mode; + uint32_t mask = nv50_blit_eng2d_get_mask(info); + boolean b; + + mode = nv50_blit_get_filter(info) ? + NV50_2D_BLIT_CONTROL_FILTER_BILINEAR : + NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE; + mode |= (src->base.base.nr_samples > dst->base.base.nr_samples) ? + NV50_2D_BLIT_CONTROL_ORIGIN_CORNER : NV50_2D_BLIT_CONTROL_ORIGIN_CENTER; + + du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width; + dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height; + + b = info->dst.format == info->src.format; + nv50_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b); + nv50_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b); + + if (info->scissor_enable) { + BEGIN_NV04(push, NV50_2D(CLIP_X), 5); + PUSH_DATA (push, info->scissor.minx << dst->ms_x); + PUSH_DATA (push, info->scissor.miny << dst->ms_y); + PUSH_DATA (push, (info->scissor.maxx - info->scissor.minx) << dst->ms_x); + PUSH_DATA (push, (info->scissor.maxy - info->scissor.miny) << dst->ms_y); + PUSH_DATA (push, 1); /* enable */ + } + + if (mask != 0xffffffff) { + BEGIN_NV04(push, NV50_2D(ROP), 1); + PUSH_DATA (push, 0xca); /* DPSDxax */ + BEGIN_NV04(push, NV50_2D(PATTERN_COLOR_FORMAT), 1); + PUSH_DATA (push, NV50_2D_PATTERN_COLOR_FORMAT_32BPP); + BEGIN_NV04(push, NV50_2D(PATTERN_COLOR(0)), 4); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, mask); + PUSH_DATA (push, 0xffffffff); + PUSH_DATA (push, 0xffffffff); + BEGIN_NV04(push, NV50_2D(OPERATION), 1); + PUSH_DATA (push, NV50_2D_OPERATION_ROP); + } else + 
if (info->src.format != info->dst.format) { + if (info->src.format == PIPE_FORMAT_R8_UNORM || + info->src.format == PIPE_FORMAT_R16_UNORM || + info->src.format == PIPE_FORMAT_R16_FLOAT || + info->src.format == PIPE_FORMAT_R32_FLOAT) { + mask = 0xffff0000; /* also makes condition for OPERATION reset true */ + BEGIN_NV04(push, NV50_2D(BETA4), 2); + PUSH_DATA (push, mask); + PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY_PREMULT); + } + } + + if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) { + /* ms_x is always >= ms_y */ + du_dx <<= src->ms_x - dst->ms_x; + dv_dy <<= src->ms_y - dst->ms_y; + } else { + du_dx >>= dst->ms_x - src->ms_x; + dv_dy >>= dst->ms_y - src->ms_y; + } + + srcx = (int64_t)(info->src.box.x + srcx_adj) << (src->ms_x + 32); + srcy = (int64_t)(info->src.box.y + srcy_adj) << (src->ms_y + 32); + + if (src->base.base.nr_samples > dst->base.base.nr_samples) { + /* center src coordinates for proper MS resolve filtering */ + srcx += (int64_t)src->ms_x << 32; + srcy += (int64_t)src->ms_y << 32; + } + + dstx = info->dst.box.x << dst->ms_x; + dsty = info->dst.box.y << dst->ms_y; + + dstw = info->dst.box.width << dst->ms_x; + dsth = info->dst.box.height << dst->ms_y; + + if (dstx < 0) { + dstw += dstx; + srcx -= du_dx * dstx; + dstx = 0; + } + if (dsty < 0) { + dsth += dsty; + srcy -= dv_dy * dsty; + dsty = 0; + } + + BEGIN_NV04(push, NV50_2D(BLIT_CONTROL), 1); + PUSH_DATA (push, mode); + BEGIN_NV04(push, NV50_2D(BLIT_DST_X), 4); + PUSH_DATA (push, dstx); + PUSH_DATA (push, dsty); + PUSH_DATA (push, dstw); + PUSH_DATA (push, dsth); + BEGIN_NV04(push, NV50_2D(BLIT_DU_DX_FRACT), 4); + PUSH_DATA (push, du_dx); + PUSH_DATA (push, du_dx >> 32); + PUSH_DATA (push, dv_dy); + PUSH_DATA (push, dv_dy >> 32); + + BCTX_REFN(nv50->bufctx, 2D, &dst->base, WR); + BCTX_REFN(nv50->bufctx, 2D, &src->base, RD); + nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx); + if (nouveau_pushbuf_validate(nv50->base.pushbuf)) + return; + + for (i = 0; i < info->dst.box.depth; ++i) { + 
if (i > 0) { + /* no scaling in z-direction possible for eng2d blits */ + if (dst->layout_3d) { + BEGIN_NV04(push, NV50_2D(DST_LAYER), 1); + PUSH_DATA (push, info->dst.box.z + i); + } else { + const unsigned z = info->dst.box.z + i; + BEGIN_NV04(push, NV50_2D(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, dst->base.address + z * dst->layer_stride); + PUSH_DATA (push, dst->base.address + z * dst->layer_stride); + } + if (src->layout_3d) { + /* not possible because of depth tiling */ + assert(0); + } else { + const unsigned z = info->src.box.z + i; + BEGIN_NV04(push, NV50_2D(SRC_ADDRESS_HIGH), 2); + PUSH_DATAh(push, src->base.address + z * src->layer_stride); + PUSH_DATA (push, src->base.address + z * src->layer_stride); + } + BEGIN_NV04(push, NV50_2D(BLIT_SRC_Y_INT), 1); /* trigger */ + PUSH_DATA (push, srcy >> 32); + } else { + BEGIN_NV04(push, NV50_2D(BLIT_SRC_X_FRACT), 4); + PUSH_DATA (push, srcx); + PUSH_DATA (push, srcx >> 32); + PUSH_DATA (push, srcy); + PUSH_DATA (push, srcy >> 32); + } + } + nv50_bufctx_fence(nv50->bufctx, FALSE); + + nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D); + + if (info->scissor_enable) { + BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1); + PUSH_DATA (push, 0); + } + if (mask != 0xffffffff) { + BEGIN_NV04(push, NV50_2D(OPERATION), 1); + PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY); + } +} + +static void +nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) +{ + struct nv50_context *nv50 = nv50_context(pipe); + boolean eng3d = FALSE; + + if (util_format_is_depth_or_stencil(info->dst.resource->format)) { + if (!(info->mask & PIPE_MASK_ZS)) + return; + if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT || + info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + eng3d = TRUE; + if (info->filter != PIPE_TEX_FILTER_NEAREST) + eng3d = TRUE; + } else { + if (!(info->mask & PIPE_MASK_RGBA)) + return; + if (info->mask != PIPE_MASK_RGBA) + eng3d = TRUE; + } + + if (nv50_miptree(info->src.resource)->layout_3d) { + eng3d 
= TRUE; + } else + if (info->src.box.depth != info->dst.box.depth) { + eng3d = TRUE; + debug_printf("blit: cannot filter array or cube textures in z direction"); + } + + if (!eng3d && info->dst.format != info->src.format) { + /* dst format not renderable by eng2d: must fall back to the 3d engine. + * (Checking src faithfulness here too would make the else-if below + * unreachable and force eng3d for all convertible src formats.) */ + if (!nv50_2d_dst_format_faithful(info->dst.format)) { + eng3d = TRUE; + } else + if (!nv50_2d_src_format_faithful(info->src.format)) { + if (!util_format_is_luminance(info->src.format)) { + if (util_format_is_intensity(info->src.format)) + eng3d = TRUE; + else + if (!nv50_2d_dst_format_ops_supported(info->dst.format)) + eng3d = TRUE; + else + eng3d = !nv50_2d_format_supported(info->src.format); + } + } else + if (util_format_is_luminance_alpha(info->src.format)) + eng3d = TRUE; + } + + if (info->src.resource->nr_samples == 8 && + info->dst.resource->nr_samples <= 1) + eng3d = TRUE; + + /* FIXME: can't make this work with eng2d anymore */ + if (info->src.resource->nr_samples > 1 || + info->dst.resource->nr_samples > 1) + eng3d = TRUE; + + /* FIXME: find correct src coordinate adjustments */ + if ((info->src.box.width != info->dst.box.width && + info->src.box.width != -info->dst.box.width) || + (info->src.box.height != info->dst.box.height && + info->src.box.height != -info->dst.box.height)) + eng3d = TRUE; + + if (!eng3d) + nv50_blit_eng2d(nv50, info); + else + nv50_blit_3d(nv50, info); +} + +boolean +nv50_blitter_create(struct nv50_screen *screen) +{ + screen->blitter = CALLOC_STRUCT(nv50_blitter); + if (!screen->blitter) { + NOUVEAU_ERR("failed to allocate blitter struct\n"); + return FALSE; + } + + pipe_mutex_init(screen->blitter->mutex); + + nv50_blitter_make_vp(screen->blitter); + nv50_blitter_make_sampler(screen->blitter); + + return TRUE; +} + +void +nv50_blitter_destroy(struct nv50_screen *screen) +{ + struct nv50_blitter *blitter = screen->blitter; + unsigned i, m; + + for (i = 0; i < NV50_BLIT_MAX_TEXTURE_TYPES; ++i) { + for (m = 0; m < NV50_BLIT_MODES; ++m) { + struct nv50_program 
*prog = blitter->fp[i][m]; + if (prog) { + nv50_program_destroy(NULL, prog); + FREE((void *)prog->pipe.tokens); + FREE(prog); + } + } + } + + FREE(blitter); +} + +boolean +nv50_blitctx_create(struct nv50_context *nv50) +{ + nv50->blit = CALLOC_STRUCT(nv50_blitctx); + if (!nv50->blit) { + NOUVEAU_ERR("failed to allocate blit context\n"); + return FALSE; + } + + nv50->blit->nv50 = nv50; + + nv50->blit->rast.pipe.half_pixel_center = 1; + + return TRUE; +} + +void +nv50_init_surface_functions(struct nv50_context *nv50) +{ + struct pipe_context *pipe = &nv50->base.pipe; + + pipe->resource_copy_region = nv50_resource_copy_region; + pipe->blit = nv50_blit; + pipe->clear_render_target = nv50_clear_render_target; + pipe->clear_depth_stencil = nv50_clear_depth_stencil; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c new file mode 100644 index 00000000000..9e512928381 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c @@ -0,0 +1,352 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv50/nv50_context.h" +#include "nv50/nv50_resource.h" +#include "nv50/nv50_texture.xml.h" +#include "nv50/nv50_defs.xml.h" + +#include "util/u_format.h" + +#define NV50_TIC_0_SWIZZLE__MASK \ + (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ + NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) + +static INLINE uint32_t +nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int) +{ + switch (swz) { + case PIPE_SWIZZLE_RED: + return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; + case PIPE_SWIZZLE_GREEN: + return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; + case PIPE_SWIZZLE_BLUE: + return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; + case PIPE_SWIZZLE_ALPHA: + return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; + case PIPE_SWIZZLE_ONE: + return tex_int ? 
NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT; + case PIPE_SWIZZLE_ZERO: + default: + return NV50_TIC_MAP_ZERO; + } +} + +struct pipe_sampler_view * +nv50_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *res, + const struct pipe_sampler_view *templ) +{ + uint32_t flags = 0; + + if (res->target == PIPE_TEXTURE_RECT || res->target == PIPE_BUFFER) + flags |= NV50_TEXVIEW_SCALED_COORDS; + + return nv50_create_texture_view(pipe, res, templ, flags, res->target); +} + +struct pipe_sampler_view * +nv50_create_texture_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ, + uint32_t flags, + enum pipe_texture_target target) +{ + const struct util_format_description *desc; + uint64_t addr; + uint32_t *tic; + uint32_t swz[4]; + uint32_t depth; + struct nv50_tic_entry *view; + struct nv50_miptree *mt = nv50_miptree(texture); + boolean tex_int; + + view = MALLOC_STRUCT(nv50_tic_entry); + if (!view) + return NULL; + + view->pipe = *templ; + view->pipe.reference.count = 1; + view->pipe.texture = NULL; + view->pipe.context = pipe; + + view->id = -1; + + pipe_resource_reference(&view->pipe.texture, texture); + + tic = &view->tic[0]; + + desc = util_format_description(view->pipe.format); + + /* TIC[0] */ + + tic[0] = nv50_format_table[view->pipe.format].tic; + + tex_int = util_format_is_pure_integer(view->pipe.format); + + swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int); + swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int); + swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int); + swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int); + tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | + (swz[0] << NV50_TIC_0_MAPR__SHIFT) | + (swz[1] << NV50_TIC_0_MAPG__SHIFT) | + (swz[2] << NV50_TIC_0_MAPB__SHIFT) | + (swz[3] << NV50_TIC_0_MAPA__SHIFT); + + addr = mt->base.address; + + if (mt->base.base.target == PIPE_TEXTURE_1D_ARRAY || + mt->base.base.target == 
PIPE_TEXTURE_2D_ARRAY) { + addr += view->pipe.u.tex.first_layer * mt->layer_stride; + depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1; + } else { + depth = mt->base.base.depth0; + } + + tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER; + + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; + + if (!(flags & NV50_TEXVIEW_SCALED_COORDS)) + tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; + + if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) { + if (target == PIPE_BUFFER) { + addr += view->pipe.u.buf.first_element * desc->block.bits / 8; + tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER; + tic[3] = 0; + tic[4] = /* width */ + view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1; + tic[5] = 0; + } else { + tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT; + tic[3] = mt->level[0].pitch; + tic[4] = mt->base.base.width0; + tic[5] = (1 << 16) | mt->base.base.height0; + } + tic[6] = + tic[7] = 0; + tic[1] = addr; + tic[2] |= addr >> 32; + return &view->pipe; + } + + tic[1] = addr; + tic[2] |= (addr >> 32) & 0xff; + + tic[2] |= + ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) | + ((mt->level[0].tile_mode & 0xf00) << (25 - 8)); + + switch (target) { + case PIPE_TEXTURE_1D: + tic[2] |= NV50_TIC_2_TARGET_1D; + break; + case PIPE_TEXTURE_2D: + tic[2] |= NV50_TIC_2_TARGET_2D; + break; + case PIPE_TEXTURE_RECT: + tic[2] |= NV50_TIC_2_TARGET_RECT; + break; + case PIPE_TEXTURE_3D: + tic[2] |= NV50_TIC_2_TARGET_3D; + break; + case PIPE_TEXTURE_CUBE: + depth /= 6; + tic[2] |= NV50_TIC_2_TARGET_CUBE; + break; + case PIPE_TEXTURE_1D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; + break; + case PIPE_TEXTURE_2D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; + break; + case PIPE_TEXTURE_CUBE_ARRAY: + depth /= 6; + tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; + break; + case PIPE_BUFFER: + assert(0); /* should be linear and handled above ! 
*/ + tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR; + break; + default: + NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target); + return FALSE; + } + + tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000; + + tic[4] = (1 << 31) | (mt->base.base.width0 << mt->ms_x); + + tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff; + tic[5] |= depth << 16; + tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT; + + tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */ + + tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; + + if (unlikely(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS))) + if (mt->base.base.last_level) + tic[5] &= ~NV50_TIC_5_LAST_LEVEL__MASK; + + return &view->pipe; +} + +static boolean +nv50_validate_tic(struct nv50_context *nv50, int s) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nouveau_bo *txc = nv50->screen->txc; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nv50->num_textures[s]; ++i) { + struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]); + struct nv04_resource *res; + + if (!tic) { + BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); + PUSH_DATA (push, (i << 1) | 0); + continue; + } + res = &nv50_miptree(tic->pipe.texture)->base; + + if (tic->id < 0) { + tic->id = nv50_screen_tic_alloc(nv50->screen, tic); + + BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); + PUSH_DATA (push, 262144); + PUSH_DATA (push, 65536); + PUSH_DATA (push, 1); + PUSH_DATAh(push, txc->offset); + PUSH_DATA (push, txc->offset); + BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); + PUSH_DATA (push, 32); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + 
PUSH_DATA (push, 0); + PUSH_DATA (push, tic->id * 32); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NI04(push, NV50_2D(SIFC_DATA), 8); + PUSH_DATAp(push, &tic->tic[0], 8); + + need_flush = TRUE; + } else + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, 0x20); + } + + nv50->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + /* clear the write flag, then mark the resource as read by the GPU + * (missing ~ would instead wipe every status bit except WRITING) */ + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + BCTX_REFN(nv50->bufctx_3d, TEXTURES, res, RD); + + BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); + PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1); + } + for (; i < nv50->state.num_textures[s]; ++i) { + BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); + PUSH_DATA (push, (i << 1) | 0); + } + nv50->state.num_textures[s] = nv50->num_textures[s]; + + return need_flush; +} + +void nv50_validate_textures(struct nv50_context *nv50) +{ + boolean need_flush; + + need_flush = nv50_validate_tic(nv50, 0); + need_flush |= nv50_validate_tic(nv50, 2); + + if (need_flush) { + BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TIC_FLUSH), 1); + PUSH_DATA (nv50->base.pushbuf, 0); + } +} + +static boolean +nv50_validate_tsc(struct nv50_context *nv50, int s) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nv50->num_samplers[s]; ++i) { + struct nv50_tsc_entry *tsc = nv50_tsc_entry(nv50->samplers[s][i]); + + if (!tsc) { + BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); + PUSH_DATA (push, (i << 4) | 0); + continue; + } + if (tsc->id < 0) { + tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc); + + nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc, + 65536 + tsc->id * 32, + NOUVEAU_BO_VRAM, 32, tsc->tsc); + need_flush = TRUE; + } + nv50->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); + + BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); + PUSH_DATA (push, (tsc->id << 12) | (i << 4) | 1); + } + for (; i < 
nv50->state.num_samplers[s]; ++i) { + BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); + PUSH_DATA (push, (i << 4) | 0); + } + nv50->state.num_samplers[s] = nv50->num_samplers[s]; + + return need_flush; +} + +void nv50_validate_samplers(struct nv50_context *nv50) +{ + boolean need_flush; + + need_flush = nv50_validate_tsc(nv50, 0); + need_flush |= nv50_validate_tsc(nv50, 2); + + if (need_flush) { + BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TSC_FLUSH), 1); + PUSH_DATA (nv50->base.pushbuf, 0); + } +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h new file mode 100644 index 00000000000..31eab9b5d87 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h @@ -0,0 +1,306 @@ +#ifndef NV50_TEXTURE_XML +#define NV50_TEXTURE_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- rnndb/nv50_texture.xml ( 8648 bytes, from 2013-04-13 12:49:11) +- rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) +- rnndb/nvchipsets.xml ( 3954 bytes, from 2013-03-26 01:26:43) +- rnndb/nv50_defs.xml ( 16652 bytes, from 2013-04-04 10:57:15) + +Copyright (C) 2006-2013 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice 
(including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#define NV50_TIC_MAP_ZERO 0x00000000 +#define NV50_TIC_MAP_C0 0x00000002 +#define NV50_TIC_MAP_C1 0x00000003 +#define NV50_TIC_MAP_C2 0x00000004 +#define NV50_TIC_MAP_C3 0x00000005 +#define NV50_TIC_MAP_ONE_INT 0x00000006 +#define NV50_TIC_MAP_ONE_FLOAT 0x00000007 +#define NV50_TIC_TYPE_SNORM 0x00000001 +#define NV50_TIC_TYPE_UNORM 0x00000002 +#define NV50_TIC_TYPE_SINT 0x00000003 +#define NV50_TIC_TYPE_UINT 0x00000004 +#define NV50_TIC_TYPE_SSCALED 0x00000005 +#define NV50_TIC_TYPE_USCALED 0x00000006 +#define NV50_TIC_TYPE_FLOAT 0x00000007 +#define NV50_TSC_WRAP_REPEAT 0x00000000 +#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001 +#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002 +#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003 +#define NV50_TSC_WRAP_CLAMP 0x00000004 +#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005 +#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006 +#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007 +#define NV50_TIC__SIZE 0x00000020 +#define NV50_TIC_0 0x00000000 +#define NV50_TIC_0_MAPA__MASK 0x38000000 +#define NV50_TIC_0_MAPA__SHIFT 27 +#define NV50_TIC_0_MAPB__MASK 0x07000000 +#define NV50_TIC_0_MAPB__SHIFT 24 +#define NV50_TIC_0_MAPG__MASK 0x00e00000 +#define NV50_TIC_0_MAPG__SHIFT 21 +#define NV50_TIC_0_MAPR__MASK 0x001c0000 +#define NV50_TIC_0_MAPR__SHIFT 18 +#define NV50_TIC_0_TYPE3__MASK 0x00038000 +#define NV50_TIC_0_TYPE3__SHIFT 15 +#define 
NV50_TIC_0_TYPE2__MASK 0x00007000 +#define NV50_TIC_0_TYPE2__SHIFT 12 +#define NV50_TIC_0_TYPE1__MASK 0x00000e00 +#define NV50_TIC_0_TYPE1__SHIFT 9 +#define NV50_TIC_0_TYPE0__MASK 0x000001c0 +#define NV50_TIC_0_TYPE0__SHIFT 6 +#define NV50_TIC_0_FMT__MASK 0x0000003f +#define NV50_TIC_0_FMT__SHIFT 0 +#define NV50_TIC_0_FMT_32_32_32_32 0x00000001 +#define NVC0_TIC_0_FMT_32_32_32 0x00000002 +#define NV50_TIC_0_FMT_16_16_16_16 0x00000003 +#define NV50_TIC_0_FMT_32_32 0x00000004 +#define NV50_TIC_0_FMT_32_8_X24 0x00000005 +#define NV50_TIC_0_FMT_8_8_8_8 0x00000008 +#define NV50_TIC_0_FMT_10_10_10_2 0x00000009 +#define NV50_TIC_0_FMT_16_16 0x0000000c +#define NV50_TIC_0_FMT_24_8 0x0000000d +#define NV50_TIC_0_FMT_8_24 0x0000000e +#define NV50_TIC_0_FMT_32 0x0000000f +#define NV50_TIC_0_FMT_BPTC_FLOAT 0x00000010 +#define NV50_TIC_0_FMT_BPTC_UFLOAT 0x00000011 +#define NV50_TIC_0_FMT_4_4_4_4 0x00000012 +#define NV50_TIC_0_FMT_1_5_5_5 0x00000013 +#define NV50_TIC_0_FMT_5_5_5_1 0x00000014 +#define NV50_TIC_0_FMT_5_6_5 0x00000015 +#define NV50_TIC_0_FMT_5_5_6 0x00000016 +#define NV50_TIC_0_FMT_BPTC 0x00000017 +#define NV50_TIC_0_FMT_8_8 0x00000018 +#define NV50_TIC_0_FMT_16 0x0000001b +#define NV50_TIC_0_FMT_8 0x0000001d +#define NV50_TIC_0_FMT_4_4 0x0000001e +#define NV50_TIC_0_FMT_BITMAP 0x0000001f +#define NV50_TIC_0_FMT_9_9_9_E5 0x00000020 +#define NV50_TIC_0_FMT_11_11_10 0x00000021 +#define NV50_TIC_0_FMT_U8_YA8_V8_YB8 0x00000022 +#define NV50_TIC_0_FMT_YA8_U8_YB8_V8 0x00000023 +#define NV50_TIC_0_FMT_DXT1 0x00000024 +#define NV50_TIC_0_FMT_DXT3 0x00000025 +#define NV50_TIC_0_FMT_DXT5 0x00000026 +#define NV50_TIC_0_FMT_RGTC1 0x00000027 +#define NV50_TIC_0_FMT_RGTC2 0x00000028 +#define NV50_TIC_0_FMT_S8_Z24 0x00000029 +#define NV50_TIC_0_FMT_Z24_X8 0x0000002a +#define NV50_TIC_0_FMT_Z24_S8 0x0000002b +#define NV50_TIC_0_FMT_Z24_C8_MS4_CS4 0x0000002c +#define NV50_TIC_0_FMT_Z24_C8_MS8_CS8 0x0000002d +#define NV50_TIC_0_FMT_Z24_C8_MS4_CS12 0x0000002e +#define 
NV50_TIC_0_FMT_Z32 0x0000002f +#define NV50_TIC_0_FMT_Z32_S8_X24 0x00000030 +#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS4 0x00000031 +#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS8_CS8 0x00000032 +#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS4 0x00000033 +#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS8_CS8 0x00000034 +#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS4 0x00000035 +#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS8_CS8 0x00000036 +#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS12 0x00000037 +#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS12 0x00000038 +#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS12 0x00000039 +#define NV50_TIC_0_FMT_Z16 0x0000003a + +#define NV50_TIC_1 0x00000004 +#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff +#define NV50_TIC_1_OFFSET_LOW__SHIFT 0 + +#define NV50_TIC_2 0x00000008 +#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff +#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0 +#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400 +#define NV50_TIC_2_TARGET__MASK 0x0003c000 +#define NV50_TIC_2_TARGET__SHIFT 14 +#define NV50_TIC_2_TARGET_1D 0x00000000 +#define NV50_TIC_2_TARGET_2D 0x00004000 +#define NV50_TIC_2_TARGET_3D 0x00008000 +#define NV50_TIC_2_TARGET_CUBE 0x0000c000 +#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000 +#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000 +#define NV50_TIC_2_TARGET_BUFFER 0x00018000 +#define NV50_TIC_2_TARGET_RECT 0x0001c000 +#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000 +#define NV50_TIC_2_LINEAR 0x00040000 +#define NV50_TIC_2_TILE_MODE_X__MASK 0x00380000 +#define NV50_TIC_2_TILE_MODE_X__SHIFT 19 +#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000 +#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22 +#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000 +#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25 +#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000 +#define NV50_TIC_2_2D_UNK0258__SHIFT 28 +#define NV50_TIC_2_NO_BORDER 0x40000000 +#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000 + +#define NV50_TIC_3 0x0000000c +#define NV50_TIC_3_PITCH__MASK 0xffffffff +#define 
NV50_TIC_3_PITCH__SHIFT 0 + +#define NV50_TIC_4 0x00000010 +#define NV50_TIC_4_WIDTH__MASK 0xffffffff +#define NV50_TIC_4_WIDTH__SHIFT 0 + +#define NV50_TIC_5 0x00000014 +#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000 +#define NV50_TIC_5_LAST_LEVEL__SHIFT 28 +#define NV50_TIC_5_DEPTH__MASK 0x0fff0000 +#define NV50_TIC_5_DEPTH__SHIFT 16 +#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff +#define NV50_TIC_5_HEIGHT__SHIFT 0 + +#define NV50_TIC_7 0x0000001c +#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f +#define NV50_TIC_7_BASE_LEVEL__SHIFT 0 +#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0 +#define NV50_TIC_7_MAX_LEVEL__SHIFT 4 +#define NV50_TIC_7_MS_MODE__MASK 0x0000f000 +#define NV50_TIC_7_MS_MODE__SHIFT 12 +#define NV50_TIC_7_MS_MODE_MS1 0x00000000 +#define NV50_TIC_7_MS_MODE_MS2 0x00001000 +#define NV50_TIC_7_MS_MODE_MS4 0x00002000 +#define NV50_TIC_7_MS_MODE_MS8 0x00003000 +#define NVA3_TIC_7_MS_MODE_MS8_ALT 0x00004000 +#define NVA3_TIC_7_MS_MODE_MS2_ALT 0x00005000 +#define NVC0_TIC_7_MS_MODE_UNK6 0x00006000 +#define NV50_TIC_7_MS_MODE_MS4_CS4 0x00008000 +#define NV50_TIC_7_MS_MODE_MS4_CS12 0x00009000 +#define NV50_TIC_7_MS_MODE_MS8_CS8 0x0000a000 +#define NVC0_TIC_7_MS_MODE_MS8_CS24 0x0000b000 + +#define NV50_TSC__SIZE 0x00000020 +#define NV50_TSC_0 0x00000000 +#define NV50_TSC_0_WRAPS__MASK 0x00000007 +#define NV50_TSC_0_WRAPS__SHIFT 0 +#define NV50_TSC_0_WRAPT__MASK 0x00000038 +#define NV50_TSC_0_WRAPT__SHIFT 3 +#define NV50_TSC_0_WRAPR__MASK 0x000001c0 +#define NV50_TSC_0_WRAPR__SHIFT 6 +#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200 +#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00 +#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10 +#define NV50_TSC_0_SRGB_CONVERSION_ALLOWED 0x00002000 +#define NV50_TSC_0_BOX_S__MASK 0x0001c000 +#define NV50_TSC_0_BOX_S__SHIFT 14 +#define NV50_TSC_0_BOX_T__MASK 0x000e0000 +#define NV50_TSC_0_BOX_T__SHIFT 17 +#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000 +#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20 + +#define 
NV50_TSC_1 0x00000004 +#define NV50_TSC_1_UNKN_ANISO_15 0x10000000 +#define NV50_TSC_1_UNKN_ANISO_35 0x18000000 +#define NV50_TSC_1_MAGF__MASK 0x00000003 +#define NV50_TSC_1_MAGF__SHIFT 0 +#define NV50_TSC_1_MAGF_NEAREST 0x00000001 +#define NV50_TSC_1_MAGF_LINEAR 0x00000002 +#define NV50_TSC_1_MINF__MASK 0x00000030 +#define NV50_TSC_1_MINF__SHIFT 4 +#define NV50_TSC_1_MINF_NEAREST 0x00000010 +#define NV50_TSC_1_MINF_LINEAR 0x00000020 +#define NV50_TSC_1_MIPF__MASK 0x000000c0 +#define NV50_TSC_1_MIPF__SHIFT 6 +#define NV50_TSC_1_MIPF_NONE 0x00000040 +#define NV50_TSC_1_MIPF_NEAREST 0x00000080 +#define NV50_TSC_1_MIPF_LINEAR 0x000000c0 +#define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200 +#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000 +#define NV50_TSC_1_LOD_BIAS__SHIFT 12 +#define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000 + +#define NV50_TSC_2 0x00000008 +#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff +#define NV50_TSC_2_MIN_LOD__SHIFT 0 +#define NV50_TSC_2_MAX_LOD__MASK 0x00fff000 +#define NV50_TSC_2_MAX_LOD__SHIFT 12 +#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__MASK 0xff000000 +#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__SHIFT 24 + +#define NV50_TSC_3 0x0000000c +#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__MASK 0x000ff000 +#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__SHIFT 12 +#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__MASK 0x0ff00000 +#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__SHIFT 20 + +#define NV50_TSC_4 0x00000010 +#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff +#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0 + +#define NV50_TSC_5 0x00000014 +#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff +#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0 + +#define NV50_TSC_6 0x00000018 +#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff +#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0 + +#define NV50_TSC_7 0x0000001c +#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff +#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0 + + +#endif /* NV50_TEXTURE_XML */ diff --git 
a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c new file mode 100644 index 00000000000..a9906829fec --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c @@ -0,0 +1,412 @@ + +#include "util/u_format.h" + +#include "nv50/nv50_context.h" + +#include "nv50/nv50_defs.xml.h" + +struct nv50_transfer { + struct pipe_transfer base; + struct nv50_m2mf_rect rect[2]; + uint32_t nblocksx; + uint32_t nblocksy; +}; + +void +nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect, + struct pipe_resource *restrict res, unsigned l, + unsigned x, unsigned y, unsigned z) +{ + struct nv50_miptree *mt = nv50_miptree(res); + const unsigned w = u_minify(res->width0, l); + const unsigned h = u_minify(res->height0, l); + + rect->bo = mt->base.bo; + rect->domain = mt->base.domain; + rect->base = mt->level[l].offset; + rect->pitch = mt->level[l].pitch; + if (util_format_is_plain(res->format)) { + rect->width = w << mt->ms_x; + rect->height = h << mt->ms_y; + rect->x = x << mt->ms_x; + rect->y = y << mt->ms_y; + } else { + rect->width = util_format_get_nblocksx(res->format, w); + rect->height = util_format_get_nblocksy(res->format, h); + rect->x = util_format_get_nblocksx(res->format, x); + rect->y = util_format_get_nblocksy(res->format, y); + } + rect->tile_mode = mt->level[l].tile_mode; + rect->cpp = util_format_get_blocksize(res->format); + + if (mt->layout_3d) { + rect->z = z; + rect->depth = u_minify(res->depth0, l); + } else { + rect->base += z * mt->layer_stride; + rect->z = 0; + rect->depth = 1; + } +} + +void +nv50_m2mf_transfer_rect(struct nv50_context *nv50, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nouveau_bufctx *bctx = nv50->bufctx; + const int cpp = dst->cpp; + uint32_t src_ofst = src->base; + uint32_t dst_ofst = dst->base; + uint32_t height = nblocksy; + uint32_t sy = src->y; + 
uint32_t dy = dst->y; + + assert(dst->cpp == src->cpp); + + nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD); + nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + if (nouveau_bo_memtype(src->bo)) { + BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 6); + PUSH_DATA (push, 0); + PUSH_DATA (push, src->tile_mode); + PUSH_DATA (push, src->width * cpp); + PUSH_DATA (push, src->height); + PUSH_DATA (push, src->depth); + PUSH_DATA (push, src->z); + } else { + src_ofst += src->y * src->pitch + src->x * cpp; + + BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_PITCH_IN), 1); + PUSH_DATA (push, src->pitch); + } + + if (nouveau_bo_memtype(dst->bo)) { + BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 6); + PUSH_DATA (push, 0); + PUSH_DATA (push, dst->tile_mode); + PUSH_DATA (push, dst->width * cpp); + PUSH_DATA (push, dst->height); + PUSH_DATA (push, dst->depth); + PUSH_DATA (push, dst->z); + } else { + dst_ofst += dst->y * dst->pitch + dst->x * cpp; + + BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_PITCH_OUT), 1); + PUSH_DATA (push, dst->pitch); + } + + while (height) { + int line_count = height > 2047 ? 
2047 : height; + + BEGIN_NV04(push, NV50_M2MF(OFFSET_IN_HIGH), 2); + PUSH_DATAh(push, src->bo->offset + src_ofst); + PUSH_DATAh(push, dst->bo->offset + dst_ofst); + + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_OFFSET_IN), 2); + PUSH_DATA (push, src->bo->offset + src_ofst); + PUSH_DATA (push, dst->bo->offset + dst_ofst); + + if (nouveau_bo_memtype(src->bo)) { + BEGIN_NV04(push, NV50_M2MF(TILING_POSITION_IN), 1); + PUSH_DATA (push, (sy << 16) | (src->x * cpp)); + } else { + src_ofst += line_count * src->pitch; + } + if (nouveau_bo_memtype(dst->bo)) { + BEGIN_NV04(push, NV50_M2MF(TILING_POSITION_OUT), 1); + PUSH_DATA (push, (dy << 16) | (dst->x * cpp)); + } else { + dst_ofst += line_count * dst->pitch; + } + + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_LINE_LENGTH_IN), 4); + PUSH_DATA (push, nblocksx * cpp); + PUSH_DATA (push, line_count); + PUSH_DATA (push, (1 << 8) | (1 << 0)); + PUSH_DATA (push, 0); + + height -= line_count; + sy += line_count; + dy += line_count; + } + + nouveau_bufctx_reset(bctx, 0); +} + +void +nv50_sifc_linear_u8(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data) +{ + struct nv50_context *nv50 = nv50_context(&nv->pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + uint32_t *src = (uint32_t *)data; + unsigned count = (size + 3) / 4; + unsigned xcoord = offset & 0xff; + + nouveau_bufctx_refn(nv50->bufctx, 0, dst, domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nv50->bufctx); + nouveau_pushbuf_validate(push); + + offset &= ~0xff; + + BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); + PUSH_DATA (push, 262144); + PUSH_DATA (push, 65536); + PUSH_DATA (push, 1); + PUSH_DATAh(push, dst->offset + offset); + PUSH_DATA (push, dst->offset + offset); + BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); 
+ BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); + PUSH_DATA (push, size); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, xcoord); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + + while (count) { + unsigned nr; + + if (!PUSH_SPACE(push, 16)) + break; + nr = PUSH_AVAIL(push); + assert(nr >= 16); + nr = MIN2(count, nr - 1); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + + BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr); + PUSH_DATAp(push, src, nr); + + src += nr; + count -= nr; + } + + nouveau_bufctx_reset(nv50->bufctx, 0); +} + +void +nv50_m2mf_copy_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size) +{ + struct nouveau_pushbuf *push = nv->pushbuf; + struct nouveau_bufctx *bctx = nv50_context(&nv->pipe)->bufctx; + + nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD); + nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 1); + PUSH_DATA (push, 1); + + while (size) { + unsigned bytes = MIN2(size, 1 << 17); + + BEGIN_NV04(push, NV50_M2MF(OFFSET_IN_HIGH), 2); + PUSH_DATAh(push, src->offset + srcoff); + PUSH_DATAh(push, dst->offset + dstoff); + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_OFFSET_IN), 2); + PUSH_DATA (push, src->offset + srcoff); + PUSH_DATA (push, dst->offset + dstoff); + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_LINE_LENGTH_IN), 4); + PUSH_DATA (push, bytes); + PUSH_DATA (push, 1); + PUSH_DATA (push, (1 << 8) | (1 << 0)); + PUSH_DATA (push, 0); + + srcoff += bytes; + dstoff += bytes; + size -= bytes; + } + + nouveau_bufctx_reset(bctx, 0); +} + +void * +nv50_miptree_transfer_map(struct pipe_context *pctx, + struct pipe_resource *res, + unsigned level, + 
unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer) +{ + struct nv50_screen *screen = nv50_screen(pctx->screen); + struct nv50_context *nv50 = nv50_context(pctx); + struct nouveau_device *dev = nv50->screen->base.device; + const struct nv50_miptree *mt = nv50_miptree(res); + struct nv50_transfer *tx; + uint32_t size; + int ret; + unsigned flags = 0; + + if (usage & PIPE_TRANSFER_MAP_DIRECTLY) + return NULL; + + tx = CALLOC_STRUCT(nv50_transfer); + if (!tx) + return NULL; + + pipe_resource_reference(&tx->base.resource, res); + + tx->base.level = level; + tx->base.usage = usage; + tx->base.box = *box; + + if (util_format_is_plain(res->format)) { + tx->nblocksx = box->width << mt->ms_x; + tx->nblocksy = box->height << mt->ms_x; + } else { + tx->nblocksx = util_format_get_nblocksx(res->format, box->width); + tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + } + + tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); + tx->base.layer_stride = tx->nblocksy * tx->base.stride; + + nv50_m2mf_rect_setup(&tx->rect[0], res, level, box->x, box->y, box->z); + + size = tx->base.layer_stride; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, + size * tx->base.box.depth, NULL, &tx->rect[1].bo); + if (ret) { + FREE(tx); + return NULL; + } + + tx->rect[1].cpp = tx->rect[0].cpp; + tx->rect[1].width = tx->nblocksx; + tx->rect[1].height = tx->nblocksy; + tx->rect[1].depth = 1; + tx->rect[1].pitch = tx->base.stride; + tx->rect[1].domain = NOUVEAU_BO_GART; + + if (usage & PIPE_TRANSFER_READ) { + unsigned base = tx->rect[0].base; + unsigned z = tx->rect[0].z; + unsigned i; + for (i = 0; i < box->depth; ++i) { + nv50_m2mf_transfer_rect(nv50, &tx->rect[1], &tx->rect[0], + tx->nblocksx, tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += size; + } + tx->rect[0].z = z; + tx->rect[0].base = base; + tx->rect[1].base = 0; + } + + if 
(tx->rect[1].bo->map) { + *ptransfer = &tx->base; + return tx->rect[1].bo->map; + } + + if (usage & PIPE_TRANSFER_READ) + flags = NOUVEAU_BO_RD; + if (usage & PIPE_TRANSFER_WRITE) + flags |= NOUVEAU_BO_WR; + + ret = nouveau_bo_map(tx->rect[1].bo, flags, screen->base.client); + if (ret) { + nouveau_bo_ref(NULL, &tx->rect[1].bo); + FREE(tx); + return NULL; + } + + *ptransfer = &tx->base; + return tx->rect[1].bo->map; +} + +void +nv50_miptree_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct nv50_context *nv50 = nv50_context(pctx); + struct nv50_transfer *tx = (struct nv50_transfer *)transfer; + struct nv50_miptree *mt = nv50_miptree(tx->base.resource); + unsigned i; + + if (tx->base.usage & PIPE_TRANSFER_WRITE) { + for (i = 0; i < tx->base.box.depth; ++i) { + nv50_m2mf_transfer_rect(nv50, &tx->rect[0], &tx->rect[1], + tx->nblocksx, tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += tx->nblocksy * tx->base.stride; + } + } + + nouveau_bo_ref(NULL, &tx->rect[1].bo); + pipe_resource_reference(&transfer->resource, NULL); + + FREE(tx); +} + +void +nv50_cb_push(struct nouveau_context *nv, + struct nouveau_bo *bo, unsigned domain, + unsigned base, unsigned size, + unsigned offset, unsigned words, const uint32_t *data) +{ + struct nouveau_pushbuf *push = nv->pushbuf; + struct nouveau_bufctx *bctx = nv50_context(&nv->pipe)->bufctx; + + assert(!(offset & 3)); + size = align(size, 0x100); + + nouveau_bufctx_refn(bctx, 0, bo, NOUVEAU_BO_WR | domain); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + while (words) { + unsigned nr; + + nr = PUSH_AVAIL(push); + nr = MIN2(nr - 7, words); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1); + + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, bo->offset + base); + PUSH_DATA (push, bo->offset + base); + PUSH_DATA (push, (NV50_CB_TMP << 16) | (size & 0xffff)); + BEGIN_NV04(push, 
NV50_3D(CB_ADDR), 1); + PUSH_DATA (push, (offset << 6) | NV50_CB_TMP); + BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr); + PUSH_DATAp(push, data, nr); + + words -= nr; + data += nr; + offset += nr * 4; + } + + nouveau_bufctx_reset(bctx, 0); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.h b/src/gallium/drivers/nouveau/nv50/nv50_transfer.h new file mode 100644 index 00000000000..c58cb0008df --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.h @@ -0,0 +1,27 @@ + +#ifndef __NV50_TRANSFER_H__ +#define __NV50_TRANSFER_H__ + +#include "pipe/p_state.h" + +struct nv50_m2mf_rect { + struct nouveau_bo *bo; + uint32_t base; + unsigned domain; + uint32_t pitch; + uint32_t width; + uint32_t x; + uint32_t height; + uint32_t y; + uint16_t depth; + uint16_t z; + uint16_t tile_mode; + uint16_t cpp; +}; + +void +nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect, + struct pipe_resource *restrict res, unsigned l, + unsigned x, unsigned y, unsigned z); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c new file mode 100644 index 00000000000..c6162b5f415 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -0,0 +1,820 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "translate/translate.h"

#include "nv50/nv50_context.h"
#include "nv50/nv50_resource.h"

#include "nv50/nv50_3d.xml.h"

/* CSO destructor for vertex element state: also releases the translate
 * object created as a fallback for unsupported vertex formats.
 */
void
nv50_vertex_state_delete(struct pipe_context *pipe,
                         void *hwcso)
{
   struct nv50_vertex_stateobj *so = hwcso;

   if (so->translate)
      so->translate->release(so->translate);
   FREE(hwcso);
}

/* Build the vertex element CSO: per-element hardware attrib state, the
 * per-buffer access sizes used for user-buffer uploads, instancing masks,
 * and a translate object for the push-vertices fallback path.
 * Formats the hardware can't fetch are converted to R32*_FLOAT and
 * need_conversion forces the FIFO (translate) path.
 */
void *
nv50_vertex_state_create(struct pipe_context *pipe,
                         unsigned num_elements,
                         const struct pipe_vertex_element *elements)
{
   struct nv50_vertex_stateobj *so;
   struct translate_key transkey;
   unsigned i;

   so = MALLOC(sizeof(*so) +
               num_elements * sizeof(struct nv50_vertex_element));
   if (!so)
      return NULL;
   so->num_elements = num_elements;
   so->instance_elts = 0;
   so->instance_bufs = 0;
   so->need_conversion = FALSE;

   memset(so->vb_access_size, 0, sizeof(so->vb_access_size));

   for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
      so->min_instance_div[i] = 0xffffffff;

   transkey.nr_elements = 0;
   transkey.output_stride = 0;

   for (i = 0; i < num_elements; ++i) {
      const struct pipe_vertex_element *ve = &elements[i];
      const unsigned vbi = ve->vertex_buffer_index;
      unsigned size;
      enum pipe_format fmt = ve->src_format;

      so->element[i].pipe = elements[i];
      so->element[i].state = nv50_format_table[fmt].vtx;

      if (!so->element[i].state) {
         /* format not directly supported: fall back to float and convert */
         switch (util_format_get_nr_components(fmt)) {
         case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
         case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
         case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
         case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
         default:
            assert(0);
            FREE(so);
            return NULL;
         }
         so->element[i].state = nv50_format_table[fmt].vtx;
         so->need_conversion = TRUE;
      }
      so->element[i].state |= i;

      /* track how many bytes of each vertex buffer an element may read */
      size = util_format_get_blocksize(fmt);
      if (so->vb_access_size[vbi] < (ve->src_offset + size))
         so->vb_access_size[vbi] = ve->src_offset + size;

      if (1) {
         unsigned j = transkey.nr_elements++;

         transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
         transkey.element[j].input_format = ve->src_format;
         transkey.element[j].input_buffer = vbi;
         transkey.element[j].input_offset = ve->src_offset;
         transkey.element[j].instance_divisor = ve->instance_divisor;

         transkey.element[j].output_format = fmt;
         transkey.element[j].output_offset = transkey.output_stride;
         /* keep translated output dword-aligned */
         transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;

         if (unlikely(ve->instance_divisor)) {
            so->instance_elts |= 1 << i;
            so->instance_bufs |= 1 << vbi;
            if (ve->instance_divisor < so->min_instance_div[vbi])
               so->min_instance_div[vbi] = ve->instance_divisor;
         }
      }
   }

   so->translate = translate_create(&transkey);
   so->vertex_size = transkey.output_stride / 4;
   so->packet_vertex_limit = NV04_PFIFO_MAX_PACKET_LEN /
      MAX2(so->vertex_size, 1);

   return so;
}

#define NV50_3D_VERTEX_ATTRIB_INACTIVE \
   NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT | \
   NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 | \
   NV50_3D_VERTEX_ARRAY_ATTRIB_CONST

/* Emit a constant (stride-0) vertex attribute inline: unpack one element
 * from the user buffer to floats (or reinterpreted pure integers) and send
 * it via the VTX_ATTR methods.  Also updates EDGEFLAG for the edgeflag
 * attribute of the bound vertex program.
 */
static void
nv50_emit_vtxattr(struct nv50_context *nv50, struct pipe_vertex_buffer *vb,
                  struct pipe_vertex_element *ve, unsigned attr)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   const void *data = (const uint8_t *)vb->user_buffer + ve->src_offset;
   float v[4];
   const unsigned nc = util_format_get_nr_components(ve->src_format);
   const struct util_format_description *desc =
      util_format_description(ve->src_format);

   assert(vb->user_buffer);

   if (desc->channel[0].pure_integer) {
      /* reuse the float array as raw 32-bit storage for integer attribs */
      if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
         desc->unpack_rgba_sint((int32_t *)v, 0, data, 0, 1, 1);
      } else {
         desc->unpack_rgba_uint((uint32_t *)v, 0, data, 0, 1, 1);
      }
   } else {
      desc->unpack_rgba_float(v, 0, data, 0, 1, 1);
   }

   switch (nc) {
   case 4:
      BEGIN_NV04(push, NV50_3D(VTX_ATTR_4F_X(attr)), 4);
      PUSH_DATAf(push, v[0]);
      PUSH_DATAf(push, v[1]);
      PUSH_DATAf(push, v[2]);
      PUSH_DATAf(push, v[3]);
      break;
   case 3:
      BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(attr)), 3);
      PUSH_DATAf(push, v[0]);
      PUSH_DATAf(push, v[1]);
      PUSH_DATAf(push, v[2]);
      break;
   case 2:
      BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(attr)), 2);
      PUSH_DATAf(push, v[0]);
      PUSH_DATAf(push, v[1]);
      break;
   case 1:
      if (attr == nv50->vertprog->vp.edgeflag) {
         BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
         PUSH_DATA (push, v[0] ? 1 : 0);
      }
      BEGIN_NV04(push, NV50_3D(VTX_ATTR_1F(attr)), 1);
      PUSH_DATAf(push, v[0]);
      break;
   default:
      assert(0);
      break;
   }
}

/* Compute the byte range [base, base+size) of user vertex buffer vbi that
 * the current draw may touch.
 */
static INLINE void
nv50_user_vbuf_range(struct nv50_context *nv50, int vbi,
                     uint32_t *base, uint32_t *size)
{
   if (unlikely(nv50->vertex->instance_bufs & (1 << vbi))) {
      /* TODO: use min and max instance divisor to get a proper range */
      *base = 0;
      *size = nv50->vtxbuf[vbi].buffer->width0;
   } else {
      /* NOTE: if there are user buffers, we *must* have index bounds */
      assert(nv50->vb_elt_limit != ~0);
      *base = nv50->vb_elt_first * nv50->vtxbuf[vbi].stride;
      *size = nv50->vb_elt_limit * nv50->vtxbuf[vbi].stride +
         nv50->vertex->vb_access_size[vbi];
   }
}

/* Copy the accessed range of every user vertex buffer into scratch GART
 * memory; fills addrs[]/limits[] with the GPU addresses and end offsets
 * used by nv50_vertex_arrays_validate.
 */
static void
nv50_upload_user_buffers(struct nv50_context *nv50,
                         uint64_t addrs[], uint32_t limits[])
{
   unsigned b;

   for (b = 0; b < nv50->num_vtxbufs; ++b) {
      struct nouveau_bo *bo;
      const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b];
      uint32_t base, size;

      /* stride-0 buffers are handled as constant attributes elsewhere */
      if (!(nv50->vbo_user & (1 << b)) || !vb->stride)
         continue;
      nv50_user_vbuf_range(nv50, b, &base, &size);

      limits[b] = base + size - 1;
      addrs[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, base, size,
                                      &bo);
      if (addrs[b])
         BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART |
                      NOUVEAU_BO_RD, bo);
   }
   nv50->base.vbo_dirty = TRUE;
}

/* Re-upload user vertex buffers and re-emit the per-element start/limit
 * addresses; used when only buffer contents changed between draws.
 */
static void
nv50_update_user_vbufs(struct nv50_context *nv50)
{
   uint64_t address[PIPE_MAX_ATTRIBS];
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned i;
   uint32_t written = 0;  /* bitmask of buffers already uploaded */

   for (i = 0; i < nv50->vertex->num_elements; ++i) {
      struct pipe_vertex_element *ve = &nv50->vertex->element[i].pipe;
      const unsigned b = ve->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b];
      uint32_t base, size;

      if (!(nv50->vbo_user & (1 << b)))
         continue;

      if (!vb->stride) {
         nv50_emit_vtxattr(nv50, vb, ve, i);
         continue;
      }
      nv50_user_vbuf_range(nv50, b, &base, &size);

      if (!(written & (1 << b))) {
         struct nouveau_bo *bo;
         const uint32_t bo_flags = NOUVEAU_BO_GART | NOUVEAU_BO_RD;
         written |= 1 << b;
         address[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer,
                                           base, size, &bo);
         if (address[b])
            BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo);
      }

      BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
      PUSH_DATAh(push, address[b] + base + size - 1);
      PUSH_DATA (push, address[b] + base + size - 1);
      BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_START_HIGH(i)), 2);
      PUSH_DATAh(push, address[b] + ve->src_offset);
      PUSH_DATA (push, address[b] + ve->src_offset);
   }
   nv50->base.vbo_dirty = TRUE;
}

/* Release the scratch allocations made for user vertex buffers. */
static INLINE void
nv50_release_user_vbufs(struct nv50_context *nv50)
{
   if (nv50->vbo_user) {
      nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX_TMP);
      nouveau_scratch_done(&nv50->base);
   }
}

/* Validate vertex array state: decide between hardware fetch and the FIFO
 * (push) path, emit attrib formats, per-instance enables, buffer addresses
 * and limits, and inline constant attributes.
 */
void
nv50_vertex_arrays_validate(struct nv50_context *nv50)
{
   uint64_t addrs[PIPE_MAX_ATTRIBS];
   uint32_t limits[PIPE_MAX_ATTRIBS];
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_vertex_stateobj *vertex = nv50->vertex;
   struct pipe_vertex_buffer *vb;
   struct nv50_vertex_element *ve;
   uint32_t mask;
   uint32_t refd = 0;  /* bitmask of buffers already referenced in bufctx */
   unsigned i;
   const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);

   /* formats needing conversion force the FIFO path; user buffers may use
    * it when the push hint says inline vertices are cheaper than uploads
    */
   if (unlikely(vertex->need_conversion))
      nv50->vbo_fifo = ~0;
   else
   if (nv50->vbo_user & ~nv50->vbo_constant)
      nv50->vbo_fifo = nv50->vbo_push_hint ? ~0 : 0;
   else
      nv50->vbo_fifo = 0;

   if (!nv50->vbo_fifo) {
      /* if vertex buffer was written by GPU - flush VBO cache */
      for (i = 0; i < nv50->num_vtxbufs; ++i) {
         struct nv04_resource *buf = nv04_resource(nv50->vtxbuf[i].buffer);
         if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
            nv50->base.vbo_dirty = TRUE;
            break;
         }
      }
   }

   /* update vertex format state */
   BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_ATTRIB(0)), n);
   if (nv50->vbo_fifo) {
      /* FIFO path: all fetches disabled, attribs come inline */
      nv50->state.num_vtxelts = vertex->num_elements;
      for (i = 0; i < vertex->num_elements; ++i)
         PUSH_DATA (push, vertex->element[i].state);
      for (; i < n; ++i)
         PUSH_DATA (push, NV50_3D_VERTEX_ATTRIB_INACTIVE);
      for (i = 0; i < n; ++i) {
         BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
         PUSH_DATA (push, 0);
      }
      return;
   }
   for (i = 0; i < vertex->num_elements; ++i) {
      const unsigned b = vertex->element[i].pipe.vertex_buffer_index;
      ve = &vertex->element[i];
      vb = &nv50->vtxbuf[b];

      if (likely(vb->stride) || !(nv50->vbo_user & (1 << b)))
         PUSH_DATA(push, ve->state);
      else
         PUSH_DATA(push, ve->state | NV50_3D_VERTEX_ARRAY_ATTRIB_CONST);
   }
   for (; i < n; ++i)
      PUSH_DATA(push, NV50_3D_VERTEX_ATTRIB_INACTIVE);

   /* update per-instance enables */
   mask = vertex->instance_elts ^ nv50->state.instance_elts;
   while (mask) {
      const int i = ffs(mask) - 1;
      mask &= ~(1 << i);
      BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);
      PUSH_DATA (push, (vertex->instance_elts >> i) & 1);
   }
   nv50->state.instance_elts = vertex->instance_elts;

   if (nv50->vbo_user & ~nv50->vbo_constant)
      nv50_upload_user_buffers(nv50, addrs, limits);

   /* update buffers and set constant attributes */
   for (i = 0; i < vertex->num_elements; ++i) {
      uint64_t address, limit;
      const unsigned b = vertex->element[i].pipe.vertex_buffer_index;
      ve = &vertex->element[i];
      vb = &nv50->vtxbuf[b];

      if (unlikely(nv50->vbo_constant & (1 << b))) {
         BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
         PUSH_DATA (push, 0);
         nv50_emit_vtxattr(nv50, vb, &ve->pipe, i);
         continue;
      } else
      if (nv50->vbo_user & (1 << b)) {
         address = addrs[b] + ve->pipe.src_offset;
         limit = addrs[b] + limits[b];
      } else {
         struct nv04_resource *buf = nv04_resource(vb->buffer);
         if (!(refd & (1 << b))) {
            refd |= 1 << b;
            BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD);
         }
         address = buf->address + vb->buffer_offset + ve->pipe.src_offset;
         limit = buf->address + buf->base.width0 - 1;
      }

      if (unlikely(ve->pipe.instance_divisor)) {
         BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 4);
         PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
         PUSH_DATAh(push, address);
         PUSH_DATA (push, address);
         PUSH_DATA (push, ve->pipe.instance_divisor);
      } else {
         BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 3);
         PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
         PUSH_DATAh(push, address);
         PUSH_DATA (push, address);
      }
      BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
      PUSH_DATAh(push, limit);
      PUSH_DATA (push, limit);
   }
   /* disable fetch for elements that were active in the previous state */
   for (; i < nv50->state.num_vtxelts; ++i) {
      BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
      PUSH_DATA (push, 0);
   }
   nv50->state.num_vtxelts = vertex->num_elements;
}

#define NV50_PRIM_GL_CASE(n) \
   case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n

/* Map a gallium primitive type to the hardware VERTEX_BEGIN_GL value. */
static INLINE unsigned
nv50_prim_gl(unsigned prim)
{
   switch (prim) {
   NV50_PRIM_GL_CASE(POINTS);
   NV50_PRIM_GL_CASE(LINES);
   NV50_PRIM_GL_CASE(LINE_LOOP);
   NV50_PRIM_GL_CASE(LINE_STRIP);
   NV50_PRIM_GL_CASE(TRIANGLES);
   NV50_PRIM_GL_CASE(TRIANGLE_STRIP);
   NV50_PRIM_GL_CASE(TRIANGLE_FAN);
   NV50_PRIM_GL_CASE(QUADS);
   NV50_PRIM_GL_CASE(QUAD_STRIP);
   NV50_PRIM_GL_CASE(POLYGON);
   NV50_PRIM_GL_CASE(LINES_ADJACENCY);
   NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
   NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
   NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
   default:
      return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
      break;
   }
}

/* For pre-nva0 transform feedback. */
static const uint8_t nv50_pipe_prim_to_prim_size[PIPE_PRIM_MAX + 1] =
{
   [PIPE_PRIM_POINTS] = 1,
   [PIPE_PRIM_LINES] = 2,
   [PIPE_PRIM_LINE_LOOP] = 2,
   [PIPE_PRIM_LINE_STRIP] = 2,
   [PIPE_PRIM_TRIANGLES] = 3,
   [PIPE_PRIM_TRIANGLE_STRIP] = 3,
   [PIPE_PRIM_TRIANGLE_FAN] = 3,
   [PIPE_PRIM_QUADS] = 3,
   [PIPE_PRIM_QUAD_STRIP] = 3,
   [PIPE_PRIM_POLYGON] = 3,
   [PIPE_PRIM_LINES_ADJACENCY] = 2,
   [PIPE_PRIM_LINE_STRIP_ADJACENCY] = 2,
   [PIPE_PRIM_TRIANGLES_ADJACENCY] = 3,
   [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = 3
};

/* Non-indexed draw: one BEGIN/FIRST+COUNT/END sequence per instance, with
 * INSTANCE_NEXT set on all but the first.
 */
static void
nv50_draw_arrays(struct nv50_context *nv50,
                 unsigned mode, unsigned start, unsigned count,
                 unsigned instance_count)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned prim;

   if (nv50->state.index_bias) {
      /* non-indexed draws ignore the bias; reset it for correctness */
      BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
      PUSH_DATA (push, 0);
      nv50->state.index_bias = 0;
   }

   prim = nv50_prim_gl(mode);

   while (instance_count--) {
      BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
      PUSH_DATA (push, prim);
      BEGIN_NV04(push, NV50_3D(VERTEX_BUFFER_FIRST), 2);
      PUSH_DATA (push, start);
      PUSH_DATA (push, count);
      BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
      PUSH_DATA (push, 0);

      prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
   }
}

/* Push 8-bit indices inline: leading remainder as U32 methods, then the
 * bulk packed four-per-dword through VB_ELEMENT_U8.
 */
static void
nv50_draw_elements_inline_u08(struct nouveau_pushbuf *push, const uint8_t *map,
                              unsigned start, unsigned count)
{
   map += start;

   if (count & 3) {
      unsigned i;
      BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U32), count & 3);
      for (i = 0; i < (count & 3); ++i)
         PUSH_DATA(push, *map++);
      count &= ~3;
   }
   while (count) {
      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;

      BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U8), nr);
      for (i = 0; i < nr; ++i) {
         PUSH_DATA(push,
                   (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);
         map += 4;
      }
      count -= nr * 4;
   }
}

/* Push 16-bit indices inline, packed two-per-dword (odd leading index via
 * U32).
 */
static void
nv50_draw_elements_inline_u16(struct nouveau_pushbuf *push, const uint16_t *map,
                              unsigned start, unsigned count)
{
   map += start;

   if (count & 1) {
      count &= ~1;
      BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U32), 1);
      PUSH_DATA (push, *map++);
   }
   while (count) {
      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;

      BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U16), nr);
      for (i = 0; i < nr; ++i) {
         PUSH_DATA(push, (map[1] << 16) | map[0]);
         map += 2;
      }
      count -= nr * 2;
   }
}

/* Push 32-bit indices inline, one per dword. */
static void
nv50_draw_elements_inline_u32(struct nouveau_pushbuf *push, const uint32_t *map,
                              unsigned start, unsigned count)
{
   map += start;

   while (count) {
      const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);

      BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U32), nr);
      PUSH_DATAp(push, map, nr);

      map += nr;
      count -= nr;
   }
}

/* Push 32-bit indices known to fit in 16 bits, packed two-per-dword. */
static void
nv50_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
                                    const uint32_t *map,
                                    unsigned start, unsigned count)
{
   map += start;

   if (count & 1) {
      count--;
      BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U32), 1);
      PUSH_DATA (push, *map++);
   }
   while (count) {
      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;

      BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U16), nr);
      for (i = 0; i < nr; ++i) {
         PUSH_DATA(push, (map[1] << 16) | map[0]);
         map += 2;
      }
      count -= nr * 2;
   }
}

/* Indexed draw: indices either come from a GPU-resident index buffer
 * (referenced directly in the pushbuf via BEGIN_NL50 + pushbuf_data, with
 * U16/U8 setup methods handling sub-dword alignment) or are pushed inline
 * from a user pointer.  `shorten` allows packing 32-bit indices as 16-bit.
 */
static void
nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
                   unsigned mode, unsigned start, unsigned count,
                   unsigned instance_count, int32_t index_bias)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned prim;
   const unsigned index_size = nv50->idxbuf.index_size;

   prim = nv50_prim_gl(mode);

   if (index_bias != nv50->state.index_bias) {
      BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
      PUSH_DATA (push, index_bias);
      nv50->state.index_bias = index_bias;
   }

   if (nv50->idxbuf.buffer) {
      struct nv04_resource *buf = nv04_resource(nv50->idxbuf.buffer);
      unsigned pb_start;
      unsigned pb_bytes;
      const unsigned base = (buf->offset + nv50->idxbuf.offset) & ~3;

      /* fold the sub-dword part of the offset into the start index */
      start += ((buf->offset + nv50->idxbuf.offset) & 3) >> (index_size >> 1);

      assert(nouveau_resource_mapped_by_gpu(nv50->idxbuf.buffer));

      while (instance_count--) {
         BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
         PUSH_DATA (push, prim);

         nouveau_pushbuf_space(push, 8, 0, 1);

         switch (index_size) {
         case 4:
            BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U32), count);
            nouveau_pushbuf_data(push, buf->bo, base + start * 4, count * 4);
            break;
         case 2:
            /* round the referenced range out to whole dwords and tell the
             * hardware which indices within it are valid
             */
            pb_start = (start & ~1) * 2;
            pb_bytes = ((start + count + 1) & ~1) * 2 - pb_start;

            BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U16_SETUP), 1);
            PUSH_DATA (push, (start << 31) | count);
            BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U16), pb_bytes / 4);
            nouveau_pushbuf_data(push, buf->bo, base + pb_start, pb_bytes);
            BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U16_SETUP), 1);
            PUSH_DATA (push, 0);
            break;
         default:
            assert(index_size == 1);
            pb_start = start & ~3;
            pb_bytes = ((start + count + 3) & ~3) - pb_start;

            BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U8_SETUP), 1);
            PUSH_DATA (push, (start << 30) | count);
            BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U8), pb_bytes / 4);
            nouveau_pushbuf_data(push, buf->bo, base + pb_start, pb_bytes);
            BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U8_SETUP), 1);
            PUSH_DATA (push, 0);
            break;
         }
         BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
         PUSH_DATA (push, 0);

         prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
      }
   } else {
      const void *data = nv50->idxbuf.user_buffer;

      while (instance_count--) {
         BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
         PUSH_DATA (push, prim);
         switch (index_size) {
         case 1:
            nv50_draw_elements_inline_u08(push, data, start, count);
            break;
         case 2:
            nv50_draw_elements_inline_u16(push, data, start, count);
            break;
         case 4:
            if (shorten)
               nv50_draw_elements_inline_u32_short(push, data, start, count);
            else
               nv50_draw_elements_inline_u32(push, data, start, count);
            break;
         default:
            assert(0);
            return;
         }
         BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
         PUSH_DATA (push, 0);

         prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
      }
   }
}

/* Draw using the vertex count captured by a stream-output (transform
 * feedback) target; requires the NVA0+ DRAW_TFB methods.
 */
static void
nva0_draw_stream_output(struct nv50_context *nv50,
                        const struct pipe_draw_info *info)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_so_target *so = nv50_so_target(info->count_from_stream_output);
   struct nv04_resource *res = nv04_resource(so->pipe.buffer);
   unsigned num_instances = info->instance_count;
   unsigned mode = nv50_prim_gl(info->mode);

   if (unlikely(nv50->screen->base.class_3d < NVA0_3D_CLASS)) {
      /* A proper implementation without waiting doesn't seem possible,
       * so don't bother.
       */
      NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
      return;
   }

   if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
      /* flush the GPU write before reading the buffer as vertex input */
      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      PUSH_SPACE(push, 4);
      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
      PUSH_DATA (push, 0);
   }

   assert(num_instances);
   do {
      PUSH_SPACE(push, 8);
      BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
      PUSH_DATA (push, mode);
      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
      /* the byte count comes from the stream-output query on the GPU */
      nv50_query_pushbuf_submit(push, so->pq, 0x4);
      BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
      PUSH_DATA (push, 0);

      mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
   } while (--num_instances);
}

/* Pushbuf kick callback: update fences so buffer status tracking stays
 * accurate across submissions.
 */
static void
nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan)
{
   struct nv50_screen *screen = chan->user_priv;

   nouveau_fence_update(&screen->base, TRUE);

   nv50_bufctx_fence(screen->cur_ctx->bufctx_3d, TRUE);
}

void
nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;

   /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
   nv50->vb_elt_first = info->min_index + info->index_bias;
   nv50->vb_elt_limit = info->max_index - info->min_index;
   nv50->instance_off = info->start_instance;
   nv50->instance_max = info->instance_count - 1;

   /* For picking only a few vertices from a large user buffer, push is better,
    * if index count is larger and we expect repeated vertices, suggest upload.
+ */ + nv50->vbo_push_hint = /* the 64 is heuristic */ + !(info->indexed && ((nv50->vb_elt_limit + 64) < info->count)); + + if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_ARRAYS | NV50_NEW_VERTEX))) { + if (!!nv50->vbo_fifo != nv50->vbo_push_hint) + nv50->dirty |= NV50_NEW_ARRAYS; + else + if (!nv50->vbo_fifo) + nv50_update_user_vbufs(nv50); + } + + if (unlikely(nv50->num_so_targets && !nv50->gmtyprog)) + nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode]; + + nv50_state_validate(nv50, ~0, 8); /* 8 as minimum, we use flush_notify */ + + push->kick_notify = nv50_draw_vbo_kick_notify; + + if (nv50->vbo_fifo) { + nv50_push_vbo(nv50, info); + push->kick_notify = nv50_default_kick_notify; + nouveau_pushbuf_bufctx(push, NULL); + return; + } + + if (nv50->state.instance_base != info->start_instance) { + nv50->state.instance_base = info->start_instance; + /* NOTE: this does not affect the shader input, should it ? */ + BEGIN_NV04(push, NV50_3D(VB_INSTANCE_BASE), 1); + PUSH_DATA (push, info->start_instance); + } + + if (nv50->base.vbo_dirty) { + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1); + PUSH_DATA (push, 0); + nv50->base.vbo_dirty = FALSE; + } + + if (info->indexed) { + boolean shorten = info->max_index <= 65535; + + if (info->primitive_restart != nv50->state.prim_restart) { + if (info->primitive_restart) { + BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 2); + PUSH_DATA (push, 1); + PUSH_DATA (push, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } else { + BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 1); + PUSH_DATA (push, 0); + } + nv50->state.prim_restart = info->primitive_restart; + } else + if (info->primitive_restart) { + BEGIN_NV04(push, NV50_3D(PRIM_RESTART_INDEX), 1); + PUSH_DATA (push, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } + + nv50_draw_elements(nv50, shorten, + info->mode, info->start, info->count, + info->instance_count, info->index_bias); + } else + if 
(unlikely(info->count_from_stream_output)) { + nva0_draw_stream_output(nv50, info); + } else { + nv50_draw_arrays(nv50, + info->mode, info->start, info->count, + info->instance_count); + } + push->kick_notify = nv50_default_kick_notify; + + nv50_release_user_vbufs(nv50); + + nouveau_pushbuf_bufctx(push, NULL); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h new file mode 100644 index 00000000000..e8578c8be6f --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h @@ -0,0 +1,125 @@ + +#ifndef __NV50_WINSYS_H__ +#define __NV50_WINSYS_H__ + +#include <stdint.h> +#include <unistd.h> + +#include "pipe/p_defines.h" + +#include "nouveau_winsys.h" +#include "nouveau_buffer.h" + + +#ifndef NV04_PFIFO_MAX_PACKET_LEN +#define NV04_PFIFO_MAX_PACKET_LEN 2047 +#endif + + +static INLINE void +nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin, + unsigned flags, struct nouveau_bo *bo) +{ + nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL; +} + +static INLINE void +nv50_add_bufctx_resident(struct nouveau_bufctx *bufctx, int bin, + struct nv04_resource *res, unsigned flags) +{ + struct nouveau_bufref *ref = + nouveau_bufctx_refn(bufctx, bin, res->bo, flags | res->domain); + ref->priv = res; + ref->priv_data = flags; +} + +#define BCTX_REFN_bo(ctx, bin, fl, bo) \ + nv50_add_bufctx_resident_bo(ctx, NV50_BIND_##bin, fl, bo); + +#define BCTX_REFN(bctx, bin, res, acc) \ + nv50_add_bufctx_resident(bctx, NV50_BIND_##bin, res, NOUVEAU_BO_##acc) + +static INLINE void +PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) +{ + struct nouveau_pushbuf_refn ref = { bo, flags }; + nouveau_pushbuf_refn(push, &ref, 1); +} + + +#define SUBC_3D(m) 3, (m) +#define NV50_3D(n) SUBC_3D(NV50_3D_##n) +#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n) + +#define SUBC_2D(m) 4, (m) +#define NV50_2D(n) SUBC_2D(NV50_2D_##n) + +#define SUBC_M2MF(m) 5, (m) +#define NV50_M2MF(n) 
SUBC_M2MF(NV50_M2MF_##n) + +#define SUBC_COMPUTE(m) 6, (m) +#define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n) + + +static INLINE uint32_t +NV50_FIFO_PKHDR(int subc, int mthd, unsigned size) +{ + return 0x00000000 | (size << 18) | (subc << 13) | mthd; +} + +static INLINE uint32_t +NV50_FIFO_PKHDR_NI(int subc, int mthd, unsigned size) +{ + return 0x40000000 | (size << 18) | (subc << 13) | mthd; +} + +static INLINE uint32_t +NV50_FIFO_PKHDR_L(int subc, int mthd) +{ + return 0x00030000 | (subc << 13) | mthd; +} + + +static INLINE uint32_t +nouveau_bo_memtype(const struct nouveau_bo *bo) +{ + return bo->config.nv50.memtype; +} + + +static INLINE void +PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data) +{ + *push->cur++ = (uint32_t)(data >> 32); +} + +static INLINE void +BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size) +{ +#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING + PUSH_SPACE(push, size + 1); +#endif + PUSH_DATA (push, NV50_FIFO_PKHDR(subc, mthd, size)); +} + +static INLINE void +BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size) +{ +#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING + PUSH_SPACE(push, size + 1); +#endif + PUSH_DATA (push, NV50_FIFO_PKHDR_NI(subc, mthd, size)); +} + +/* long, non-incremental, nv50-only */ +static INLINE void +BEGIN_NL50(struct nouveau_pushbuf *push, int subc, int mthd, uint32_t size) +{ +#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING + PUSH_SPACE(push, 2); +#endif + PUSH_DATA (push, NV50_FIFO_PKHDR_L(subc, mthd)); + PUSH_DATA (push, size); +} + +#endif /* __NV50_WINSYS_H__ */ diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video.c b/src/gallium/drivers/nouveau/nv50/nv84_video.c new file mode 100644 index 00000000000..3fee6d95f66 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv84_video.c @@ -0,0 +1,797 @@ +/* + * Copyright 2013 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files 
(the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <fcntl.h> + +#include "util/u_format.h" +#include "util/u_sampler.h" +#include "vl/vl_zscan.h" + +#include "nv50/nv84_video.h" + +static int +nv84_copy_firmware(const char *path, void *dest, ssize_t len) +{ + int fd = open(path, O_RDONLY | O_CLOEXEC); + ssize_t r; + if (fd < 0) { + fprintf(stderr, "opening firmware file %s failed: %m\n", path); + return 1; + } + r = read(fd, dest, len); + close(fd); + + if (r != len) { + fprintf(stderr, "reading firwmare file %s failed: %m\n", path); + return 1; + } + + return 0; +} + +static int +filesize(const char *path) +{ + int ret; + struct stat statbuf; + + ret = stat(path, &statbuf); + if (ret) + return ret; + return statbuf.st_size; +} + +static struct nouveau_bo * +nv84_load_firmwares(struct nouveau_device *dev, struct nv84_decoder *dec, + const char *fw1, const char *fw2) +{ + int ret, size1, size2 = 0; + struct nouveau_bo *fw; + + size1 = filesize(fw1); + if (fw2) + size2 = filesize(fw2); + if (size1 
< 0 || size2 < 0) + return NULL; + + dec->vp_fw2_offset = align(size1, 0x100); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, dec->vp_fw2_offset + size2, NULL, &fw); + if (ret) + return NULL; + ret = nouveau_bo_map(fw, NOUVEAU_BO_WR, dec->client); + if (ret) + goto error; + + ret = nv84_copy_firmware(fw1, fw->map, size1); + if (fw2 && !ret) + ret = nv84_copy_firmware(fw2, fw->map + dec->vp_fw2_offset, size2); + munmap(fw->map, fw->size); + fw->map = NULL; + if (!ret) + return fw; +error: + nouveau_bo_ref(NULL, &fw); + return NULL; +} + +static struct nouveau_bo * +nv84_load_bsp_firmware(struct nouveau_device *dev, struct nv84_decoder *dec) +{ + return nv84_load_firmwares( + dev, dec, "/lib/firmware/nouveau/nv84_bsp-h264", NULL); +} + +static struct nouveau_bo * +nv84_load_vp_firmware(struct nouveau_device *dev, struct nv84_decoder *dec) +{ + return nv84_load_firmwares( + dev, dec, + "/lib/firmware/nouveau/nv84_vp-h264-1", + "/lib/firmware/nouveau/nv84_vp-h264-2"); +} + +static struct nouveau_bo * +nv84_load_vp_firmware_mpeg(struct nouveau_device *dev, struct nv84_decoder *dec) +{ + return nv84_load_firmwares( + dev, dec, "/lib/firmware/nouveau/nv84_vp-mpeg12", NULL); +} + +static void +nv84_decoder_decode_bitstream_h264(struct pipe_video_codec *decoder, + struct pipe_video_buffer *video_target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void *const *data, + const unsigned *num_bytes) +{ + struct nv84_decoder *dec = (struct nv84_decoder *)decoder; + struct nv84_video_buffer *target = (struct nv84_video_buffer *)video_target; + + struct pipe_h264_picture_desc *desc = (struct pipe_h264_picture_desc *)picture; + + assert(target->base.buffer_format == PIPE_FORMAT_NV12); + + nv84_decoder_bsp(dec, desc, num_buffers, data, num_bytes, target); + nv84_decoder_vp_h264(dec, desc, target); +} + +static void +nv84_decoder_flush(struct pipe_video_codec *decoder) +{ +} + +static void +nv84_decoder_begin_frame_h264(struct pipe_video_codec *decoder, + 
struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ +} + +static void +nv84_decoder_end_frame_h264(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ +} + +static void +nv84_decoder_decode_bitstream_mpeg12(struct pipe_video_codec *decoder, + struct pipe_video_buffer *video_target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void *const *data, + const unsigned *num_bytes) +{ + struct nv84_decoder *dec = (struct nv84_decoder *)decoder; + + assert(video_target->buffer_format == PIPE_FORMAT_NV12); + + vl_mpg12_bs_decode(dec->mpeg12_bs, + video_target, + (struct pipe_mpeg12_picture_desc *)picture, + num_buffers, + data, + num_bytes); +} + +static void +nv84_decoder_begin_frame_mpeg12(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ + struct nv84_decoder *dec = (struct nv84_decoder *)decoder; + struct pipe_mpeg12_picture_desc *desc = (struct pipe_mpeg12_picture_desc *)picture; + int i; + + nouveau_bo_wait(dec->mpeg12_bo, NOUVEAU_BO_RDWR, dec->client); + dec->mpeg12_mb_info = dec->mpeg12_bo->map + 0x100; + dec->mpeg12_data = dec->mpeg12_bo->map + 0x100 + + align(0x20 * mb(dec->base.width) * mb(dec->base.height), 0x100); + if (desc->intra_matrix) { + dec->zscan = desc->alternate_scan ? 
vl_zscan_alternate : vl_zscan_normal; + for (i = 0; i < 64; i++) { + dec->mpeg12_intra_matrix[i] = desc->intra_matrix[dec->zscan[i]]; + dec->mpeg12_non_intra_matrix[i] = desc->non_intra_matrix[dec->zscan[i]]; + } + dec->mpeg12_intra_matrix[0] = 1 << (7 - desc->intra_dc_precision); + } +} + +static void +nv84_decoder_end_frame_mpeg12(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ + nv84_decoder_vp_mpeg12( + (struct nv84_decoder *)decoder, + (struct pipe_mpeg12_picture_desc *)picture, + (struct nv84_video_buffer *)target); +} + +static void +nv84_decoder_decode_macroblock(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, + const struct pipe_macroblock *macroblocks, + unsigned num_macroblocks) +{ + const struct pipe_mpeg12_macroblock *mb = (const struct pipe_mpeg12_macroblock *)macroblocks; + for (int i = 0; i < num_macroblocks; i++, mb++) { + nv84_decoder_vp_mpeg12_mb( + (struct nv84_decoder *)decoder, + (struct pipe_mpeg12_picture_desc *)picture, + mb); + } +} + +static void +nv84_decoder_destroy(struct pipe_video_codec *decoder) +{ + struct nv84_decoder *dec = (struct nv84_decoder *)decoder; + + nouveau_bo_ref(NULL, &dec->bsp_fw); + nouveau_bo_ref(NULL, &dec->bsp_data); + nouveau_bo_ref(NULL, &dec->vp_fw); + nouveau_bo_ref(NULL, &dec->vp_data); + nouveau_bo_ref(NULL, &dec->mbring); + nouveau_bo_ref(NULL, &dec->vpring); + nouveau_bo_ref(NULL, &dec->bitstream); + nouveau_bo_ref(NULL, &dec->vp_params); + nouveau_bo_ref(NULL, &dec->fence); + + nouveau_object_del(&dec->bsp); + nouveau_object_del(&dec->vp); + + nouveau_bufctx_del(&dec->bsp_bufctx); + nouveau_pushbuf_del(&dec->bsp_pushbuf); + nouveau_object_del(&dec->bsp_channel); + + nouveau_bufctx_del(&dec->vp_bufctx); + nouveau_pushbuf_del(&dec->vp_pushbuf); + nouveau_object_del(&dec->vp_channel); + + nouveau_client_del(&dec->client); + + if (dec->mpeg12_bs) + FREE(dec->mpeg12_bs); + FREE(dec); +} + 
+struct pipe_video_codec * +nv84_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ) +{ + struct nv50_context *nv50 = (struct nv50_context *)context; + struct nouveau_screen *screen = &nv50->screen->base; + struct nv84_decoder *dec; + struct nouveau_pushbuf *bsp_push, *vp_push; + struct nv50_surface surf; + struct nv50_miptree mip; + union pipe_color_union color; + struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 }; + int ret, i; + int is_h264 = u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC; + int is_mpeg12 = u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_MPEG12; + + if (getenv("XVMC_VL")) + return vl_create_decoder(context, templ); + + if ((is_h264 && templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) || + (is_mpeg12 && templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_IDCT)) { + debug_printf("%x\n", templ->entrypoint); + return NULL; + } + + if (!is_h264 && !is_mpeg12) { + debug_printf("invalid profile: %x\n", templ->profile); + return NULL; + } + + dec = CALLOC_STRUCT(nv84_decoder); + if (!dec) + return NULL; + + dec->base = *templ; + dec->base.context = context; + dec->base.destroy = nv84_decoder_destroy; + dec->base.flush = nv84_decoder_flush; + if (is_h264) { + dec->base.decode_bitstream = nv84_decoder_decode_bitstream_h264; + dec->base.begin_frame = nv84_decoder_begin_frame_h264; + dec->base.end_frame = nv84_decoder_end_frame_h264; + + dec->frame_mbs = mb(dec->base.width) * mb_half(dec->base.height) * 2; + dec->frame_size = dec->frame_mbs << 8; + dec->vpring_deblock = align(0x30 * dec->frame_mbs, 0x100); + dec->vpring_residual = 0x2000 + MAX2(0x32000, 0x600 * dec->frame_mbs); + dec->vpring_ctrl = MAX2(0x10000, align(0x1080 + 0x144 * dec->frame_mbs, 0x100)); + } else if (is_mpeg12) { + dec->base.decode_macroblock = nv84_decoder_decode_macroblock; + dec->base.begin_frame = nv84_decoder_begin_frame_mpeg12; + dec->base.end_frame = nv84_decoder_end_frame_mpeg12; + + if 
(templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + dec->mpeg12_bs = CALLOC_STRUCT(vl_mpg12_bs); + if (!dec->mpeg12_bs) + goto fail; + vl_mpg12_bs_init(dec->mpeg12_bs, &dec->base); + dec->base.decode_bitstream = nv84_decoder_decode_bitstream_mpeg12; + } + } else { + goto fail; + } + + ret = nouveau_client_new(screen->device, &dec->client); + if (ret) + goto fail; + + if (is_h264) { + ret = nouveau_object_new(&screen->device->object, 0, + NOUVEAU_FIFO_CHANNEL_CLASS, + &nv04_data, sizeof(nv04_data), &dec->bsp_channel); + if (ret) + goto fail; + + ret = nouveau_pushbuf_new(dec->client, dec->bsp_channel, 4, + 32 * 1024, true, &dec->bsp_pushbuf); + if (ret) + goto fail; + + ret = nouveau_bufctx_new(dec->client, 1, &dec->bsp_bufctx); + if (ret) + goto fail; + } + + ret = nouveau_object_new(&screen->device->object, 0, + NOUVEAU_FIFO_CHANNEL_CLASS, + &nv04_data, sizeof(nv04_data), &dec->vp_channel); + if (ret) + goto fail; + ret = nouveau_pushbuf_new(dec->client, dec->vp_channel, 4, + 32 * 1024, true, &dec->vp_pushbuf); + if (ret) + goto fail; + + ret = nouveau_bufctx_new(dec->client, 1, &dec->vp_bufctx); + if (ret) + goto fail; + + bsp_push = dec->bsp_pushbuf; + vp_push = dec->vp_pushbuf; + + if (is_h264) { + dec->bsp_fw = nv84_load_bsp_firmware(screen->device, dec); + dec->vp_fw = nv84_load_vp_firmware(screen->device, dec); + if (!dec->bsp_fw || !dec->vp_fw) + goto fail; + } + if (is_mpeg12) { + dec->vp_fw = nv84_load_vp_firmware_mpeg(screen->device, dec); + if (!dec->vp_fw) + goto fail; + } + + if (is_h264) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, + 0, 0x40000, NULL, &dec->bsp_data); + if (ret) + goto fail; + } + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, + 0, 0x40000, NULL, &dec->vp_data); + if (ret) + goto fail; + if (is_h264) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, + 0, + 2 * (dec->vpring_deblock + + dec->vpring_residual + + dec->vpring_ctrl + + 
0x1000), + NULL, &dec->vpring); + if (ret) + goto fail; + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, + 0, + (templ->max_references + 1) * dec->frame_mbs * 0x40 + + dec->frame_size + 0x2000, + NULL, &dec->mbring); + if (ret) + goto fail; + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART, + 0, 2 * (0x700 + MAX2(0x40000, 0x800 + 0x180 * dec->frame_mbs)), + NULL, &dec->bitstream); + if (ret) + goto fail; + ret = nouveau_bo_map(dec->bitstream, NOUVEAU_BO_WR, dec->client); + if (ret) + goto fail; + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART, + 0, 0x2000, NULL, &dec->vp_params); + if (ret) + goto fail; + ret = nouveau_bo_map(dec->vp_params, NOUVEAU_BO_WR, dec->client); + if (ret) + goto fail; + } + if (is_mpeg12) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART, + 0, + align(0x20 * mb(templ->width) * mb(templ->height), 0x100) + + (6 * 64 * 8) * mb(templ->width) * mb(templ->height) + 0x100, + NULL, &dec->mpeg12_bo); + if (ret) + goto fail; + ret = nouveau_bo_map(dec->mpeg12_bo, NOUVEAU_BO_WR, dec->client); + if (ret) + goto fail; + } + + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0, 0x1000, NULL, &dec->fence); + if (ret) + goto fail; + ret = nouveau_bo_map(dec->fence, NOUVEAU_BO_WR, dec->client); + if (ret) + goto fail; + *(uint32_t *)dec->fence->map = 0; + + if (is_h264) { + nouveau_pushbuf_bufctx(bsp_push, dec->bsp_bufctx); + nouveau_bufctx_refn(dec->bsp_bufctx, 0, + dec->bsp_fw, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + nouveau_bufctx_refn(dec->bsp_bufctx, 0, + dec->bsp_data, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + } + + nouveau_pushbuf_bufctx(vp_push, dec->vp_bufctx); + nouveau_bufctx_refn(dec->vp_bufctx, 0, dec->vp_fw, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + nouveau_bufctx_refn(dec->vp_bufctx, 0, dec->vp_data, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + + if (is_h264 && !ret) + ret = nouveau_object_new(dec->bsp_channel, 0xbeef74b0, 0x74b0, + NULL, 0, &dec->bsp); + + if (!ret) + ret = 
nouveau_object_new(dec->vp_channel, 0xbeef7476, 0x7476, + NULL, 0, &dec->vp); + + if (ret) + goto fail; + + + if (is_h264) { + /* Zero out some parts of mbring/vpring. there's gotta be some cleaner way + * of doing this... perhaps makes sense to just copy the relevant logic + * here. */ + color.f[0] = color.f[1] = color.f[2] = color.f[3] = 0; + surf.offset = dec->frame_size; + surf.width = 64; + surf.height = (templ->max_references + 1) * dec->frame_mbs / 4; + surf.depth = 1; + surf.base.format = PIPE_FORMAT_B8G8R8A8_UNORM; + surf.base.u.tex.level = 0; + surf.base.texture = &mip.base.base; + mip.level[0].tile_mode = 0; + mip.level[0].pitch = surf.width * 4; + mip.base.domain = NOUVEAU_BO_VRAM; + mip.base.bo = dec->mbring; + context->clear_render_target(context, &surf.base, &color, 0, 0, 64, 4760); + surf.offset = dec->vpring->size / 2 - 0x1000; + surf.width = 1024; + surf.height = 1; + mip.level[0].pitch = surf.width * 4; + mip.base.bo = dec->vpring; + context->clear_render_target(context, &surf.base, &color, 0, 0, 1024, 1); + surf.offset = dec->vpring->size - 0x1000; + context->clear_render_target(context, &surf.base, &color, 0, 0, 1024, 1); + + PUSH_SPACE(screen->pushbuf, 5); + PUSH_REFN(screen->pushbuf, dec->fence, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + /* The clear_render_target is done via 3D engine, so use it to write to a + * sempahore to indicate that it's done. 
+ */ + BEGIN_NV04(screen->pushbuf, NV50_3D(QUERY_ADDRESS_HIGH), 4); + PUSH_DATAh(screen->pushbuf, dec->fence->offset); + PUSH_DATA (screen->pushbuf, dec->fence->offset); + PUSH_DATA (screen->pushbuf, 1); + PUSH_DATA (screen->pushbuf, 0xf010); + PUSH_KICK (screen->pushbuf); + + PUSH_SPACE(bsp_push, 2 + 12 + 2 + 4 + 3); + + BEGIN_NV04(bsp_push, SUBC_BSP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (bsp_push, dec->bsp->handle); + + BEGIN_NV04(bsp_push, SUBC_BSP(0x180), 11); + for (i = 0; i < 11; i++) + PUSH_DATA(bsp_push, nv04_data.vram); + BEGIN_NV04(bsp_push, SUBC_BSP(0x1b8), 1); + PUSH_DATA (bsp_push, nv04_data.vram); + + BEGIN_NV04(bsp_push, SUBC_BSP(0x600), 3); + PUSH_DATAh(bsp_push, dec->bsp_fw->offset); + PUSH_DATA (bsp_push, dec->bsp_fw->offset); + PUSH_DATA (bsp_push, dec->bsp_fw->size); + + BEGIN_NV04(bsp_push, SUBC_BSP(0x628), 2); + PUSH_DATA (bsp_push, dec->bsp_data->offset >> 8); + PUSH_DATA (bsp_push, dec->bsp_data->size); + PUSH_KICK (bsp_push); + } + + PUSH_SPACE(vp_push, 2 + 12 + 2 + 4 + 3); + + BEGIN_NV04(vp_push, SUBC_VP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (vp_push, dec->vp->handle); + + BEGIN_NV04(vp_push, SUBC_VP(0x180), 11); + for (i = 0; i < 11; i++) + PUSH_DATA(vp_push, nv04_data.vram); + + BEGIN_NV04(vp_push, SUBC_VP(0x1b8), 1); + PUSH_DATA (vp_push, nv04_data.vram); + + BEGIN_NV04(vp_push, SUBC_VP(0x600), 3); + PUSH_DATAh(vp_push, dec->vp_fw->offset); + PUSH_DATA (vp_push, dec->vp_fw->offset); + PUSH_DATA (vp_push, dec->vp_fw->size); + + BEGIN_NV04(vp_push, SUBC_VP(0x628), 2); + PUSH_DATA (vp_push, dec->vp_data->offset >> 8); + PUSH_DATA (vp_push, dec->vp_data->size); + PUSH_KICK (vp_push); + + return &dec->base; +fail: + nv84_decoder_destroy(&dec->base); + return NULL; +} + +static struct pipe_sampler_view ** +nv84_video_buffer_sampler_view_planes(struct pipe_video_buffer *buffer) +{ + struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer; + return buf->sampler_view_planes; +} + +static struct pipe_sampler_view ** 
+nv84_video_buffer_sampler_view_components(struct pipe_video_buffer *buffer) +{ + struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer; + return buf->sampler_view_components; +} + +static struct pipe_surface ** +nv84_video_buffer_surfaces(struct pipe_video_buffer *buffer) +{ + struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer; + return buf->surfaces; +} + +static void +nv84_video_buffer_destroy(struct pipe_video_buffer *buffer) +{ + struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer; + unsigned i; + + assert(buf); + + for (i = 0; i < VL_NUM_COMPONENTS; ++i) { + pipe_resource_reference(&buf->resources[i], NULL); + pipe_sampler_view_reference(&buf->sampler_view_planes[i], NULL); + pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL); + pipe_surface_reference(&buf->surfaces[i * 2], NULL); + pipe_surface_reference(&buf->surfaces[i * 2 + 1], NULL); + } + + nouveau_bo_ref(NULL, &buf->interlaced); + nouveau_bo_ref(NULL, &buf->full); + + FREE(buffer); +} + +struct pipe_video_buffer * +nv84_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *template) +{ + struct nv84_video_buffer *buffer; + struct pipe_resource templ; + unsigned i, j, component; + struct pipe_sampler_view sv_templ; + struct pipe_surface surf_templ; + struct nv50_miptree *mt0, *mt1; + struct nouveau_bo *empty = NULL; + struct nouveau_screen *screen = &((struct nv50_context *)pipe)->screen->base; + union nouveau_bo_config cfg; + unsigned bo_size; + + if (getenv("XVMC_VL") || template->buffer_format != PIPE_FORMAT_NV12) + return vl_video_buffer_create(pipe, template); + + if (!template->interlaced) { + debug_printf("Require interlaced video buffers\n"); + return NULL; + } + if (template->chroma_format != PIPE_VIDEO_CHROMA_FORMAT_420) { + debug_printf("Must use 4:2:0 format\n"); + return NULL; + } + + /* + * Note that there are always going to be exactly two planes, one for Y, + * and one for UV. 
These are also the resources. VP expects these to be + * adjacent, so they need to belong to the same BO. + */ + + buffer = CALLOC_STRUCT(nv84_video_buffer); + if (!buffer) return NULL; + + buffer->mvidx = -1; + + buffer->base.buffer_format = template->buffer_format; + buffer->base.context = pipe; + buffer->base.destroy = nv84_video_buffer_destroy; + buffer->base.chroma_format = template->chroma_format; + buffer->base.width = template->width; + buffer->base.height = template->height; + buffer->base.get_sampler_view_planes = nv84_video_buffer_sampler_view_planes; + buffer->base.get_sampler_view_components = nv84_video_buffer_sampler_view_components; + buffer->base.get_surfaces = nv84_video_buffer_surfaces; + buffer->base.interlaced = true; + + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D_ARRAY; + templ.depth0 = 1; + templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; + templ.format = PIPE_FORMAT_R8_UNORM; + templ.width0 = align(template->width, 2); + templ.height0 = align(template->height, 4) / 2; + templ.flags = NV50_RESOURCE_FLAG_VIDEO | NV50_RESOURCE_FLAG_NOALLOC; + templ.array_size = 2; + + cfg.nv50.tile_mode = 0x20; + cfg.nv50.memtype = 0x70; + + buffer->resources[0] = pipe->screen->resource_create(pipe->screen, &templ); + if (!buffer->resources[0]) + goto error; + + templ.format = PIPE_FORMAT_R8G8_UNORM; + templ.width0 /= 2; + templ.height0 /= 2; + buffer->resources[1] = pipe->screen->resource_create(pipe->screen, &templ); + if (!buffer->resources[1]) + goto error; + + mt0 = nv50_miptree(buffer->resources[0]); + mt1 = nv50_miptree(buffer->resources[1]); + + bo_size = mt0->total_size + mt1->total_size; + if (nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, 0, + bo_size, &cfg, &buffer->interlaced)) + goto error; + /* XXX Change reference frame management so that this is only allocated in + * the decoder when necessary. 
*/ + if (nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, 0, + bo_size, &cfg, &buffer->full)) + goto error; + + mt0->base.bo = buffer->interlaced; + mt0->base.domain = NOUVEAU_BO_VRAM; + mt0->base.offset = 0; + mt0->base.address = buffer->interlaced->offset + mt0->base.offset; + nouveau_bo_ref(buffer->interlaced, &empty); + + mt1->base.bo = buffer->interlaced; + mt1->base.domain = NOUVEAU_BO_VRAM; + mt1->base.offset = mt0->layer_stride * 2; + mt1->base.address = buffer->interlaced->offset + mt1->base.offset; + nouveau_bo_ref(buffer->interlaced, &empty); + + memset(&sv_templ, 0, sizeof(sv_templ)); + for (component = 0, i = 0; i < 2; ++i ) { + struct pipe_resource *res = buffer->resources[i]; + unsigned nr_components = util_format_get_nr_components(res->format); + + u_sampler_view_default_template(&sv_templ, res, res->format); + buffer->sampler_view_planes[i] = + pipe->create_sampler_view(pipe, res, &sv_templ); + if (!buffer->sampler_view_planes[i]) + goto error; + + for (j = 0; j < nr_components; ++j, ++component) { + sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = + PIPE_SWIZZLE_RED + j; + sv_templ.swizzle_a = PIPE_SWIZZLE_ONE; + + buffer->sampler_view_components[component] = + pipe->create_sampler_view(pipe, res, &sv_templ); + if (!buffer->sampler_view_components[component]) + goto error; + } + } + + memset(&surf_templ, 0, sizeof(surf_templ)); + for (j = 0; j < 2; ++j) { + surf_templ.format = buffer->resources[j]->format; + surf_templ.u.tex.first_layer = surf_templ.u.tex.last_layer = 0; + buffer->surfaces[j * 2] = + pipe->create_surface(pipe, buffer->resources[j], &surf_templ); + if (!buffer->surfaces[j * 2]) + goto error; + + surf_templ.u.tex.first_layer = surf_templ.u.tex.last_layer = 1; + buffer->surfaces[j * 2 + 1] = + pipe->create_surface(pipe, buffer->resources[j], &surf_templ); + if (!buffer->surfaces[j * 2 + 1]) + goto error; + } + + return &buffer->base; + +error: + nv84_video_buffer_destroy(&buffer->base); + return 
NULL; +} + +int +nv84_screen_get_video_param(struct pipe_screen *pscreen, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint, + enum pipe_video_cap param) +{ + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC || + u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG12; + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 1; + case PIPE_VIDEO_CAP_MAX_WIDTH: + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return 2048; + case PIPE_VIDEO_CAP_PREFERED_FORMAT: + return PIPE_FORMAT_NV12; + case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: + case PIPE_VIDEO_CAP_PREFERS_INTERLACED: + return true; + case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: + return false; + case PIPE_VIDEO_CAP_MAX_LEVEL: + switch (profile) { + case PIPE_VIDEO_PROFILE_MPEG1: + return 0; + case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE: + case PIPE_VIDEO_PROFILE_MPEG2_MAIN: + return 3; + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + return 41; + default: + debug_printf("unknown video profile: %d\n", profile); + return 0; + } + default: + debug_printf("unknown video param: %d\n", param); + return 0; + } +} + +boolean +nv84_screen_video_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint) +{ + if (profile != PIPE_VIDEO_PROFILE_UNKNOWN) + return format == PIPE_FORMAT_NV12; + + return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video.h b/src/gallium/drivers/nouveau/nv50/nv84_video.h new file mode 100644 index 00000000000..2edba389dbf --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv84_video.h @@ -0,0 +1,138 @@ +/* + * Copyright 2013 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef NV84_VIDEO_H_ +#define NV84_VIDEO_H_ + +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" +#include "vl/vl_types.h" + +#include "vl/vl_mpeg12_bitstream.h" + +#include "util/u_video.h" + +#include "nv50/nv50_context.h" + +/* These are expected to be on their own pushbufs */ +#define SUBC_BSP(m) 2, (m) +#define SUBC_VP(m) 2, (m) + +union pipe_desc { + struct pipe_picture_desc *base; + struct pipe_mpeg12_picture_desc *mpeg12; + struct pipe_mpeg4_picture_desc *mpeg4; + struct pipe_vc1_picture_desc *vc1; + struct pipe_h264_picture_desc *h264; +}; + +struct nv84_video_buffer { + struct pipe_video_buffer base; + struct pipe_resource *resources[VL_NUM_COMPONENTS]; + struct pipe_sampler_view *sampler_view_planes[VL_NUM_COMPONENTS]; + struct pipe_sampler_view *sampler_view_components[VL_NUM_COMPONENTS]; + struct pipe_surface *surfaces[VL_NUM_COMPONENTS * 2]; + + struct nouveau_bo *interlaced, *full; + int mvidx; + unsigned frame_num, frame_num_max; +}; + +struct nv84_decoder { + struct pipe_video_codec base; + struct 
nouveau_client *client; + struct nouveau_object *bsp_channel, *vp_channel, *bsp, *vp; + struct nouveau_pushbuf *bsp_pushbuf, *vp_pushbuf; + struct nouveau_bufctx *bsp_bufctx, *vp_bufctx; + + struct nouveau_bo *bsp_fw, *bsp_data; + struct nouveau_bo *vp_fw, *vp_data; + struct nouveau_bo *mbring, *vpring; + + /* + * states: + * 0: init + * 1: vpring/mbring cleared, bsp is ready + * 2: bsp is done, vp is ready + * and then vp it back to 1 + */ + struct nouveau_bo *fence; + + struct nouveau_bo *bitstream; + struct nouveau_bo *vp_params; + + size_t vp_fw2_offset; + + unsigned frame_mbs, frame_size; + /* VPRING layout: + RESIDUAL + CTRL + DEBLOCK + 0x1000 + */ + unsigned vpring_deblock, vpring_residual, vpring_ctrl; + + + struct vl_mpg12_bs *mpeg12_bs; + + struct nouveau_bo *mpeg12_bo; + void *mpeg12_mb_info; + uint16_t *mpeg12_data; + const int *zscan; + uint8_t mpeg12_intra_matrix[64]; + uint8_t mpeg12_non_intra_matrix[64]; +}; + +static INLINE uint32_t mb(uint32_t coord) +{ + return (coord + 0xf)>>4; +} + +static INLINE uint32_t mb_half(uint32_t coord) +{ + return (coord + 0x1f)>>5; +} + +int +nv84_decoder_bsp(struct nv84_decoder *dec, + struct pipe_h264_picture_desc *desc, + unsigned num_buffers, + const void *const *data, + const unsigned *num_bytes, + struct nv84_video_buffer *dest); + +void +nv84_decoder_vp_h264(struct nv84_decoder *dec, + struct pipe_h264_picture_desc *desc, + struct nv84_video_buffer *dest); + +void +nv84_decoder_vp_mpeg12_mb(struct nv84_decoder *dec, + struct pipe_mpeg12_picture_desc *desc, + const struct pipe_mpeg12_macroblock *mb); + +void +nv84_decoder_vp_mpeg12(struct nv84_decoder *dec, + struct pipe_mpeg12_picture_desc *desc, + struct nv84_video_buffer *dest); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c b/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c new file mode 100644 index 00000000000..86047b5f463 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c @@ -0,0 +1,250 @@ +/* + * Copyright 
2013 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nv50/nv84_video.h" + +struct iparm { + struct iseqparm { + uint32_t chroma_format_idc; // 00 + uint32_t pad[(0x128 - 0x4) / 4]; + uint32_t log2_max_frame_num_minus4; // 128 + uint32_t pic_order_cnt_type; // 12c + uint32_t log2_max_pic_order_cnt_lsb_minus4; // 130 + uint32_t delta_pic_order_always_zero_flag; // 134 + uint32_t num_ref_frames; // 138 + uint32_t pic_width_in_mbs_minus1; // 13c + uint32_t pic_height_in_map_units_minus1; // 140 + uint32_t frame_mbs_only_flag; // 144 + uint32_t mb_adaptive_frame_field_flag; // 148 + uint32_t direct_8x8_inference_flag; // 14c + } iseqparm; // 000 + struct ipicparm { + uint32_t entropy_coding_mode_flag; // 00 + uint32_t pic_order_present_flag; // 04 + uint32_t num_slice_groups_minus1; // 08 + uint32_t slice_group_map_type; // 0c + uint32_t pad1[0x60 / 4]; + uint32_t u70; // 70 + uint32_t u74; // 74 + uint32_t u78; // 78 + uint32_t num_ref_idx_l0_active_minus1; // 7c + uint32_t num_ref_idx_l1_active_minus1; // 80 + uint32_t weighted_pred_flag; // 84 + uint32_t weighted_bipred_idc; // 88 + uint32_t pic_init_qp_minus26; // 8c + uint32_t chroma_qp_index_offset; // 90 + uint32_t deblocking_filter_control_present_flag; // 94 + uint32_t constrained_intra_pred_flag; // 98 + uint32_t redundant_pic_cnt_present_flag; // 9c + uint32_t transform_8x8_mode_flag; // a0 + uint32_t pad2[(0x1c8 - 0xa0 - 4) / 4]; + uint32_t second_chroma_qp_index_offset; // 1c8 + uint32_t u1cc; // 1cc + uint32_t curr_pic_order_cnt; // 1d0 + uint32_t field_order_cnt[2]; // 1d4 + uint32_t curr_mvidx; // 1dc + struct iref { + uint32_t u00; // 00 + uint32_t field_is_ref; // 04 // bit0: top, bit1: bottom + uint8_t is_long_term; // 08 + uint8_t non_existing; // 09 + uint32_t frame_idx; // 0c + uint32_t field_order_cnt[2]; // 10 + uint32_t mvidx; // 18 + uint8_t field_pic_flag; // 1c + // 20 + } refs[0x10]; // 1e0 + } ipicparm; // 150 +}; + +int +nv84_decoder_bsp(struct nv84_decoder *dec, + struct pipe_h264_picture_desc *desc, + unsigned 
num_buffers, + const void *const *data, + const unsigned *num_bytes, + struct nv84_video_buffer *dest) +{ + struct iparm params; + uint32_t more_params[0x44 / 4] = {0}; + unsigned total_bytes = 0; + int i; + static const uint32_t end[] = {0x0b010000, 0, 0x0b010000, 0}; + char indexes[17] = {0}; + struct nouveau_pushbuf *push = dec->bsp_pushbuf; + struct nouveau_pushbuf_refn bo_refs[] = { + { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dec->bitstream, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART }, + { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + }; + + nouveau_bo_wait(dec->fence, NOUVEAU_BO_RDWR, dec->client); + + STATIC_ASSERT(sizeof(struct iparm) == 0x530); + + memset(¶ms, 0, sizeof(params)); + + dest->frame_num = dest->frame_num_max = desc->frame_num; + + for (i = 0; i < 16; i++) { + struct iref *ref = ¶ms.ipicparm.refs[i]; + struct nv84_video_buffer *frame = (struct nv84_video_buffer *)desc->ref[i]; + if (!frame) break; + /* The frame index is relative to the last IDR frame. So once the frame + * num goes back to 0, previous reference frames need to have a negative + * index. + */ + if (desc->frame_num >= frame->frame_num_max) { + frame->frame_num_max = desc->frame_num; + } else { + frame->frame_num -= frame->frame_num_max + 1; + frame->frame_num_max = desc->frame_num; + } + ref->non_existing = 0; + ref->field_is_ref = (desc->top_is_reference[i] ? 1 : 0) | + (desc->bottom_is_reference[i] ? 
2 : 0); + ref->is_long_term = desc->is_long_term[i]; + ref->field_order_cnt[0] = desc->field_order_cnt_list[i][0]; + ref->field_order_cnt[1] = desc->field_order_cnt_list[i][1]; + ref->frame_idx = frame->frame_num; + ref->u00 = ref->mvidx = frame->mvidx; + ref->field_pic_flag = desc->field_pic_flag; + indexes[frame->mvidx] = 1; + } + + /* Needs to be adjusted if we ever support non-4:2:0 videos */ + params.iseqparm.chroma_format_idc = 1; + + params.iseqparm.pic_width_in_mbs_minus1 = mb(dec->base.width) - 1; + if (desc->field_pic_flag || desc->mb_adaptive_frame_field_flag) + params.iseqparm.pic_height_in_map_units_minus1 = mb_half(dec->base.height) - 1; + else + params.iseqparm.pic_height_in_map_units_minus1 = mb(dec->base.height) - 1; + + if (desc->bottom_field_flag) + params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[1]; + else + params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[0]; + params.ipicparm.field_order_cnt[0] = desc->field_order_cnt[0]; + params.ipicparm.field_order_cnt[1] = desc->field_order_cnt[1]; + if (desc->is_reference) { + if (dest->mvidx < 0) { + for (i = 0; i < desc->num_ref_frames + 1; i++) { + if (!indexes[i]) { + dest->mvidx = i; + break; + } + } + assert(i != desc->num_ref_frames + 1); + } + + params.ipicparm.u1cc = params.ipicparm.curr_mvidx = dest->mvidx; + } + + params.iseqparm.num_ref_frames = desc->num_ref_frames; + params.iseqparm.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag; + params.ipicparm.constrained_intra_pred_flag = desc->constrained_intra_pred_flag; + params.ipicparm.weighted_pred_flag = desc->weighted_pred_flag; + params.ipicparm.weighted_bipred_idc = desc->weighted_bipred_idc; + params.iseqparm.frame_mbs_only_flag = desc->frame_mbs_only_flag; + params.ipicparm.transform_8x8_mode_flag = desc->transform_8x8_mode_flag; + params.ipicparm.chroma_qp_index_offset = desc->chroma_qp_index_offset; + params.ipicparm.second_chroma_qp_index_offset = desc->second_chroma_qp_index_offset; + 
params.ipicparm.pic_init_qp_minus26 = desc->pic_init_qp_minus26; + params.ipicparm.num_ref_idx_l0_active_minus1 = desc->num_ref_idx_l0_active_minus1; + params.ipicparm.num_ref_idx_l1_active_minus1 = desc->num_ref_idx_l1_active_minus1; + params.iseqparm.log2_max_frame_num_minus4 = desc->log2_max_frame_num_minus4; + params.iseqparm.pic_order_cnt_type = desc->pic_order_cnt_type; + params.iseqparm.log2_max_pic_order_cnt_lsb_minus4 = desc->log2_max_pic_order_cnt_lsb_minus4; + params.iseqparm.delta_pic_order_always_zero_flag = desc->delta_pic_order_always_zero_flag; + params.iseqparm.direct_8x8_inference_flag = desc->direct_8x8_inference_flag; + params.ipicparm.entropy_coding_mode_flag = desc->entropy_coding_mode_flag; + params.ipicparm.pic_order_present_flag = desc->pic_order_present_flag; + params.ipicparm.deblocking_filter_control_present_flag = desc->deblocking_filter_control_present_flag; + params.ipicparm.redundant_pic_cnt_present_flag = desc->redundant_pic_cnt_present_flag; + + memcpy(dec->bitstream->map, ¶ms, sizeof(params)); + for (i = 0; i < num_buffers; i++) { + assert(total_bytes + num_bytes[i] < dec->bitstream->size / 2 - 0x700); + memcpy(dec->bitstream->map + 0x700 + total_bytes, data[i], num_bytes[i]); + total_bytes += num_bytes[i]; + } + memcpy(dec->bitstream->map + 0x700 + total_bytes, end, sizeof(end)); + total_bytes += sizeof(end); + more_params[1] = total_bytes; + memcpy(dec->bitstream->map + 0x600, more_params, sizeof(more_params)); + + PUSH_SPACE(push, 5 + 21 + 3 + 2 + 4 + 2); + nouveau_pushbuf_refn(push, bo_refs, sizeof(bo_refs)/sizeof(bo_refs[0])); + + /* Wait for the fence = 1 */ + BEGIN_NV04(push, SUBC_BSP(0x10), 4); + PUSH_DATAh(push, dec->fence->offset); + PUSH_DATA (push, dec->fence->offset); + PUSH_DATA (push, 1); + PUSH_DATA (push, 1); + + /* TODO: Use both halves of bitstream/vpring for alternating frames */ + + /* Kick off the BSP */ + BEGIN_NV04(push, SUBC_BSP(0x400), 20); + PUSH_DATA (push, dec->bitstream->offset >> 8); + PUSH_DATA 
(push, (dec->bitstream->offset >> 8) + 7); + PUSH_DATA (push, dec->bitstream->size / 2 - 0x700); + PUSH_DATA (push, (dec->bitstream->offset >> 8) + 6); + PUSH_DATA (push, 1); + PUSH_DATA (push, dec->mbring->offset >> 8); + PUSH_DATA (push, dec->frame_size); + PUSH_DATA (push, (dec->mbring->offset + dec->frame_size) >> 8); + PUSH_DATA (push, dec->vpring->offset >> 8); + PUSH_DATA (push, dec->vpring->size / 2); + PUSH_DATA (push, dec->vpring_residual); + PUSH_DATA (push, dec->vpring_ctrl); + PUSH_DATA (push, 0); + PUSH_DATA (push, dec->vpring_residual); + PUSH_DATA (push, dec->vpring_residual + dec->vpring_ctrl); + PUSH_DATA (push, dec->vpring_deblock); + PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl + + dec->vpring_residual + dec->vpring_deblock) >> 8); + PUSH_DATA (push, 0x654321); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0x100008); + + BEGIN_NV04(push, SUBC_BSP(0x620), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 0); + + /* Write fence = 2, intr */ + BEGIN_NV04(push, SUBC_BSP(0x610), 3); + PUSH_DATAh(push, dec->fence->offset); + PUSH_DATA (push, dec->fence->offset); + PUSH_DATA (push, 2); + + BEGIN_NV04(push, SUBC_BSP(0x304), 1); + PUSH_DATA (push, 0x101); + PUSH_KICK (push); + return 0; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c b/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c new file mode 100644 index 00000000000..619aa4e7a40 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c @@ -0,0 +1,552 @@ +/* + * Copyright 2013 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, 
subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv50/nv84_video.h" + +#include "util/u_sse.h" + +struct h264_iparm1 { + uint8_t scaling_lists_4x4[6][16]; // 00 + uint8_t scaling_lists_8x8[2][64]; // 60 + uint32_t width; // e0 + uint32_t height; // e4 + uint64_t ref1_addrs[16]; // e8 + uint64_t ref2_addrs[16]; // 168 + uint32_t unk1e8; + uint32_t unk1ec; + uint32_t w1; // 1f0 + uint32_t w2; // 1f4 + uint32_t w3; // 1f8 + uint32_t h1; // 1fc + uint32_t h2; // 200 + uint32_t h3; // 204 + uint32_t mb_adaptive_frame_field_flag; // 208 + uint32_t field_pic_flag; // 20c + uint32_t format; // 210 + uint32_t unk214; // 214 +}; + +struct h264_iparm2 { + uint32_t width; // 00 + uint32_t height; // 04 + uint32_t mbs; // 08 + uint32_t w1; // 0c + uint32_t w2; // 10 + uint32_t w3; // 14 + uint32_t h1; // 18 + uint32_t h2; // 1c + uint32_t h3; // 20 + uint32_t unk24; + uint32_t mb_adaptive_frame_field_flag; // 28 + uint32_t top; // 2c + uint32_t bottom; // 30 + uint32_t is_reference; // 34 +}; + +void +nv84_decoder_vp_h264(struct nv84_decoder *dec, + struct pipe_h264_picture_desc *desc, + struct nv84_video_buffer *dest) +{ + struct h264_iparm1 param1; + struct h264_iparm2 param2; + int i, width = align(dest->base.width, 16), + height = align(dest->base.height, 16); + + struct nouveau_pushbuf *push = dec->vp_pushbuf; + struct nouveau_pushbuf_refn 
bo_refs[] = { + { dest->interlaced, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dest->full, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dec->vp_params, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART }, + { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + bool is_ref = desc->is_reference; + + STATIC_ASSERT(sizeof(struct h264_iparm1) == 0x218); + STATIC_ASSERT(sizeof(struct h264_iparm2) == 0x38); + + memset(¶m1, 0, sizeof(param1)); + memset(¶m2, 0, sizeof(param2)); + + memcpy(¶m1.scaling_lists_4x4, desc->scaling_lists_4x4, + sizeof(param1.scaling_lists_4x4)); + memcpy(¶m1.scaling_lists_8x8, desc->scaling_lists_8x8, + sizeof(param1.scaling_lists_8x8)); + + param1.width = width; + param1.w1 = param1.w2 = param1.w3 = align(width, 64); + param1.height = param1.h2 = height; + param1.h1 = param1.h3 = align(height, 32); + param1.format = 0x3231564e; /* 'NV12' */ + param1.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag; + param1.field_pic_flag = desc->field_pic_flag; + + param2.width = width; + param2.w1 = param2.w2 = param2.w3 = param1.w1; + if (desc->field_pic_flag) + param2.height = align(height, 32) / 2; + else + param2.height = height; + param2.h1 = param2.h2 = align(height, 32); + param2.h3 = height; + param2.mbs = width * height >> 8; + if (desc->field_pic_flag) { + param2.top = desc->bottom_field_flag ? 2 : 1; + param2.bottom = desc->bottom_field_flag; + } + param2.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag; + param2.is_reference = desc->is_reference; + + PUSH_SPACE(push, 5 + 16 + 3 + 2 + 6 + (is_ref ? 
2 : 0) + 3 + 2 + 4 + 2); + + struct nouveau_bo *ref2_default = dest->full; + + for (i = 0; i < 16; i++) { + struct nv84_video_buffer *buf = (struct nv84_video_buffer *)desc->ref[i]; + struct nouveau_bo *bo1, *bo2; + if (buf) { + bo1 = buf->interlaced; + bo2 = buf->full; + if (i == 0) + ref2_default = buf->full; + } else { + bo1 = dest->interlaced; + bo2 = ref2_default; + } + param1.ref1_addrs[i] = bo1->offset; + param1.ref2_addrs[i] = bo2->offset; + struct nouveau_pushbuf_refn bo_refs[] = { + { bo1, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { bo2, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + }; + nouveau_pushbuf_refn(push, bo_refs, sizeof(bo_refs)/sizeof(bo_refs[0])); + } + + memcpy(dec->vp_params->map, ¶m1, sizeof(param1)); + memcpy(dec->vp_params->map + 0x400, ¶m2, sizeof(param2)); + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + /* Wait for BSP to have completed */ + BEGIN_NV04(push, SUBC_VP(0x10), 4); + PUSH_DATAh(push, dec->fence->offset); + PUSH_DATA (push, dec->fence->offset); + PUSH_DATA (push, 2); + PUSH_DATA (push, 1); /* wait for sem == 2 */ + + /* VP step 1 */ + BEGIN_NV04(push, SUBC_VP(0x400), 15); + PUSH_DATA (push, 1); + PUSH_DATA (push, param2.mbs); + PUSH_DATA (push, 0x3987654); /* each nibble probably a dma index */ + PUSH_DATA (push, 0x55001); /* constant */ + PUSH_DATA (push, dec->vp_params->offset >> 8); + PUSH_DATA (push, (dec->vpring->offset + dec->vpring_residual) >> 8); + PUSH_DATA (push, dec->vpring_ctrl); + PUSH_DATA (push, dec->vpring->offset >> 8); + PUSH_DATA (push, dec->bitstream->size / 2 - 0x700); + PUSH_DATA (push, (dec->mbring->offset + dec->mbring->size - 0x2000) >> 8); + PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl + + dec->vpring_residual + dec->vpring_deblock) >> 8); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0x100008); + PUSH_DATA (push, dest->interlaced->offset >> 8); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, SUBC_VP(0x620), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, SUBC_VP(0x300), 1); 
+ PUSH_DATA (push, 0); + + /* VP step 2 */ + BEGIN_NV04(push, SUBC_VP(0x400), 5); + PUSH_DATA (push, 0x54530201); + PUSH_DATA (push, (dec->vp_params->offset >> 8) + 0x4); + PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl + + dec->vpring_residual) >> 8); + PUSH_DATA (push, dest->interlaced->offset >> 8); + PUSH_DATA (push, dest->interlaced->offset >> 8); + + if (is_ref) { + BEGIN_NV04(push, SUBC_VP(0x414), 1); + PUSH_DATA (push, dest->full->offset >> 8); + } + + BEGIN_NV04(push, SUBC_VP(0x620), 2); + PUSH_DATAh(push, dec->vp_fw2_offset); + PUSH_DATA (push, dec->vp_fw2_offset); + + BEGIN_NV04(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 0); + + /* Set the semaphore back to 1 */ + BEGIN_NV04(push, SUBC_VP(0x610), 3); + PUSH_DATAh(push, dec->fence->offset); + PUSH_DATA (push, dec->fence->offset); + PUSH_DATA (push, 1); + + /* Write to the semaphore location, intr */ + BEGIN_NV04(push, SUBC_VP(0x304), 1); + PUSH_DATA (push, 0x101); + + for (i = 0; i < 2; i++) { + struct nv50_miptree *mt = nv50_miptree(dest->resources[i]); + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + } + + PUSH_KICK (push); +} + +static INLINE int16_t inverse_quantize(int16_t val, uint8_t quant, int mpeg1) { + int16_t ret = val * quant / 16; + if (mpeg1 && ret) { + if (ret > 0) + ret = (ret - 1) | 1; + else + ret = (ret + 1) | 1; + } + if (ret < -2048) + ret = -2048; + else if (ret > 2047) + ret = 2047; + return ret; +} + +struct mpeg12_mb_info { + uint32_t index; + uint8_t unk4; + uint8_t unk5; + uint16_t coded_block_pattern; + uint8_t block_counts[6]; + uint16_t PMV[8]; + uint16_t skipped; +}; + +void +nv84_decoder_vp_mpeg12_mb(struct nv84_decoder *dec, + struct pipe_mpeg12_picture_desc *desc, + const struct pipe_mpeg12_macroblock *macrob) +{ + STATIC_ASSERT(sizeof(struct mpeg12_mb_info) == 32); + + struct mpeg12_mb_info info = {0}; + int i, sum = 0, mask, block_index, count; + const int16_t *blocks; + int intra = macrob->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA; + int motion = 
macrob->macroblock_type & + (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD); + const uint8_t *quant_matrix = intra ? dec->mpeg12_intra_matrix : + dec->mpeg12_non_intra_matrix; + int mpeg1 = dec->base.profile == PIPE_VIDEO_PROFILE_MPEG1; + + info.index = macrob->y * mb(dec->base.width) + macrob->x; + info.unk4 = motion; + if (intra) + info.unk4 |= 1; + if (macrob->macroblock_modes.bits.dct_type) + info.unk4 |= 0x20; + info.unk5 = (macrob->motion_vertical_field_select << 4) | + (macrob->macroblock_modes.value & 0xf); + info.coded_block_pattern = macrob->coded_block_pattern; + if (motion) { + memcpy(info.PMV, macrob->PMV, sizeof(info.PMV)); + } + blocks = macrob->blocks; + for (mask = 0x20, block_index = 0; mask > 0; mask >>= 1, block_index++) { + if ((macrob->coded_block_pattern & mask) == 0) + continue; + + count = 0; + + /* + * The observation here is that there are a lot of 0's, and things go + * a lot faster if one skips over them. + */ + +#if defined(PIPE_ARCH_SSE) && defined(PIPE_ARCH_X86_64) +/* Note that the SSE implementation is much more tuned to X86_64. As it's not + * benchmarked on X86_32, disable it there. I suspect that the code needs to + * be reorganized in terms of 32-bit wide data in order to be more + * efficient. NV84+ were released well into the 64-bit CPU era, so it should + * be a minority case. + */ + +/* This returns a 16-bit bit-mask, each 2 bits are both 1 or both 0, depending + * on whether the corresponding (16-bit) word in blocks is zero or non-zero. */ +#define wordmask(blocks, zero) \ + (uint64_t)(_mm_movemask_epi8( \ + _mm_cmpeq_epi16( \ + zero, _mm_load_si128((__m128i *)(blocks))))) + + __m128i zero = _mm_setzero_si128(); + + /* TODO: Look into doing the inverse quantization in terms of SSE + * operations unconditionally, when necessary. 
*/ + uint64_t bmask0 = wordmask(blocks, zero); + bmask0 |= wordmask(blocks + 8, zero) << 16; + bmask0 |= wordmask(blocks + 16, zero) << 32; + bmask0 |= wordmask(blocks + 24, zero) << 48; + uint64_t bmask1 = wordmask(blocks + 32, zero); + bmask1 |= wordmask(blocks + 40, zero) << 16; + bmask1 |= wordmask(blocks + 48, zero) << 32; + bmask1 |= wordmask(blocks + 56, zero) << 48; + + /* The wordmask macro returns the inverse of what we want, since it + * returns a 1 for equal-to-zero. Invert. */ + bmask0 = ~bmask0; + bmask1 = ~bmask1; + + /* Note that the bitmask is actually sequences of 2 bits for each block + * index. This is because there is no movemask_epi16. That means that + * (a) ffs will never return 64, since the prev bit will always be set + * in that case, and (b) we need to do an extra bit shift. Or'ing the + * bitmasks together is faster than having a loop that computes them one + * at a time and processes them, on a Core i7-920. Trying to put bmask + * into an array and then looping also slows things down. 
+ */ + + /* shift needs to be the same width as i, and unsigned so that / 2 + * becomes a rshift operation */ + uint32_t shift; + i = 0; + + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + int16_t tmp; + while ((shift = __builtin_ffsll(bmask0))) { + i += (shift - 1) / 2; + bmask0 >>= shift - 1; + *dec->mpeg12_data++ = dec->zscan[i] * 2; + tmp = inverse_quantize(blocks[i], quant_matrix[i], mpeg1); + *dec->mpeg12_data++ = tmp; + sum += tmp; + count++; + i++; + bmask0 >>= 2; + } + i = 32; + while ((shift = __builtin_ffsll(bmask1))) { + i += (shift - 1) / 2; + bmask1 >>= shift - 1; + *dec->mpeg12_data++ = dec->zscan[i] * 2; + tmp = inverse_quantize(blocks[i], quant_matrix[i], mpeg1); + *dec->mpeg12_data++ = tmp; + sum += tmp; + count++; + i++; + bmask1 >>= 2; + } + } else { + while ((shift = __builtin_ffsll(bmask0))) { + i += (shift - 1) / 2; + bmask0 >>= shift - 1; + *dec->mpeg12_data++ = i * 2; + *dec->mpeg12_data++ = blocks[i]; + count++; + i++; + bmask0 >>= 2; + } + i = 32; + while ((shift = __builtin_ffsll(bmask1))) { + i += (shift - 1) / 2; + bmask1 >>= shift - 1; + *dec->mpeg12_data++ = i * 2; + *dec->mpeg12_data++ = blocks[i]; + count++; + i++; + bmask1 >>= 2; + } + } +#undef wordmask +#else + + /* + * This loop looks ridiculously written... and it is. I tried a lot of + * different ways of achieving this scan, and this was the fastest, at + * least on a Core i7-920. Note that it's not necessary to skip the 0's, + * the firmware will deal with those just fine. But it's faster to skip + * them. Note to people trying benchmarks: make sure to use realistic + * mpeg data, which can often be a single data point first followed by + * 63 0's, or <data> 7x <0> <data> 7x <0> etc. 
+ */ + i = 0; + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + while (true) { + int16_t tmp; + while (likely(i < 64 && !(tmp = blocks[i]))) i++; + if (i >= 64) break; + *dec->mpeg12_data++ = dec->zscan[i] * 2; + tmp = inverse_quantize(tmp, quant_matrix[i], mpeg1); + *dec->mpeg12_data++ = tmp; + sum += tmp; + count++; + i++; + } + } else { + while (true) { + int16_t tmp; + while (likely(i < 64 && !(tmp = blocks[i]))) i++; + if (i >= 64) break; + *dec->mpeg12_data++ = i * 2; + *dec->mpeg12_data++ = tmp; + count++; + i++; + } + } + +#endif + + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + if (!mpeg1 && (sum & 1) == 0) { + if (count && *(dec->mpeg12_data - 2) == 63 * 2) { + uint16_t *val = dec->mpeg12_data - 1; + if (*val & 1) *val -= 1; + else *val += 1; + } else { + *dec->mpeg12_data++ = 63 * 2; + *dec->mpeg12_data++ = 1; + count++; + } + } + } + + if (count) { + *(dec->mpeg12_data - 2) |= 1; + } else { + *dec->mpeg12_data++ = 1; + *dec->mpeg12_data++ = 0; + count = 1; + } + info.block_counts[block_index] = count; + blocks += 64; + } + + memcpy(dec->mpeg12_mb_info, &info, sizeof(info)); + dec->mpeg12_mb_info += sizeof(info); + + if (macrob->num_skipped_macroblocks) { + info.index++; + info.coded_block_pattern = 0; + info.skipped = macrob->num_skipped_macroblocks - 1; + memset(info.block_counts, 0, sizeof(info.block_counts)); + memcpy(dec->mpeg12_mb_info, &info, sizeof(info)); + dec->mpeg12_mb_info += sizeof(info); + } +} + +struct mpeg12_header { + uint32_t luma_top_size; // 00 + uint32_t luma_bottom_size; // 04 + uint32_t chroma_top_size; // 08 + uint32_t mbs; // 0c + uint32_t mb_info_size; // 10 + uint32_t mb_width_minus1; // 14 + uint32_t mb_height_minus1; // 18 + uint32_t width; // 1c + uint32_t height; // 20 + uint8_t progressive; // 24 + uint8_t mocomp_only; // 25 + uint8_t frames; // 26 + uint8_t picture_structure; // 27 + uint32_t unk28; // 28 -- 0x50100 + uint32_t unk2c; // 2c + uint32_t pad[4 * 13]; +}; + +void 
+nv84_decoder_vp_mpeg12(struct nv84_decoder *dec, + struct pipe_mpeg12_picture_desc *desc, + struct nv84_video_buffer *dest) +{ + struct nouveau_pushbuf *push = dec->vp_pushbuf; + struct nv84_video_buffer *ref1 = (struct nv84_video_buffer *)desc->ref[0]; + struct nv84_video_buffer *ref2 = (struct nv84_video_buffer *)desc->ref[1]; + struct nouveau_pushbuf_refn bo_refs[] = { + { dest->interlaced, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { NULL, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { NULL, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dec->mpeg12_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART }, + }; + int i, num_refs = sizeof(bo_refs) / sizeof(*bo_refs); + struct mpeg12_header header = {0}; + struct nv50_miptree *y = nv50_miptree(dest->resources[0]); + struct nv50_miptree *uv = nv50_miptree(dest->resources[1]); + + STATIC_ASSERT(sizeof(struct mpeg12_header) == 0x100); + + if (ref1 == NULL) + ref1 = dest; + if (ref2 == NULL) + ref2 = dest; + bo_refs[1].bo = ref1->interlaced; + bo_refs[2].bo = ref2->interlaced; + + header.luma_top_size = y->layer_stride; + header.luma_bottom_size = y->layer_stride; + header.chroma_top_size = uv->layer_stride; + header.mbs = mb(dec->base.width) * mb(dec->base.height); + header.mb_info_size = dec->mpeg12_mb_info - dec->mpeg12_bo->map - 0x100; + header.mb_width_minus1 = mb(dec->base.width) - 1; + header.mb_height_minus1 = mb(dec->base.height) - 1; + header.width = align(dec->base.width, 16); + header.height = align(dec->base.height, 16); + header.progressive = desc->frame_pred_frame_dct; + header.frames = 1 + (desc->ref[0] != NULL) + (desc->ref[1] != NULL); + header.picture_structure = desc->picture_structure; + header.unk28 = 0x50100; + + memcpy(dec->mpeg12_bo->map, &header, sizeof(header)); + + PUSH_SPACE(push, 10 + 3 + 2); + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + BEGIN_NV04(push, SUBC_VP(0x400), 9); + PUSH_DATA (push, 0x543210); /* each nibble possibly a dma index */ + PUSH_DATA (push, 0x555001); /* constant */ + PUSH_DATA (push, 
dec->mpeg12_bo->offset >> 8); + PUSH_DATA (push, (dec->mpeg12_bo->offset + 0x100) >> 8); + PUSH_DATA (push, (dec->mpeg12_bo->offset + 0x100 + + align(0x20 * mb(dec->base.width) * + mb(dec->base.height), 0x100)) >> 8); + PUSH_DATA (push, dest->interlaced->offset >> 8); + PUSH_DATA (push, ref1->interlaced->offset >> 8); + PUSH_DATA (push, ref2->interlaced->offset >> 8); + PUSH_DATA (push, 6 * 64 * 8 * header.mbs); + + BEGIN_NV04(push, SUBC_VP(0x620), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 0); + + for (i = 0; i < 2; i++) { + struct nv50_miptree *mt = nv50_miptree(dest->resources[i]); + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + } + PUSH_KICK (push); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video.c b/src/gallium/drivers/nouveau/nv50/nv98_video.c new file mode 100644 index 00000000000..069481de207 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv98_video.c @@ -0,0 +1,297 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv50/nv98_video.h" + +#include "util/u_sampler.h" +#include "util/u_format.h" + +static void +nv98_decoder_decode_bitstream(struct pipe_video_codec *decoder, + struct pipe_video_buffer *video_target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void *const *data, + const unsigned *num_bytes) +{ + struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder; + struct nouveau_vp3_video_buffer *target = (struct nouveau_vp3_video_buffer *)video_target; + uint32_t comm_seq = ++dec->fence_seq; + union pipe_desc desc; + + unsigned vp_caps, is_ref, ret; + struct nouveau_vp3_video_buffer *refs[16] = {}; + + desc.base = picture; + + assert(target->base.buffer_format == PIPE_FORMAT_NV12); + + ret = nv98_decoder_bsp(dec, desc, target, comm_seq, + num_buffers, data, num_bytes, + &vp_caps, &is_ref, refs); + + /* did we decode bitstream correctly? 
*/ + assert(ret == 2); + + nv98_decoder_vp(dec, desc, target, comm_seq, vp_caps, is_ref, refs); + nv98_decoder_ppp(dec, desc, target, comm_seq); +} + +struct pipe_video_codec * +nv98_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ) +{ + struct nouveau_screen *screen = &((struct nv50_context *)context)->screen->base; + struct nouveau_vp3_decoder *dec; + struct nouveau_pushbuf **push; + struct nv04_fifo nv04_data = {.vram = 0xbeef0201, .gart = 0xbeef0202}; + union nouveau_bo_config cfg; + + cfg.nv50.tile_mode = 0x20; + cfg.nv50.memtype = 0x70; + + int ret, i; + uint32_t codec = 1, ppp_codec = 3; + uint32_t timeout; + u32 tmp_size = 0; + + if (getenv("XVMC_VL")) + return vl_create_decoder(context, templ); + + if (templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + debug_printf("%x\n", templ->entrypoint); + return NULL; + } + + dec = CALLOC_STRUCT(nouveau_vp3_decoder); + if (!dec) + return NULL; + dec->client = screen->client; + dec->base = *templ; + nouveau_vp3_decoder_init_common(&dec->base); + + dec->bsp_idx = 5; + dec->vp_idx = 6; + dec->ppp_idx = 7; + + ret = nouveau_object_new(&screen->device->object, 0, + NOUVEAU_FIFO_CHANNEL_CLASS, + &nv04_data, sizeof(nv04_data), &dec->channel[0]); + + if (!ret) + ret = nouveau_pushbuf_new(screen->client, dec->channel[0], 4, + 32 * 1024, true, &dec->pushbuf[0]); + + for (i = 1; i < 3; ++i) { + dec->channel[i] = dec->channel[0]; + dec->pushbuf[i] = dec->pushbuf[0]; + } + push = dec->pushbuf; + + if (!ret) + ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x85b1, NULL, 0, &dec->bsp); + if (!ret) + ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x85b2, NULL, 0, &dec->vp); + if (!ret) + ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x85b3, NULL, 0, &dec->ppp); + if (ret) + goto fail; + + BEGIN_NV04(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[0], dec->bsp->handle); + + BEGIN_NV04(push[0], SUBC_BSP(0x180), 5); + for (i = 0; i < 5; i++) + PUSH_DATA (push[0], 
nv04_data.vram); + + BEGIN_NV04(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[1], dec->vp->handle); + + BEGIN_NV04(push[1], SUBC_VP(0x180), 6); + for (i = 0; i < 6; i++) + PUSH_DATA (push[1], nv04_data.vram); + + BEGIN_NV04(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[2], dec->ppp->handle); + + BEGIN_NV04(push[2], SUBC_PPP(0x180), 5); + for (i = 0; i < 5; i++) + PUSH_DATA (push[2], nv04_data.vram); + + dec->base.context = context; + dec->base.decode_bitstream = nv98_decoder_decode_bitstream; + + for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i) + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0, 1 << 20, NULL, &dec->bsp_bo[i]); + if (!ret) + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0x100, 4 << 20, NULL, &dec->inter_bo[0]); + if (!ret) + nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]); + if (ret) + goto fail; + + switch (u_reduce_video_profile(templ->profile)) { + case PIPE_VIDEO_FORMAT_MPEG12: { + codec = 1; + assert(templ->max_references <= 2); + break; + } + case PIPE_VIDEO_FORMAT_MPEG4: { + codec = 4; + tmp_size = mb(templ->height)*16 * mb(templ->width)*16; + assert(templ->max_references <= 2); + break; + } + case PIPE_VIDEO_FORMAT_VC1: { + ppp_codec = codec = 2; + tmp_size = mb(templ->height)*16 * mb(templ->width)*16; + assert(templ->max_references <= 2); + break; + } + case PIPE_VIDEO_FORMAT_MPEG4_AVC: { + codec = 3; + dec->tmp_stride = 16 * mb_half(templ->width) * nouveau_vp3_video_align(templ->height) * 3 / 2; + tmp_size = dec->tmp_stride * (templ->max_references + 1); + assert(templ->max_references <= 16); + break; + } + default: + fprintf(stderr, "invalid codec\n"); + goto fail; + } + + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + 0x4000, NULL, &dec->fw_bo); + if (ret) + goto fail; + + ret = nouveau_vp3_load_firmware(dec, templ->profile, screen->device->chipset); + if (ret) + goto fw_fail; + + if (codec != 3) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + 0x400, 
NULL, &dec->bitplane_bo); + if (ret) + goto fail; + } + + dec->ref_stride = mb(templ->width)*16 * (mb_half(templ->height)*32 + nouveau_vp3_video_align(templ->height)/2); + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + dec->ref_stride * (templ->max_references+2) + tmp_size, + &cfg, &dec->ref_bo); + if (ret) + goto fail; + + timeout = 0; + + BEGIN_NV04(push[0], SUBC_BSP(0x200), 2); + PUSH_DATA (push[0], codec); + PUSH_DATA (push[0], timeout); + + BEGIN_NV04(push[1], SUBC_VP(0x200), 2); + PUSH_DATA (push[1], codec); + PUSH_DATA (push[1], timeout); + + BEGIN_NV04(push[2], SUBC_PPP(0x200), 2); + PUSH_DATA (push[2], ppp_codec); + PUSH_DATA (push[2], timeout); + + ++dec->fence_seq; + +#if NOUVEAU_VP3_DEBUG_FENCE + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP, + 0, 0x1000, NULL, &dec->fence_bo); + if (ret) + goto fail; + + nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client); + dec->fence_map = dec->fence_bo->map; + dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0; + dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map))); + + /* So lets test if the fence is working? 
*/ + nouveau_pushbuf_space(push[0], 6, 1, 0); + PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); + BEGIN_NV04(push[0], SUBC_BSP(0x240), 3); + PUSH_DATAh(push[0], dec->fence_bo->offset); + PUSH_DATA (push[0], dec->fence_bo->offset); + PUSH_DATA (push[0], dec->fence_seq); + + BEGIN_NV04(push[0], SUBC_BSP(0x304), 1); + PUSH_DATA (push[0], 0); + PUSH_KICK (push[0]); + + nouveau_pushbuf_space(push[1], 6, 1, 0); + PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); + BEGIN_NV04(push[1], SUBC_VP(0x240), 3); + PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push[1], dec->fence_seq); + + BEGIN_NV04(push[1], SUBC_VP(0x304), 1); + PUSH_DATA (push[1], 0); + PUSH_KICK (push[1]); + + nouveau_pushbuf_space(push[2], 6, 1, 0); + PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); + BEGIN_NV04(push[2], SUBC_PPP(0x240), 3); + PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push[2], dec->fence_seq); + + BEGIN_NV04(push[2], SUBC_PPP(0x304), 1); + PUSH_DATA (push[2], 0); + PUSH_KICK (push[2]); + + usleep(100); + while (dec->fence_seq > dec->fence_map[0] || + dec->fence_seq > dec->fence_map[4] || + dec->fence_seq > dec->fence_map[8]) { + debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); + usleep(100); + } + debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); +#endif + + return &dec->base; + +fw_fail: + debug_printf("Cannot create decoder without firmware..\n"); + dec->base.destroy(&dec->base); + return NULL; + +fail: + debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret); + dec->base.destroy(&dec->base); + return NULL; +} + +struct pipe_video_buffer * +nv98_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *templat) +{ + return 
nouveau_vp3_video_buffer_create( + pipe, templat, NV50_RESOURCE_FLAG_VIDEO); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video.h b/src/gallium/drivers/nouveau/nv50/nv98_video.h new file mode 100644 index 00000000000..cec761df4ab --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv98_video.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nv50/nv50_context.h" +#include "nv50/nv50_screen.h" +#include "nouveau_vp3_video.h" + +#include "vl/vl_decoder.h" +#include "vl/vl_types.h" + +#include "util/u_video.h" + +extern unsigned +nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, + unsigned comm_seq, unsigned num_buffers, + const void *const *data, const unsigned *num_bytes, + unsigned *vp_caps, unsigned *is_ref, + struct nouveau_vp3_video_buffer *refs[16]); + +extern void +nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, unsigned comm_seq, + unsigned caps, unsigned is_ref, + struct nouveau_vp3_video_buffer *refs[16]); + +extern void +nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, unsigned comm_seq); diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c new file mode 100644 index 00000000000..97d4119b6d1 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c @@ -0,0 +1,159 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv50/nv98_video.h" + +#if NOUVEAU_VP3_DEBUG_FENCE +static void dump_comm_bsp(struct comm *comm) +{ + unsigned idx = comm->bsp_cur_index & 0xf; + debug_printf("Cur seq: %x, bsp byte ofs: %x\n", comm->bsp_cur_index, comm->byte_ofs); + debug_printf("Status: %08x, pos: %08x\n", comm->status[idx], comm->pos[idx]); +} +#endif + +unsigned +nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, + unsigned comm_seq, unsigned num_buffers, + const void *const *data, const unsigned *num_bytes, + unsigned *vp_caps, unsigned *is_ref, + struct nouveau_vp3_video_buffer *refs[16]) +{ + struct nouveau_pushbuf *push = dec->pushbuf[0]; + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); + uint32_t bsp_addr, comm_addr, inter_addr; + uint32_t slice_size, bucket_size, ring_size; + uint32_t caps; + int ret; + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH]; + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; + unsigned fence_extra = 0; + struct nouveau_pushbuf_refn bo_refs[] = { + { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, + { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, +#if NOUVEAU_VP3_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + + if (!dec->bitplane_bo) + num_refs--; + +#if NOUVEAU_VP3_DEBUG_FENCE + fence_extra = 4; 
+#endif + + ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client); + if (ret) { + debug_printf("map failed: %i %s\n", ret, strerror(-ret)); + return -1; + } + + caps = nouveau_vp3_bsp(dec, desc, target, comm_seq, + num_buffers, data, num_bytes); + + nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs); + + nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 8) + fence_extra + 2, num_refs, 0); + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + bsp_addr = bsp_bo->offset >> 8; + inter_addr = inter_bo->offset >> 8; + +#if NOUVEAU_VP3_DEBUG_FENCE + memset(dec->comm, 0, 0x200); + comm_addr = (dec->fence_bo->offset + COMM_OFFSET) >> 8; +#else + comm_addr = bsp_addr + (COMM_OFFSET>>8); +#endif + + BEGIN_NV04(push, SUBC_BSP(0x700), 5); + PUSH_DATA (push, caps); // 700 cmd + PUSH_DATA (push, bsp_addr + 1); // 704 strparm_bsp + PUSH_DATA (push, bsp_addr + 7); // 708 str addr + PUSH_DATA (push, comm_addr); // 70c comm + PUSH_DATA (push, comm_seq); // 710 seq + + if (codec != PIPE_VIDEO_FORMAT_MPEG4_AVC) { + u32 bitplane_addr; + int mpeg12 = (codec == PIPE_VIDEO_FORMAT_MPEG12); + + bitplane_addr = dec->bitplane_bo->offset >> 8; + + nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size); + BEGIN_NV04(push, SUBC_BSP(0x400), mpeg12 ? 
5 : 7); + PUSH_DATA (push, bsp_addr); // 400 picparm addr + PUSH_DATA (push, inter_addr); // 404 interparm addr + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 408 interdata addr + PUSH_DATA (push, ring_size << 8); // 40c interdata_size + if (!mpeg12) { + PUSH_DATA (push, bitplane_addr); // 410 BITPLANE_DATA + PUSH_DATA (push, 0x400); // 414 BITPLANE_DATA_SIZE + } + PUSH_DATA (push, 0); // dma idx + } else { + nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); + BEGIN_NV04(push, SUBC_BSP(0x400), 8); + PUSH_DATA (push, bsp_addr); // 400 picparm addr + PUSH_DATA (push, inter_addr); // 404 interparm addr + PUSH_DATA (push, slice_size << 8); // 408 interparm size? + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 40c interdata addr + PUSH_DATA (push, ring_size << 8); // 410 interdata size + PUSH_DATA (push, inter_addr + slice_size); // 414 bucket? + PUSH_DATA (push, bucket_size << 8); // 418 bucket size? unshifted.. + PUSH_DATA (push, 0); // 41c targets + // TODO: Double check 414 / 418 with nvidia trace + } + +#if NOUVEAU_VP3_DEBUG_FENCE + BEGIN_NV04(push, SUBC_BSP(0x240), 3); + PUSH_DATAh(push, dec->fence_bo->offset); + PUSH_DATA (push, dec->fence_bo->offset); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NV04(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK (push); + + { + unsigned spin = 0; + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) { + debug_printf("b%u: %u\n", dec->fence_seq, dec->fence_map[0]); + dump_comm_bsp(dec->comm); + } + } while (dec->fence_seq > dec->fence_map[0]); + } + + dump_comm_bsp(dec->comm); + return dec->comm->status[comm_seq & 0xf]; +#else + BEGIN_NV04(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); + return 2; +#endif +} diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c new file mode 100644 index 00000000000..6b0b7148dcb --- /dev/null +++ 
b/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c @@ -0,0 +1,143 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nv50/nv98_video.h" + +static void +nv98_decoder_setup_ppp(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target, uint32_t low700) { + struct nouveau_pushbuf *push = dec->pushbuf[2]; + + uint32_t stride_in = mb(dec->base.width); + uint32_t stride_out = mb(target->resources[0]->width0); + uint32_t dec_h = mb(dec->base.height); + uint32_t dec_w = mb(dec->base.width); + uint64_t in_addr; + uint32_t y2, cbcr, cbcr2, i; + struct nouveau_pushbuf_refn bo_refs[] = { + { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { dec->ref_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, +#if NOUVEAU_VP3_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + }; + unsigned num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + + for (i = 0; i < 2; ++i) { + struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i]; + bo_refs[i].bo = mt->base.bo; + } + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + nouveau_vp3_ycbcr_offsets(dec, &y2, &cbcr, &cbcr2); + + BEGIN_NV04(push, SUBC_PPP(0x700), 10); + in_addr = nouveau_vp3_video_addr(dec, target) >> 8; + + PUSH_DATA (push, (stride_out << 24) | (stride_out << 16) | low700); // 700 + PUSH_DATA (push, (stride_in << 24) | (stride_in << 16) | (dec_h << 8) | dec_w); // 704 + assert(dec_w == stride_in); + + /* Input: */ + PUSH_DATA (push, in_addr); // 708 + PUSH_DATA (push, in_addr + y2); // 70c + PUSH_DATA (push, in_addr + cbcr); // 710 + PUSH_DATA (push, in_addr + cbcr2); // 714 + + for (i = 0; i < 2; ++i) { + struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i]; + + PUSH_DATA (push, mt->base.address >> 8); + PUSH_DATA (push, (mt->base.address + mt->total_size/2) >> 8); + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + } +} + +static uint32_t +nv98_decoder_vc1_ppp(struct nouveau_vp3_decoder *dec, struct pipe_vc1_picture_desc *desc, struct nouveau_vp3_video_buffer *target) { + struct nouveau_pushbuf *push = dec->pushbuf[2]; + + 
nv98_decoder_setup_ppp(dec, target, 0x1412); + assert(!desc->deblockEnable); + assert(!(dec->base.width & 0xf)); + assert(!(dec->base.height & 0xf)); + + BEGIN_NV04(push, SUBC_PPP(0x400), 1); + PUSH_DATA (push, desc->pquant << 11); + + // 728 = wtf? + return 0x10; +} + +void +nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct nouveau_vp3_video_buffer *target, unsigned comm_seq) { + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); + struct nouveau_pushbuf *push = dec->pushbuf[2]; + unsigned ppp_caps = 0x10; + unsigned fence_extra = 0; + +#if NOUVEAU_VP3_DEBUG_FENCE + fence_extra = 4; +#endif + + nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0); + + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG12: { + unsigned mpeg2 = dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1; + nv98_decoder_setup_ppp(dec, target, 0x1410 | mpeg2); + break; + } + case PIPE_VIDEO_FORMAT_MPEG4: nv98_decoder_setup_ppp(dec, target, 0x1414); break; + case PIPE_VIDEO_FORMAT_VC1: ppp_caps = nv98_decoder_vc1_ppp(dec, desc.vc1, target); break; + case PIPE_VIDEO_FORMAT_MPEG4_AVC: nv98_decoder_setup_ppp(dec, target, 0x1413); break; + default: assert(0); + } + BEGIN_NV04(push, SUBC_PPP(0x734), 2); + PUSH_DATA (push, comm_seq); + PUSH_DATA (push, ppp_caps); + +#if NOUVEAU_VP3_DEBUG_FENCE + BEGIN_NV04(push, SUBC_PPP(0x240), 3); + PUSH_DATAh(push, (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push, (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NV04(push, SUBC_PPP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK (push); + + { + unsigned spin = 0; + + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) + debug_printf("p%u: %u\n", dec->fence_seq, dec->fence_map[8]); + } while (dec->fence_seq > dec->fence_map[8]); + } +#else + BEGIN_NV04(push, SUBC_PPP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); +#endif +} diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c 
b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c new file mode 100644 index 00000000000..9b756ea73f5 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c @@ -0,0 +1,202 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nv50/nv98_video.h" +#include <sys/mman.h> + +#if NOUVEAU_VP3_DEBUG_FENCE +static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq, + struct nouveau_bo *inter_bo, unsigned slice_size) +{ + unsigned i, idx = comm->pvp_cur_index & 0xf; + debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); +#if 0 + debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); + debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); + + for (i = 0; i != comm->irq_index; ++i) + debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); + for (i = 0; i != comm->parse_endpos_index; ++i) + debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); +#endif + debug_printf("mb_y = %u\n", comm->mb_y[idx]); + if (comm->status_vp[idx] == 1) + return; + + if ((comm->pvp_stage & 0xff) != 0xff) { + unsigned *map; + assert(nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client) >= 0); + map = inter_bo->map; + for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { + debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]); + } + munmap(inter_bo->map, inter_bo->size); + inter_bo->map = NULL; + } + assert((comm->pvp_stage & 0xff) == 0xff); +} +#endif + +static void +nv98_decoder_kick_ref(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target) +{ + dec->refs[target->valid_ref].vidbuf = NULL; + dec->refs[target->valid_ref].last_used = 0; +// debug_printf("Unreffed %p\n", target); +} + +void +nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, unsigned comm_seq, + unsigned caps, unsigned is_ref, + struct nouveau_vp3_video_buffer *refs[16]) +{ + struct nouveau_pushbuf *push = dec->pushbuf[1]; + uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr; + 
uint32_t slice_size, bucket_size, ring_size, i; + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH]; + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; + u32 fence_extra = 0, codec_extra = 0; + struct nouveau_pushbuf_refn bo_refs[] = { + { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, +#if NOUVEAU_VP3_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo; + +#if NOUVEAU_VP3_DEBUG_FENCE + fence_extra = 4; +#endif + + if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) { + nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); + codec_extra += 2; + } else + nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size); + + if (dec->base.max_references > 2) + codec_extra += 1 + (dec->base.max_references - 2); + + pic_addr[16] = nouveau_vp3_video_addr(dec, target) >> 8; + last_addr = null_addr = nouveau_vp3_video_addr(dec, NULL) >> 8; + + for (i = 0; i < dec->base.max_references; ++i) { + if (!refs[i]) + pic_addr[i] = last_addr; + else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i]) + last_addr = pic_addr[i] = nouveau_vp3_video_addr(dec, refs[i]) >> 8; + else + pic_addr[i] = null_addr; + } + if (!is_ref) + nv98_decoder_kick_ref(dec, target); + + nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) + + 6 + codec_extra + fence_extra + 2, num_refs, 0); + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + bsp_addr = bsp_bo->offset >> 8; +#if NOUVEAU_VP3_DEBUG_FENCE + comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8; +#else + comm_addr = bsp_addr + (COMM_OFFSET>>8); +#endif + inter_addr = inter_bo->offset >> 8; + if (dec->fw_bo) + ucode_addr = dec->fw_bo->offset 
>> 8; + else + ucode_addr = 0; + + BEGIN_NV04(push, SUBC_VP(0x700), 7); + PUSH_DATA (push, caps); // 700 + PUSH_DATA (push, comm_seq); // 704 + PUSH_DATA (push, 0); // 708 fuc targets, ignored for nv98 + PUSH_DATA (push, dec->fw_sizes); // 70c + PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr + PUSH_DATA (push, inter_addr); // 714 inter_parm + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs + + if (bucket_size) { + uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2); + + BEGIN_NV04(push, SUBC_VP(0x71c), 2); + PUSH_DATA (push, tmpimg_addr >> 8); // 71c + PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs + } + + BEGIN_NV04(push, SUBC_VP(0x724), 5); + PUSH_DATA (push, comm_addr); // 724 + PUSH_DATA (push, ucode_addr); // 728 + PUSH_DATA (push, pic_addr[16]); // 734 + PUSH_DATA (push, pic_addr[0]); // 72c + PUSH_DATA (push, pic_addr[1]); // 730 + + if (dec->base.max_references > 2) { + int i; + + BEGIN_NV04(push, SUBC_VP(0x400), dec->base.max_references - 2); + for (i = 2; i < dec->base.max_references; ++i) { + assert(0x400 + (i - 2) * 4 < 0x438); + PUSH_DATA (push, pic_addr[i]); + } + } + + if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) { + BEGIN_NV04(push, SUBC_VP(0x438), 1); + PUSH_DATA (push, desc.h264->slice_count); + } + + //debug_printf("Decoding %08lx with %08lx and %08lx\n", pic_addr[16], pic_addr[0], pic_addr[1]); + +#if NOUVEAU_VP3_DEBUG_FENCE + BEGIN_NV04(push, SUBC_VP(0x240), 3); + PUSH_DATAh(push, (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push, (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NV04(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK(push); + + { + unsigned spin = 0; + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) { + debug_printf("v%u: %u\n", dec->fence_seq, dec->fence_map[4]); + dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8); + } + } while (dec->fence_seq > dec->fence_map[4]); + } + 
dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8); +#else + BEGIN_NV04(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); +#endif +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h new file mode 100644 index 00000000000..9a488c17be1 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h @@ -0,0 +1,380 @@ +#ifndef NVC0_2D_XML +#define NVC0_2D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_2d.xml ( 9454 bytes, from 2010-10-16 16:03:11) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24) +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this 
permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NVC0_2D_DST_FORMAT 0x00000200 + +#define NVC0_2D_DST_LINEAR 0x00000204 + +#define NVC0_2D_DST_TILE_MODE 0x00000208 + +#define NVC0_2D_DST_DEPTH 0x0000020c + +#define NVC0_2D_DST_LAYER 0x00000210 + +#define NVC0_2D_DST_PITCH 0x00000214 + +#define NVC0_2D_DST_WIDTH 0x00000218 + +#define NVC0_2D_DST_HEIGHT 0x0000021c + +#define NVC0_2D_DST_ADDRESS_HIGH 0x00000220 + +#define NVC0_2D_DST_ADDRESS_LOW 0x00000224 + +#define NVC0_2D_UNK228 0x00000228 + +#define NVC0_2D_SRC_FORMAT 0x00000230 + +#define NVC0_2D_SRC_LINEAR 0x00000234 + +#define NVC0_2D_SRC_TILE_MODE 0x00000238 + +#define NVC0_2D_SRC_DEPTH 0x0000023c + +#define NVC0_2D_SRC_LAYER 0x00000240 + +#define NVC0_2D_SRC_PITCH 0x00000244 +#define NVC0_2D_SRC_PITCH__MAX 0x00040000 + +#define NVC0_2D_SRC_WIDTH 0x00000248 +#define NVC0_2D_SRC_WIDTH__MAX 0x00010000 + +#define NVC0_2D_SRC_HEIGHT 0x0000024c +#define NVC0_2D_SRC_HEIGHT__MAX 0x00010000 + +#define NVC0_2D_SRC_ADDRESS_HIGH 0x00000250 + +#define NVC0_2D_SRC_ADDRESS_LOW 0x00000254 + +#define NVC0_2D_UNK258 0x00000258 + +#define NVC0_2D_SINGLE_GPC 0x00000260 + +#define NVC0_2D_COND_ADDRESS_HIGH 0x00000264 + +#define NVC0_2D_COND_ADDRESS_LOW 0x00000268 + +#define NVC0_2D_COND_MODE 0x0000026c +#define NVC0_2D_COND_MODE_NEVER 0x00000000 +#define NVC0_2D_COND_MODE_ALWAYS 0x00000001 +#define NVC0_2D_COND_MODE_RES_NON_ZERO 0x00000002 +#define 
NVC0_2D_COND_MODE_EQUAL 0x00000003 +#define NVC0_2D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVC0_2D_CLIP_X 0x00000280 + +#define NVC0_2D_CLIP_Y 0x00000284 + +#define NVC0_2D_CLIP_W 0x00000288 + +#define NVC0_2D_CLIP_H 0x0000028c + +#define NVC0_2D_CLIP_ENABLE 0x00000290 + +#define NVC0_2D_COLOR_KEY_FORMAT 0x00000294 +#define NVC0_2D_COLOR_KEY_FORMAT_16BPP 0x00000000 +#define NVC0_2D_COLOR_KEY_FORMAT_15BPP 0x00000001 +#define NVC0_2D_COLOR_KEY_FORMAT_24BPP 0x00000002 +#define NVC0_2D_COLOR_KEY_FORMAT_30BPP 0x00000003 +#define NVC0_2D_COLOR_KEY_FORMAT_8BPP 0x00000004 +#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005 +#define NVC0_2D_COLOR_KEY_FORMAT_32BPP 0x00000006 + +#define NVC0_2D_COLOR_KEY 0x00000298 + +#define NVC0_2D_COLOR_KEY_ENABLE 0x0000029c + +#define NVC0_2D_ROP 0x000002a0 + +#define NVC0_2D_BETA1 0x000002a4 + +#define NVC0_2D_BETA4 0x000002a8 + +#define NVC0_2D_OPERATION 0x000002ac +#define NVC0_2D_OPERATION_SRCCOPY_AND 0x00000000 +#define NVC0_2D_OPERATION_ROP_AND 0x00000001 +#define NVC0_2D_OPERATION_BLEND 0x00000002 +#define NVC0_2D_OPERATION_SRCCOPY 0x00000003 +#define NVC0_2D_OPERATION_ROP 0x00000004 +#define NVC0_2D_OPERATION_SRCCOPY_PREMULT 0x00000005 +#define NVC0_2D_OPERATION_BLEND_PREMULT 0x00000006 + +#define NVC0_2D_UNK2B0 0x000002b0 +#define NVC0_2D_UNK2B0_UNK0__MASK 0x0000003f +#define NVC0_2D_UNK2B0_UNK0__SHIFT 0 +#define NVC0_2D_UNK2B0_UNK1__MASK 0x00003f00 +#define NVC0_2D_UNK2B0_UNK1__SHIFT 8 + +#define NVC0_2D_PATTERN_SELECT 0x000002b4 +#define NVC0_2D_PATTERN_SELECT_MONO_8X8 0x00000000 +#define NVC0_2D_PATTERN_SELECT_MONO_64X1 0x00000001 +#define NVC0_2D_PATTERN_SELECT_MONO_1X64 0x00000002 +#define NVC0_2D_PATTERN_SELECT_COLOR 0x00000003 + +#define NVC0_2D_PATTERN_COLOR_FORMAT 0x000002e8 +#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000 +#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001 +#define NVC0_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002 +#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003 +#define 
NVC0_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004 +#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005 + +#define NVC0_2D_PATTERN_MONO_FORMAT 0x000002ec +#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000 +#define NVC0_2D_PATTERN_MONO_FORMAT_LE 0x00000001 + +#define NVC0_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0)) +#define NVC0_2D_PATTERN_COLOR__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_COLOR__LEN 0x00000002 + +#define NVC0_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0)) +#define NVC0_2D_PATTERN_BITMAP__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_BITMAP__LEN 0x00000002 + +#define NVC0_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0)) +#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_X8R8G8B8__LEN 0x00000040 +#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff +#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT 0 +#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00 +#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT 8 +#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000 +#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT 16 + +#define NVC0_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0)) +#define NVC0_2D_PATTERN_R5G6B5__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_R5G6B5__LEN 0x00000020 +#define NVC0_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f +#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT 0 +#define NVC0_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0 +#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT 5 +#define NVC0_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800 +#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT 11 +#define NVC0_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000 +#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT 16 +#define NVC0_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000 +#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT 21 +#define NVC0_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000 +#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT 27 + +#define NVC0_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0)) +#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_X1R5G5B5__LEN 0x00000020 +#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f +#define 
NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT 0 +#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0 +#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT 5 +#define NVC0_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00 +#define NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT 10 +#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000 +#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT 16 +#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000 +#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT 21 +#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000 +#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT 26 + +#define NVC0_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0)) +#define NVC0_2D_PATTERN_Y8__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_Y8__LEN 0x00000010 +#define NVC0_2D_PATTERN_Y8_Y0__MASK 0x000000ff +#define NVC0_2D_PATTERN_Y8_Y0__SHIFT 0 +#define NVC0_2D_PATTERN_Y8_Y1__MASK 0x0000ff00 +#define NVC0_2D_PATTERN_Y8_Y1__SHIFT 8 +#define NVC0_2D_PATTERN_Y8_Y2__MASK 0x00ff0000 +#define NVC0_2D_PATTERN_Y8_Y2__SHIFT 16 +#define NVC0_2D_PATTERN_Y8_Y3__MASK 0xff000000 +#define NVC0_2D_PATTERN_Y8_Y3__SHIFT 24 + +#define NVC0_2D_DRAW_SHAPE 0x00000580 +#define NVC0_2D_DRAW_SHAPE_POINTS 0x00000000 +#define NVC0_2D_DRAW_SHAPE_LINES 0x00000001 +#define NVC0_2D_DRAW_SHAPE_LINE_STRIP 0x00000002 +#define NVC0_2D_DRAW_SHAPE_TRIANGLES 0x00000003 +#define NVC0_2D_DRAW_SHAPE_RECTANGLES 0x00000004 + +#define NVC0_2D_DRAW_COLOR_FORMAT 0x00000584 + +#define NVC0_2D_DRAW_COLOR 0x00000588 + +#define NVC0_2D_UNK58C 0x0000058c +#define NVC0_2D_UNK58C_0 0x00000001 +#define NVC0_2D_UNK58C_1 0x00000010 +#define NVC0_2D_UNK58C_2 0x00000100 +#define NVC0_2D_UNK58C_3 0x00001000 + +#define NVC0_2D_DRAW_POINT16 0x000005e0 +#define NVC0_2D_DRAW_POINT16_X__MASK 0x0000ffff +#define NVC0_2D_DRAW_POINT16_X__SHIFT 0 +#define NVC0_2D_DRAW_POINT16_Y__MASK 0xffff0000 +#define NVC0_2D_DRAW_POINT16_Y__SHIFT 16 + +#define NVC0_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0)) +#define NVC0_2D_DRAW_POINT32_X__ESIZE 0x00000008 +#define NVC0_2D_DRAW_POINT32_X__LEN 0x00000040 + +#define 
NVC0_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0)) +#define NVC0_2D_DRAW_POINT32_Y__ESIZE 0x00000008 +#define NVC0_2D_DRAW_POINT32_Y__LEN 0x00000040 + +#define NVC0_2D_SIFC_BITMAP_ENABLE 0x00000800 + +#define NVC0_2D_SIFC_FORMAT 0x00000804 + +#define NVC0_2D_SIFC_BITMAP_FORMAT 0x00000808 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I1 0x00000000 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I4 0x00000001 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I8 0x00000002 + +#define NVC0_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c + +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002 + +#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814 + +#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818 + +#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c + +#define NVC0_2D_SIFC_WIDTH 0x00000838 + +#define NVC0_2D_SIFC_HEIGHT 0x0000083c + +#define NVC0_2D_SIFC_DX_DU_FRACT 0x00000840 + +#define NVC0_2D_SIFC_DX_DU_INT 0x00000844 + +#define NVC0_2D_SIFC_DY_DV_FRACT 0x00000848 + +#define NVC0_2D_SIFC_DY_DV_INT 0x0000084c + +#define NVC0_2D_SIFC_DST_X_FRACT 0x00000850 + +#define NVC0_2D_SIFC_DST_X_INT 0x00000854 + +#define NVC0_2D_SIFC_DST_Y_FRACT 0x00000858 + +#define NVC0_2D_SIFC_DST_Y_INT 0x0000085c + +#define NVC0_2D_SIFC_DATA 0x00000860 + +#define NVC0_2D_UNK0870 0x00000870 + +#define NVC0_2D_UNK0880 0x00000880 + +#define NVC0_2D_UNK0884 0x00000884 + +#define NVC0_2D_UNK0888 0x00000888 + +#define NVC0_2D_BLIT_CONTROL 0x0000088c +#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001 +#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT 0 +#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000 +#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001 +#define NVC0_2D_BLIT_CONTROL_FILTER__MASK 0x00000010 +#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT 4 +#define NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000 +#define 
NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010 + +#define NVC0_2D_BLIT_DST_X 0x000008b0 + +#define NVC0_2D_BLIT_DST_Y 0x000008b4 + +#define NVC0_2D_BLIT_DST_W 0x000008b8 + +#define NVC0_2D_BLIT_DST_H 0x000008bc + +#define NVC0_2D_BLIT_DU_DX_FRACT 0x000008c0 + +#define NVC0_2D_BLIT_DU_DX_INT 0x000008c4 + +#define NVC0_2D_BLIT_DV_DY_FRACT 0x000008c8 + +#define NVC0_2D_BLIT_DV_DY_INT 0x000008cc + +#define NVC0_2D_BLIT_SRC_X_FRACT 0x000008d0 + +#define NVC0_2D_BLIT_SRC_X_INT 0x000008d4 + +#define NVC0_2D_BLIT_SRC_Y_FRACT 0x000008d8 + +#define NVC0_2D_BLIT_SRC_Y_INT 0x000008dc + + +#endif /* NVC0_2D_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h new file mode 100644 index 00000000000..d3f719d333f --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h @@ -0,0 +1,1350 @@ +#ifndef NVC0_3D_XML +#define NVC0_3D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_3d.xml ( 30827 bytes, from 2011-01-13 18:23:07) +- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40) +- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20) +- nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28) +- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17) + +Copyright (C) 2006-2011 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice 
(including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NVC0_3D_NOTIFY_ADDRESS_HIGH 0x00000104 +#define NVC0_3D_NOTIFY_ADDRESS_LOW 0x00000108 +#define NVC0_3D_NOTIFY 0x0000010c + +#define NVC0_3D_SERIALIZE 0x00000110 + +#define NVC0_3D_LINE_WIDTH_SEPARATE 0x0000020c + +#define NVC0_3D_FORCE_EARLY_FRAGMENT_TESTS 0x00000210 + +#define NVC0_3D_MEM_BARRIER 0x0000021c +#define NVC0_3D_MEM_BARRIER_UNK0 0x00000001 +#define NVC0_3D_MEM_BARRIER_UNK1 0x00000002 +#define NVC0_3D_MEM_BARRIER_UNK2 0x00000004 +#define NVC0_3D_MEM_BARRIER_UNK4 0x00000010 +#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100 +#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000 + +#define NVC0_3D_CACHE_SPLIT 0x00000308 +#define NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001 +#define NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1 0x00000002 +#define NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003 + +#define NVC0_3D_TESS_MODE 0x00000320 +#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f +#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0 +#define NVC0_3D_TESS_MODE_PRIM_ISOLINES 0x00000000 +#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES 0x00000001 +#define NVC0_3D_TESS_MODE_PRIM_QUADS 0x00000002 +#define NVC0_3D_TESS_MODE_SPACING__MASK 0x000000f0 +#define NVC0_3D_TESS_MODE_SPACING__SHIFT 4 +#define NVC0_3D_TESS_MODE_SPACING_EQUAL 0x00000000 +#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD 0x00000010 +#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN 0x00000020 +#define NVC0_3D_TESS_MODE_CW 0x00000100 
+#define NVC0_3D_TESS_MODE_CONNECTED 0x00000200 + +#define NVC0_3D_TESS_LEVEL_OUTER(i0) (0x00000324 + 0x4*(i0)) +#define NVC0_3D_TESS_LEVEL_OUTER__ESIZE 0x00000004 +#define NVC0_3D_TESS_LEVEL_OUTER__LEN 0x00000004 + +#define NVC0_3D_TESS_LEVEL_INNER(i0) (0x00000334 + 0x4*(i0)) +#define NVC0_3D_TESS_LEVEL_INNER__ESIZE 0x00000004 +#define NVC0_3D_TESS_LEVEL_INNER__LEN 0x00000002 + +#define NVC0_3D_RASTERIZE_ENABLE 0x0000037c + +#define NVC0_3D_TFB(i0) (0x00000380 + 0x20*(i0)) +#define NVC0_3D_TFB__ESIZE 0x00000020 +#define NVC0_3D_TFB__LEN 0x00000004 + +#define NVC0_3D_TFB_BUFFER_ENABLE(i0) (0x00000380 + 0x20*(i0)) + +#define NVC0_3D_TFB_ADDRESS_HIGH(i0) (0x00000384 + 0x20*(i0)) + +#define NVC0_3D_TFB_ADDRESS_LOW(i0) (0x00000388 + 0x20*(i0)) + +#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0)) + +#define NVC0_3D_TFB_BUFFER_OFFSET(i0) (0x00000390 + 0x20*(i0)) + +#define NVC0_3D_TFB_STREAM(i0) (0x00000700 + 0x10*(i0)) +#define NVC0_3D_TFB_STREAM__ESIZE 0x00000010 +#define NVC0_3D_TFB_STREAM__LEN 0x00000004 + +#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0)) +#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010 +#define NVC0_3D_TFB_VARYING_COUNT__LEN 0x00000004 + +#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0)) +#define NVC0_3D_TFB_BUFFER_STRIDE__ESIZE 0x00000010 +#define NVC0_3D_TFB_BUFFER_STRIDE__LEN 0x00000004 + +#define NVC0_3D_TFB_ENABLE 0x00000744 + +#define NVC0_3D_SAMPLE_SHADING 0x00000754 +#define NVC0_3D_SAMPLE_SHADING_MIN_SAMPLES__MASK 0x0000000f +#define NVC0_3D_SAMPLE_SHADING_MIN_SAMPLES__SHIFT 0 +#define NVC0_3D_SAMPLE_SHADING_ENABLE 0x00000010 + +#define NVC0_3D_LOCAL_BASE 0x0000077c + +#define NVC0_3D_TEMP_ADDRESS_HIGH 0x00000790 + +#define NVC0_3D_TEMP_ADDRESS_LOW 0x00000794 + +#define NVC0_3D_TEMP_SIZE_HIGH 0x00000798 + +#define NVC0_3D_TEMP_SIZE_LOW 0x0000079c + +#define NVC0_3D_WARP_TEMP_ALLOC 0x000007a0 + +#define NVC0_3D_ZCULL_WIDTH 0x000007c0 + +#define NVC0_3D_ZCULL_HEIGHT 0x000007c4 + +#define 
NVC0_3D_ZCULL_ADDRESS_HIGH 0x000007e8 + +#define NVC0_3D_ZCULL_ADDRESS_LOW 0x000007ec + +#define NVC0_3D_ZCULL_LIMIT_HIGH 0x000007f0 + +#define NVC0_3D_ZCULL_LIMIT_LOW 0x000007f4 + +#define NVC0_3D_RT(i0) (0x00000800 + 0x40*(i0)) +#define NVC0_3D_RT__ESIZE 0x00000040 +#define NVC0_3D_RT__LEN 0x00000008 + +#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x40*(i0)) + +#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x40*(i0)) + +#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x40*(i0)) + +#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x40*(i0)) + +#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x40*(i0)) + +#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x40*(i0)) +#define NVC0_3D_RT_TILE_MODE_X 0x00000001 +#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070 +#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4 +#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700 +#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8 +#define NVC0_3D_RT_TILE_MODE_LINEAR 0x00001000 +#define NVC0_3D_RT_TILE_MODE_UNK16 0x00010000 + +#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x40*(i0)) +#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0 +#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000 + +#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x40*(i0)) + +#define NVC0_3D_RT_BASE_LAYER(i0) (0x00000820 + 0x40*(i0)) + +#define NVC0_3D_RT_UNK14(i0) (0x00000824 + 0x40*(i0)) + +#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_X__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_Y__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_Z__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE 
0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0)) +#define NVC0_3D_VIEWPORT_HORIZ__ESIZE 0x00000010 +#define NVC0_3D_VIEWPORT_HORIZ__LEN 0x00000010 +#define NVC0_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff +#define NVC0_3D_VIEWPORT_HORIZ_X__SHIFT 0 +#define NVC0_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000 +#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT 16 + +#define NVC0_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0)) +#define NVC0_3D_VIEWPORT_VERT__ESIZE 0x00000010 +#define NVC0_3D_VIEWPORT_VERT__LEN 0x00000010 +#define NVC0_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff +#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT 0 +#define NVC0_3D_VIEWPORT_VERT_H__MASK 0xffff0000 +#define NVC0_3D_VIEWPORT_VERT_H__SHIFT 16 + +#define NVC0_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0)) +#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010 +#define NVC0_3D_DEPTH_RANGE_NEAR__LEN 0x00000010 + +#define NVC0_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0)) +#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010 +#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010 + +#define NVC0_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ__LEN 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16 + +#define NVC0_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_VERT__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_VERT__LEN 0x00000008 +#define 
NVC0_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_VERT_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_VERT_MAX__SHIFT 16 + +#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0)) +#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008 +#define NVC0_3D_CLIPID_REGION_HORIZ__LEN 0x00000004 +#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff +#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT 0 +#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000 +#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT 16 + +#define NVC0_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0)) +#define NVC0_3D_CLIPID_REGION_VERT__ESIZE 0x00000008 +#define NVC0_3D_CLIPID_REGION_VERT__LEN 0x00000004 +#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff +#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT 0 +#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000 +#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16 + +#define NVC0_3D_CALL_LIMIT_LOG 0x00000d64 + +#define NVC0_3D_COUNTER_ENABLE 0x00000d68 +#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001 +#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002 +#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004 +#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008 +#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010 +#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020 +#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040 +#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080 +#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100 +#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200 +#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400 +#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800 +#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000 +#define NVC0_3D_COUNTER_ENABLE_UNK0D 0x00002000 +#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000 +#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000 + +#define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74 + +#define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78 + +#define NVC0_3D_CLEAR_COLOR(i0) (0x00000d80 + 
0x4*(i0)) +#define NVC0_3D_CLEAR_COLOR__ESIZE 0x00000004 +#define NVC0_3D_CLEAR_COLOR__LEN 0x00000004 + +#define NVC0_3D_CLEAR_DEPTH 0x00000d90 + +#define NVC0_3D_CLEAR_STENCIL 0x00000da0 + +#define NVC0_3D_POLYGON_SMOOTH_ENABLE 0x00000db4 + +#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0 + +#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4 + +#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8 + +#define NVC0_3D_PATCH_VERTICES 0x00000dcc + +#define NVC0_3D_WATCHDOG_TIMER 0x00000de4 + +#define NVC0_3D_WINDOW_OFFSET_X 0x00000df8 + +#define NVC0_3D_WINDOW_OFFSET_Y 0x00000dfc + +#define NVC0_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_ENABLE__ESIZE 0x00000010 +#define NVC0_3D_SCISSOR_ENABLE__LEN 0x00000010 + +#define NVC0_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_HORIZ__ESIZE 0x00000010 +#define NVC0_3D_SCISSOR_HORIZ__LEN 0x00000010 +#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff +#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT 0 +#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000 +#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT 16 + +#define NVC0_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_VERT__ESIZE 0x00000010 +#define NVC0_3D_SCISSOR_VERT__LEN 0x00000010 +#define NVC0_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff +#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT 0 +#define NVC0_3D_SCISSOR_VERT_MAX__MASK 0xffff0000 +#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT 16 + +#define NVC0_3D_STENCIL_BACK_FUNC_REF 0x00000f54 + +#define NVC0_3D_STENCIL_BACK_MASK 0x00000f58 + +#define NVC0_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c + +#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84 + +#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88 + +#define NVC0_3D_COLOR_MASK_COMMON 0x00000f90 + +#define NVC0_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0)) +#define NVC0_3D_DEPTH_BOUNDS__ESIZE 0x00000004 +#define NVC0_3D_DEPTH_BOUNDS__LEN 0x00000002 + +#define NVC0_3D_RT_SEPARATE_FRAG_DATA 0x00000fac + +#define 
NVC0_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0)) +#define NVC0_3D_MSAA_MASK__ESIZE 0x00000004 +#define NVC0_3D_MSAA_MASK__LEN 0x00000004 + +#define NVC0_3D_CLIPID_ADDRESS_HIGH 0x00000fcc + +#define NVC0_3D_CLIPID_ADDRESS_LOW 0x00000fd0 + +#define NVC0_3D_ZETA_ADDRESS_HIGH 0x00000fe0 + +#define NVC0_3D_ZETA_ADDRESS_LOW 0x00000fe4 + +#define NVC0_3D_ZETA_FORMAT 0x00000fe8 + +#define NVC0_3D_ZETA_TILE_MODE 0x00000fec + +#define NVC0_3D_ZETA_LAYER_STRIDE 0x00000ff0 + +#define NVC0_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0 + +#define NVC0_3D_SCREEN_SCISSOR_VERT 0x00000ff8 +#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000 +#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16 +#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff +#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0 + +#define NVC0_3D_CLEAR_FLAGS 0x000010f8 +#define NVC0_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001 +#define NVC0_3D_CLEAR_FLAGS_UNK4 0x00000010 +#define NVC0_3D_CLEAR_FLAGS_SCISSOR 0x00000100 +#define NVC0_3D_CLEAR_FLAGS_VIEWPORT 0x00001000 + +#define NVC0_3D_VERTEX_ID 0x00001118 + +#define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c +#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff +#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16 +#define 
NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000 + +#define NVC0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0)) +#define NVC0_3D_VTX_ATTR_DATA__ESIZE 0x00000004 +#define NVC0_3D_VTX_ATTR_DATA__LEN 0x00000004 + +#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0) (0x00001160 + 0x4*(i0)) +#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE 0x00000004 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN 0x00000020 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000001f +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT 0 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 0x00000040 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK 0x001fff80 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT 7 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK 0x07e00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT 21 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32 0x00200000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32 0x00400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16 0x00600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32 0x00800000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16 0x00a00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8 0x01400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16 0x01e00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 0x02400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8 0x02600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8 0x03000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16 0x03600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8 0x03a00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_10_10_10_2 0x06000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x38000000 +#define 
NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT 27 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT 0x18000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT 0x20000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED 0x28000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED 0x30000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT 0x38000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA 0x80000000 + +#define NVC0_3D_RT_CONTROL 0x0000121c +#define NVC0_3D_RT_CONTROL_COUNT__MASK 0x0000000f +#define NVC0_3D_RT_CONTROL_COUNT__SHIFT 0 +#define NVC0_3D_RT_CONTROL_MAP0__MASK 0x00000070 +#define NVC0_3D_RT_CONTROL_MAP0__SHIFT 4 +#define NVC0_3D_RT_CONTROL_MAP1__MASK 0x00000380 +#define NVC0_3D_RT_CONTROL_MAP1__SHIFT 7 +#define NVC0_3D_RT_CONTROL_MAP2__MASK 0x00001c00 +#define NVC0_3D_RT_CONTROL_MAP2__SHIFT 10 +#define NVC0_3D_RT_CONTROL_MAP3__MASK 0x0000e000 +#define NVC0_3D_RT_CONTROL_MAP3__SHIFT 13 +#define NVC0_3D_RT_CONTROL_MAP4__MASK 0x00070000 +#define NVC0_3D_RT_CONTROL_MAP4__SHIFT 16 +#define NVC0_3D_RT_CONTROL_MAP5__MASK 0x00380000 +#define NVC0_3D_RT_CONTROL_MAP5__SHIFT 19 +#define NVC0_3D_RT_CONTROL_MAP6__MASK 0x01c00000 +#define NVC0_3D_RT_CONTROL_MAP6__SHIFT 22 +#define NVC0_3D_RT_CONTROL_MAP7__MASK 0x0e000000 +#define NVC0_3D_RT_CONTROL_MAP7__SHIFT 25 + +#define NVC0_3D_ZETA_HORIZ 0x00001228 + +#define NVC0_3D_ZETA_VERT 0x0000122c + +#define NVC0_3D_ZETA_ARRAY_MODE 0x00001230 +#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0 +#define NVC0_3D_ZETA_ARRAY_MODE_UNK 0x00010000 + +#define NVC0_3D_LINKED_TSC 0x00001234 + +#define NVC0_3D_DRAW_TFB_BYTES 0x0000123c + +#define NVC0_3D_FP_RESULT_COUNT 0x00001298 + +#define NVC0_3D_DEPTH_TEST_ENABLE 0x000012cc + +#define NVC0_3D_D3D_FILL_MODE 0x000012d0 +#define NVC0_3D_D3D_FILL_MODE_POINT 0x00000001 +#define NVC0_3D_D3D_FILL_MODE_WIREFRAME 
0x00000002 +#define NVC0_3D_D3D_FILL_MODE_SOLID 0x00000003 + +#define NVC0_3D_SHADE_MODEL 0x000012d4 +#define NVC0_3D_SHADE_MODEL_FLAT 0x00001d00 +#define NVC0_3D_SHADE_MODEL_SMOOTH 0x00001d01 + +#define NVC0_3D_BLEND_INDEPENDENT 0x000012e4 + +#define NVC0_3D_DEPTH_WRITE_ENABLE 0x000012e8 + +#define NVC0_3D_ALPHA_TEST_ENABLE 0x000012ec + +#define NVC0_3D_VB_ELEMENT_U8_SETUP 0x00001300 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff +#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0 + +#define NVC0_3D_VB_ELEMENT_U8 0x00001304 +#define NVC0_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff +#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT 0 +#define NVC0_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00 +#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT 8 +#define NVC0_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000 +#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT 16 +#define NVC0_3D_VB_ELEMENT_U8_I3__MASK 0xff000000 +#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT 24 + +#define NVC0_3D_D3D_CULL_MODE 0x00001308 +#define NVC0_3D_D3D_CULL_MODE_NONE 0x00000001 +#define NVC0_3D_D3D_CULL_MODE_FRONT 0x00000002 +#define NVC0_3D_D3D_CULL_MODE_BACK 0x00000003 + +#define NVC0_3D_DEPTH_TEST_FUNC 0x0000130c +#define NVC0_3D_DEPTH_TEST_FUNC_NEVER 0x00000200 +#define NVC0_3D_DEPTH_TEST_FUNC_LESS 0x00000201 +#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202 +#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_DEPTH_TEST_FUNC_GREATER 0x00000204 +#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_ALPHA_TEST_REF 0x00001310 + +#define NVC0_3D_ALPHA_TEST_FUNC 0x00001314 +#define NVC0_3D_ALPHA_TEST_FUNC_NEVER 0x00000200 +#define NVC0_3D_ALPHA_TEST_FUNC_LESS 0x00000201 +#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202 +#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203 +#define 
NVC0_3D_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_DRAW_TFB_STRIDE 0x00001318 +#define NVC0_3D_DRAW_TFB_STRIDE__MIN 0x00000001 +#define NVC0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff + +#define NVC0_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0)) +#define NVC0_3D_BLEND_COLOR__ESIZE 0x00000004 +#define NVC0_3D_BLEND_COLOR__LEN 0x00000004 + +#define NVC0_3D_TSC_FLUSH 0x00001330 +#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_3D_TIC_FLUSH 0x00001334 +#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_3D_TEX_CACHE_CTL 0x00001338 +#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030 +#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT 4 + +#define NVC0_3D_BLEND_SEPARATE_ALPHA 0x0000133c + +#define NVC0_3D_BLEND_EQUATION_RGB 0x00001340 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NVC0_3D_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NVC0_3D_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_BLEND_FUNC_SRC_RGB 0x00001344 + +#define NVC0_3D_BLEND_FUNC_DST_RGB 0x00001348 + +#define NVC0_3D_BLEND_EQUATION_ALPHA 0x0000134c +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_BLEND_FUNC_SRC_ALPHA 0x00001350 + +#define NVC0_3D_BLEND_FUNC_DST_ALPHA 0x00001358 + +#define NVC0_3D_BLEND_ENABLE_COMMON 0x0000135c + +#define 
NVC0_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0)) +#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004 +#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008 + +#define NVC0_3D_STENCIL_ENABLE 0x00001380 + +#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define 
NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394 + +#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x00001398 + +#define NVC0_3D_STENCIL_FRONT_MASK 0x0000139c + +#define NVC0_3D_DRAW_TFB_BASE 0x000013a4 + +#define NVC0_3D_FRAG_COLOR_CLAMP_EN 0x000013a8 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000 + +#define NVC0_3D_SCREEN_Y_CONTROL 0x000013ac +#define NVC0_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001 +#define NVC0_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010 + +#define NVC0_3D_LINE_WIDTH_SMOOTH 0x000013b0 + +#define NVC0_3D_LINE_WIDTH_ALIASED 0x000013b4 + +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400 + +#define NVC0_3D_VERTEX_ARRAY_FLUSH 0x0000142c + +#define NVC0_3D_VB_ELEMENT_BASE 0x00001434 + +#define NVC0_3D_VB_INSTANCE_BASE 0x00001438 + +#define NVC0_3D_CODE_CB_FLUSH 0x00001440 + +#define NVC0_3D_CLIPID_HEIGHT 0x00001504 +#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000 + +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ 0x00001508 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16 + +#define NVC0_3D_CLIPID_FILL_RECT_VERT 0x0000150c +#define 
NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16 + +#define NVC0_3D_CLIP_DISTANCE_ENABLE 0x00001510 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_0 0x00000001 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_1 0x00000002 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_2 0x00000004 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_3 0x00000008 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_4 0x00000010 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_5 0x00000020 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_6 0x00000040 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_7 0x00000080 + +#define NVC0_3D_SAMPLECNT_ENABLE 0x00001514 + +#define NVC0_3D_POINT_SIZE 0x00001518 + +#define NVC0_3D_ZCULL_STATCTRS_ENABLE 0x0000151c + +#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520 + +#define NVC0_3D_COUNTER_RESET 0x00001530 +#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001 +#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002 +#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003 +#define NVC0_3D_COUNTER_RESET_UNK04 0x00000004 +#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010 +#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011 +#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012 +#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013 +#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015 +#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016 +#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017 +#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018 +#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a +#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b +#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c +#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d +#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e +#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f + +#define NVC0_3D_MULTISAMPLE_ENABLE 0x00001534 + +#define NVC0_3D_ZETA_ENABLE 0x00001538 + +#define NVC0_3D_MULTISAMPLE_CTRL 0x0000153c +#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 
0x00000001 +#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010 + +#define NVC0_3D_COND_ADDRESS_HIGH 0x00001550 + +#define NVC0_3D_COND_ADDRESS_LOW 0x00001554 + +#define NVC0_3D_COND_MODE 0x00001558 +#define NVC0_3D_COND_MODE_NEVER 0x00000000 +#define NVC0_3D_COND_MODE_ALWAYS 0x00000001 +#define NVC0_3D_COND_MODE_RES_NON_ZERO 0x00000002 +#define NVC0_3D_COND_MODE_EQUAL 0x00000003 +#define NVC0_3D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVC0_3D_TSC_ADDRESS_HIGH 0x0000155c + +#define NVC0_3D_TSC_ADDRESS_LOW 0x00001560 +#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020 + +#define NVC0_3D_TSC_LIMIT 0x00001564 +#define NVC0_3D_TSC_LIMIT__MAX 0x00001fff + +#define NVC0_3D_POLYGON_OFFSET_FACTOR 0x0000156c + +#define NVC0_3D_LINE_SMOOTH_ENABLE 0x00001570 + +#define NVC0_3D_TIC_ADDRESS_HIGH 0x00001574 + +#define NVC0_3D_TIC_ADDRESS_LOW 0x00001578 + +#define NVC0_3D_TIC_LIMIT 0x0000157c + +#define NVC0_3D_ZCULL_REGION 0x00001590 + +#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594 + +#define NVC0_3D_STENCIL_BACK_OP_FAIL 0x00001598 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 
+ +#define NVC0_3D_STENCIL_BACK_OP_ZPASS 0x000015a0 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_CSAA_ENABLE 0x000015b4 + +#define NVC0_3D_FRAMEBUFFER_SRGB 0x000015b8 + +#define NVC0_3D_POLYGON_OFFSET_UNITS 0x000015bc + +#define NVC0_3D_LAYER 0x000015cc +#define NVC0_3D_LAYER_IDX__MASK 0x0000ffff +#define NVC0_3D_LAYER_IDX__SHIFT 0 +#define NVC0_3D_LAYER_USE_GP 0x00010000 + +#define NVC0_3D_MULTISAMPLE_MODE 0x000015d0 +#define NVC0_3D_MULTISAMPLE_MODE_MS1 0x00000000 +#define NVC0_3D_MULTISAMPLE_MODE_MS2 0x00000001 +#define NVC0_3D_MULTISAMPLE_MODE_MS4 0x00000002 +#define NVC0_3D_MULTISAMPLE_MODE_MS8 0x00000003 +#define NVC0_3D_MULTISAMPLE_MODE_MS8_ALT 0x00000004 +#define NVC0_3D_MULTISAMPLE_MODE_MS2_ALT 0x00000005 +#define NVC0_3D_MULTISAMPLE_MODE_UNK6 0x00000006 +#define NVC0_3D_MULTISAMPLE_MODE_MS4_CS4 0x00000008 +#define NVC0_3D_MULTISAMPLE_MODE_MS4_CS12 0x00000009 +#define NVC0_3D_MULTISAMPLE_MODE_MS8_CS8 0x0000000a +#define NVC0_3D_MULTISAMPLE_MODE_MS8_CS24 0x0000000b + +#define NVC0_3D_VERTEX_BEGIN_D3D 0x000015d4 +#define 
NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000 + +#define NVC0_3D_VERTEX_END_D3D 0x000015d8 +#define NVC0_3D_VERTEX_END_D3D_UNK0 0x00000001 +#define NVC0_3D_VERTEX_END_D3D_UNK1 0x00000002 + +#define NVC0_3D_EDGEFLAG 0x000015e4 + +#define NVC0_3D_VB_ELEMENT_U32 0x000015e8 + +#define NVC0_3D_VB_ELEMENT_U16_SETUP 0x000015ec +#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff +#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0 + +#define NVC0_3D_VB_ELEMENT_U16 0x000015f0 +#define NVC0_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff +#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT 0 +#define NVC0_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000 +#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT 16 + +#define NVC0_3D_VERTEX_BASE_HIGH 0x000015f4 + +#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8 + +#define NVC0_3D_ZCULL_WINDOW_OFFSET_X 0x000015fc + +#define NVC0_3D_ZCULL_WINDOW_OFFSET_Y 0x00001600 + +#define NVC0_3D_POINT_COORD_REPLACE 0x00001604 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__MASK 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__SHIFT 2 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT 0x00000000 +#define 
NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__MASK 0x000007f8 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__SHIFT 3 + +#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608 + +#define NVC0_3D_CODE_ADDRESS_LOW 0x0000160c + +#define NVC0_3D_VERTEX_END_GL 0x00001614 +#define NVC0_3D_VERTEX_END_GL_UNK0 0x00000001 +#define NVC0_3D_VERTEX_END_GL_UNK1 0x00000002 + +#define NVC0_3D_VERTEX_BEGIN_GL 0x00001618 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES 0x0000000e +#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x04000000 +#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x08000000 + +#define NVC0_3D_VERTEX_ID_REPLACE 0x0000161c +#define NVC0_3D_VERTEX_ID_REPLACE_ENABLE 0x00000001 +#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE__MASK 0x00000ff0 +#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT 4 + +#define NVC0_3D_VERTEX_DATA 0x00001640 + +#define NVC0_3D_PRIM_RESTART_ENABLE 0x00001644 + +#define 
NVC0_3D_PRIM_RESTART_INDEX 0x00001648 + +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000 + +#define NVC0_3D_POINT_SMOOTH_ENABLE 0x00001658 + +#define NVC0_3D_POINT_RASTER_RULES 0x0000165c +#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000 +#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001 + +#define NVC0_3D_TEX_MISC 0x00001664 +#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004 + +#define NVC0_3D_LINE_STIPPLE_ENABLE 0x0000166c + +#define NVC0_3D_LINE_STIPPLE_PATTERN 0x00001680 + +#define NVC0_3D_PROVOKING_VERTEX_LAST 0x00001684 + +#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688 + +#define NVC0_3D_POLYGON_STIPPLE_ENABLE 0x0000168c + +#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0)) +#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004 +#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020 + +#define NVC0_3D_ZETA_BASE_LAYER 0x0000179c + +#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_HIGH 0x000017bc + +#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_LOW 0x000017c0 + +#define NVC0_3D_VERTEX_QUARANTINE_SIZE 0x000017c4 +#define NVC0_3D_VERTEX_QUARANTINE_SIZE_16K 0x00000001 +#define NVC0_3D_VERTEX_QUARANTINE_SIZE_32K 0x00000002 +#define NVC0_3D_VERTEX_QUARANTINE_SIZE_64K 0x00000003 + +#define NVC0_3D_STRMOUT_UNK1780(i0) (0x00001780 + 0x4*(i0)) +#define NVC0_3D_STRMOUT_UNK1780__ESIZE 0x00000004 +#define NVC0_3D_STRMOUT_UNK1780__LEN 0x00000004 + +#define NVC0_3D_UNK17BC_ADDRESS_HIGH 0x000017bc + +#define NVC0_3D_UNK17BC_ADDRESS_LOW 0x000017c0 + +#define NVC0_3D_UNK17BC_LIMIT 0x000017c4 + +#define NVC0_3D_INDEX_ARRAY_START_HIGH 0x000017c8 + +#define NVC0_3D_INDEX_ARRAY_START_LOW 0x000017cc + +#define NVC0_3D_INDEX_ARRAY_LIMIT_HIGH 0x000017d0 + +#define NVC0_3D_INDEX_ARRAY_LIMIT_LOW 0x000017d4 + +#define 
NVC0_3D_INDEX_LOG2_SIZE 0x000017d8 + +#define NVC0_3D_INDEX_BATCH_FIRST 0x000017dc + +#define NVC0_3D_INDEX_BATCH_COUNT 0x000017e0 + +#define NVC0_3D_POLYGON_OFFSET_CLAMP 0x0000187c + +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001880 + 0x4*(i0)) +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004 +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000020 + +#define NVC0_3D_VP_POINT_SIZE_EN 0x00001910 + +#define NVC0_3D_CULL_FACE_ENABLE 0x00001918 + +#define NVC0_3D_FRONT_FACE 0x0000191c +#define NVC0_3D_FRONT_FACE_CW 0x00000900 +#define NVC0_3D_FRONT_FACE_CCW 0x00000901 + +#define NVC0_3D_CULL_FACE 0x00001920 +#define NVC0_3D_CULL_FACE_FRONT 0x00000404 +#define NVC0_3D_CULL_FACE_BACK 0x00000405 +#define NVC0_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 + +#define NVC0_3D_LINE_LAST_PIXEL 0x00001924 + +#define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c + +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1 0x00000001 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 0x00000002 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK2 0x00000004 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000 + +#define NVC0_3D_CLIP_DISTANCE_MODE 0x00001940 +#define NVC0_3D_CLIP_DISTANCE_MODE_0__MASK 0x00000001 +#define 
NVC0_3D_CLIP_DISTANCE_MODE_0__SHIFT 0 +#define NVC0_3D_CLIP_DISTANCE_MODE_0_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_0_CULL 0x00000001 +#define NVC0_3D_CLIP_DISTANCE_MODE_1__MASK 0x00000010 +#define NVC0_3D_CLIP_DISTANCE_MODE_1__SHIFT 4 +#define NVC0_3D_CLIP_DISTANCE_MODE_1_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_1_CULL 0x00000010 +#define NVC0_3D_CLIP_DISTANCE_MODE_2__MASK 0x00000100 +#define NVC0_3D_CLIP_DISTANCE_MODE_2__SHIFT 8 +#define NVC0_3D_CLIP_DISTANCE_MODE_2_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_2_CULL 0x00000100 +#define NVC0_3D_CLIP_DISTANCE_MODE_3__MASK 0x00001000 +#define NVC0_3D_CLIP_DISTANCE_MODE_3__SHIFT 12 +#define NVC0_3D_CLIP_DISTANCE_MODE_3_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_3_CULL 0x00001000 +#define NVC0_3D_CLIP_DISTANCE_MODE_4__MASK 0x00010000 +#define NVC0_3D_CLIP_DISTANCE_MODE_4__SHIFT 16 +#define NVC0_3D_CLIP_DISTANCE_MODE_4_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_4_CULL 0x00010000 +#define NVC0_3D_CLIP_DISTANCE_MODE_5__MASK 0x00100000 +#define NVC0_3D_CLIP_DISTANCE_MODE_5__SHIFT 20 +#define NVC0_3D_CLIP_DISTANCE_MODE_5_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_5_CULL 0x00100000 +#define NVC0_3D_CLIP_DISTANCE_MODE_6__MASK 0x01000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_6__SHIFT 24 +#define NVC0_3D_CLIP_DISTANCE_MODE_6_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_6_CULL 0x01000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_7__MASK 0x10000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_7__SHIFT 28 +#define NVC0_3D_CLIP_DISTANCE_MODE_7_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_7_CULL 0x10000000 + +#define NVC0_3D_CLIP_RECTS_EN 0x0000194c + +#define NVC0_3D_CLIP_RECTS_MODE 0x00001950 +#define NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000 +#define NVC0_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001 +#define NVC0_3D_CLIP_RECTS_MODE_NEVER 0x00000002 + +#define NVC0_3D_ZCULL_INVALIDATE 0x00001958 + +#define NVC0_3D_ZCULL_TEST_MASK 0x0000196c +#define 
NVC0_3D_ZCULL_TEST_MASK_FAIL_GT_PASS_LT 0x00000001 +#define NVC0_3D_ZCULL_TEST_MASK_PASS_GT_FAIL_LT 0x00000010 + +#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c +#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001 +#define NVC0_3D_FP_ZORDER_CTRL_1 0x00000010 + +#define NVC0_3D_CLIPID_ENABLE 0x0000197c + +#define NVC0_3D_CLIPID_WIDTH 0x00001980 +#define NVC0_3D_CLIPID_WIDTH__MAX 0x00002000 +#define NVC0_3D_CLIPID_WIDTH__ALIGN 0x00000040 + +#define NVC0_3D_CLIPID_ID 0x00001984 + +#define NVC0_3D_DEPTH_BOUNDS_EN 0x000019bc + +#define NVC0_3D_LOGIC_OP_ENABLE 0x000019c4 + +#define NVC0_3D_LOGIC_OP 0x000019c8 +#define NVC0_3D_LOGIC_OP_CLEAR 0x00001500 +#define NVC0_3D_LOGIC_OP_AND 0x00001501 +#define NVC0_3D_LOGIC_OP_AND_REVERSE 0x00001502 +#define NVC0_3D_LOGIC_OP_COPY 0x00001503 +#define NVC0_3D_LOGIC_OP_AND_INVERTED 0x00001504 +#define NVC0_3D_LOGIC_OP_NOOP 0x00001505 +#define NVC0_3D_LOGIC_OP_XOR 0x00001506 +#define NVC0_3D_LOGIC_OP_OR 0x00001507 +#define NVC0_3D_LOGIC_OP_NOR 0x00001508 +#define NVC0_3D_LOGIC_OP_EQUIV 0x00001509 +#define NVC0_3D_LOGIC_OP_INVERT 0x0000150a +#define NVC0_3D_LOGIC_OP_OR_REVERSE 0x0000150b +#define NVC0_3D_LOGIC_OP_COPY_INVERTED 0x0000150c +#define NVC0_3D_LOGIC_OP_OR_INVERTED 0x0000150d +#define NVC0_3D_LOGIC_OP_NAND 0x0000150e +#define NVC0_3D_LOGIC_OP_SET 0x0000150f + +#define NVC0_3D_ZETA_COMP_ENABLE 0x000019cc + +#define NVC0_3D_CLEAR_BUFFERS 0x000019d0 +#define NVC0_3D_CLEAR_BUFFERS_Z 0x00000001 +#define NVC0_3D_CLEAR_BUFFERS_S 0x00000002 +#define NVC0_3D_CLEAR_BUFFERS_R 0x00000004 +#define NVC0_3D_CLEAR_BUFFERS_G 0x00000008 +#define NVC0_3D_CLEAR_BUFFERS_B 0x00000010 +#define NVC0_3D_CLEAR_BUFFERS_A 0x00000020 +#define NVC0_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0 +#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT 6 +#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00 +#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10 + +#define NVC0_3D_CLIPID_FILL 0x000019d4 + +#define NVC0_3D_RT_COMP_ENABLE(i0) (0x000019e0 + 0x4*(i0)) +#define NVC0_3D_RT_COMP_ENABLE__ESIZE 
0x00000004 +#define NVC0_3D_RT_COMP_ENABLE__LEN 0x00000008 + +#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0)) +#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004 +#define NVC0_3D_COLOR_MASK__LEN 0x00000008 +#define NVC0_3D_COLOR_MASK_R 0x0000000f +#define NVC0_3D_COLOR_MASK_G 0x000000f0 +#define NVC0_3D_COLOR_MASK_B 0x00000f00 +#define NVC0_3D_COLOR_MASK_A 0x0000f000 + +#define NVC0_3D_QUERY_ADDRESS_HIGH 0x00001b00 + +#define NVC0_3D_QUERY_ADDRESS_LOW 0x00001b04 + +#define NVC0_3D_QUERY_SEQUENCE 0x00001b08 + +#define NVC0_3D_QUERY_GET 0x00001b0c +#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003 +#define NVC0_3D_QUERY_GET_MODE__SHIFT 0 +#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000 +#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001 +#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002 +#define NVC0_3D_QUERY_GET_FENCE 0x00000010 +#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0 +#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5 +#define NVC0_3D_QUERY_GET_UNK8 0x00000100 +#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000 +#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12 +#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000 +#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16 +#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000 +#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000 +#define NVC0_3D_QUERY_GET_INTR 0x00100000 +#define NVC0_3D_QUERY_GET_UNK21 0x00200000 +#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000 +#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23 +#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000 +#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000 +#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000 +#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000 +#define NVC0_3D_QUERY_GET_SHORT 0x10000000 + +#define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN 0x00000020 +#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff +#define 
NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0 +#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE 0x00001000 + +#define NVC0_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00001c04 + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000020 + +#define NVC0_3D_VERTEX_ARRAY_START_LOW(i0) (0x00001c08 + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000020 + +#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0) (0x00001c0c + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000020 + +#define NVC0_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0)) +#define NVC0_3D_IBLEND__ESIZE 0x00000020 +#define NVC0_3D_IBLEND__LEN 0x00000008 + +#define NVC0_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0)) +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NVC0_3D_IBLEND_EQUATION_RGB_MIN 0x00008007 +#define NVC0_3D_IBLEND_EQUATION_RGB_MAX 0x00008008 +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0)) + +#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0)) + +#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0)) +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0)) + +#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0)) + +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001f00 + 0x8*(i0)) +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000020 + +#define 
NVC0_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001f04 + 0x8*(i0)) +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000020 + +#define NVC0_3D_SP(i0) (0x00002000 + 0x40*(i0)) +#define NVC0_3D_SP__ESIZE 0x00000040 +#define NVC0_3D_SP__LEN 0x00000006 + +#define NVC0_3D_SP_SELECT(i0) (0x00002000 + 0x40*(i0)) +#define NVC0_3D_SP_SELECT_ENABLE 0x00000001 +#define NVC0_3D_SP_SELECT_PROGRAM__MASK 0x00000070 +#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT 4 +#define NVC0_3D_SP_SELECT_PROGRAM_VP_A 0x00000000 +#define NVC0_3D_SP_SELECT_PROGRAM_VP_B 0x00000010 +#define NVC0_3D_SP_SELECT_PROGRAM_TCP 0x00000020 +#define NVC0_3D_SP_SELECT_PROGRAM_TEP 0x00000030 +#define NVC0_3D_SP_SELECT_PROGRAM_GP 0x00000040 +#define NVC0_3D_SP_SELECT_PROGRAM_FP 0x00000050 + +#define NVC0_3D_SP_START_ID(i0) (0x00002004 + 0x40*(i0)) + +#define NVC0_3D_SP_GPR_ALLOC(i0) (0x0000200c + 0x40*(i0)) + +#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0)) +#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010 +#define NVC0_3D_TEX_LIMITS__LEN 0x00000005 + +#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0)) +#define NVC0_3D_FIRMWARE__ESIZE 0x00000004 +#define NVC0_3D_FIRMWARE__LEN 0x00000020 + +#define NVC0_3D_CB_SIZE 0x00002380 + +#define NVC0_3D_CB_ADDRESS_HIGH 0x00002384 + +#define NVC0_3D_CB_ADDRESS_LOW 0x00002388 + +#define NVC0_3D_CB_POS 0x0000238c + +#define NVC0_3D_CB_DATA(i0) (0x00002390 + 0x4*(i0)) +#define NVC0_3D_CB_DATA__ESIZE 0x00000004 +#define NVC0_3D_CB_DATA__LEN 0x00000010 + +#define NVC0_3D_BIND_TSC(i0) (0x00002400 + 0x20*(i0)) +#define NVC0_3D_BIND_TSC__ESIZE 0x00000020 +#define NVC0_3D_BIND_TSC__LEN 0x00000005 +#define NVC0_3D_BIND_TSC_ACTIVE 0x00000001 +#define NVC0_3D_BIND_TSC_SAMPLER__MASK 0x00000ff0 +#define NVC0_3D_BIND_TSC_SAMPLER__SHIFT 4 +#define NVC0_3D_BIND_TSC_TSC__MASK 0x01fff000 +#define NVC0_3D_BIND_TSC_TSC__SHIFT 12 + +#define NVC0_3D_BIND_TIC(i0) (0x00002404 + 0x20*(i0)) +#define NVC0_3D_BIND_TIC__ESIZE 0x00000020 +#define 
NVC0_3D_BIND_TIC__LEN 0x00000005 +#define NVC0_3D_BIND_TIC_ACTIVE 0x00000001 +#define NVC0_3D_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NVC0_3D_BIND_TIC_TEXTURE__SHIFT 1 +#define NVC0_3D_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NVC0_3D_BIND_TIC_TIC__SHIFT 9 + +#define NVC0_3D_CB_BIND(i0) (0x00002410 + 0x20*(i0)) +#define NVC0_3D_CB_BIND__ESIZE 0x00000020 +#define NVC0_3D_CB_BIND__LEN 0x00000005 +#define NVC0_3D_CB_BIND_VALID 0x00000001 +#define NVC0_3D_CB_BIND_INDEX__MASK 0x000000f0 +#define NVC0_3D_CB_BIND_INDEX__SHIFT 4 + +#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600 + +#define NVE4_3D_TEX_CB_INDEX 0x00002608 +#define NVE4_3D_TEX_CB_INDEX__MIN 0x00000000 +#define NVE4_3D_TEX_CB_INDEX__MAX 0x00000010 + +#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1)) +#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 +#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020 + +#define NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE 0x00003800 + +#define NVC0_3D_MACRO_VERTEX_ARRAY_SELECT 0x00003808 + +#define NVC0_3D_MACRO_BLEND_ENABLES 0x00003810 + +#define NVC0_3D_MACRO_POLYGON_MODE_FRONT 0x00003818 +#define NVC0_3D_MACRO_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NVC0_3D_MACRO_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NVC0_3D_MACRO_POLYGON_MODE_FRONT_FILL 0x00001b02 + +#define NVC0_3D_MACRO_POLYGON_MODE_BACK 0x00003820 +#define NVC0_3D_MACRO_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NVC0_3D_MACRO_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NVC0_3D_MACRO_POLYGON_MODE_BACK_FILL 0x00001b02 + +#define NVC0_3D_MACRO_GP_SELECT 0x00003828 + +#define NVC0_3D_MACRO_TEP_SELECT 0x00003830 + + +#endif /* NVC0_3D_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h new file mode 100644 index 00000000000..84b152213a2 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h @@ -0,0 +1,98 @@ +#ifndef NV_3DDEFS_XML +#define NV_3DDEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38) +- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28) +- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20) +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email 
protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000 +#define NV50_3D_BLEND_FACTOR_ONE 0x00004001 +#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303 +#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305 +#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308 +#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002 +#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004 +#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901 +#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903 + +#endif /* NV_3DDEFS_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c new file mode 100644 index 00000000000..b49f1aecfec --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -0,0 +1,271 @@ +/* + * Copyright 2013 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christoph Bumiller, Samuel Pitoiset + */ + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_compute.h" + +int +nvc0_screen_compute_setup(struct nvc0_screen *screen, + struct nouveau_pushbuf *push) +{ + struct nouveau_object *chan = screen->base.channel; + struct nouveau_device *dev = screen->base.device; + uint32_t obj_class; + int ret; + int i; + + switch (dev->chipset & 0xf0) { + case 0xc0: + if (dev->chipset == 0xc8) + obj_class = NVC8_COMPUTE_CLASS; + else + obj_class = NVC0_COMPUTE_CLASS; + break; + case 0xd0: + obj_class = NVC0_COMPUTE_CLASS; + break; + default: + NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); + return -1; + } + + ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0, + &screen->compute); + if (ret) { + NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); + return ret; + } + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL, + &screen->parm); + if (ret) + return ret; + + BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->compute->oclass); + + /* hardware limit */ + BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1); + PUSH_DATA (push, screen->mp_count); + BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1); + PUSH_DATA (push, 0xf); + + BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1); + PUSH_DATA (push, 0x8000); + + /* global memory setup */ + BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + PUSH_DATA (push, 0); + BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100); + for (i = 0; i <= 
0xff; i++) + PUSH_DATA (push, (0xc << 28) | (i << 16) | i); + BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + PUSH_DATA (push, 1); + + /* local memory and cstack setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->tls->offset); + PUSH_DATA (push, screen->tls->offset); + BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2); + PUSH_DATAh(push, screen->tls->size); + PUSH_DATA (push, screen->tls->size); + BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1); + PUSH_DATA (push, 1 << 24); + + /* shared memory setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1); + PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); + BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1); + PUSH_DATA (push, 2 << 24); + BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1); + PUSH_DATA (push, 0); + + /* code segment setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->text->offset); + PUSH_DATA (push, screen->text->offset); + + /* bind parameters buffer */ + BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); + PUSH_DATA (push, screen->parm->size); + PUSH_DATAh(push, screen->parm->offset); + PUSH_DATA (push, screen->parm->offset); + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); + PUSH_DATA (push, (0 << 8) | 1); + + /* TODO: textures & samplers */ + + return 0; +} + +boolean +nvc0_compute_validate_program(struct nvc0_context *nvc0) +{ + struct nvc0_program *prog = nvc0->compprog; + + if (prog->mem) + return TRUE; + + if (!prog->translated) { + prog->translated = nvc0_program_translate( + prog, nvc0->screen->base.device->chipset); + if (!prog->translated) + return FALSE; + } + if (unlikely(!prog->code_size)) + return FALSE; + + if (likely(prog->code_size)) { + if (nvc0_program_upload_code(nvc0, prog)) { + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE); + return TRUE; + 
} + } + return FALSE; +} + +static boolean +nvc0_compute_state_validate(struct nvc0_context *nvc0) +{ + if (!nvc0_compute_validate_program(nvc0)) + return FALSE; + + /* TODO: textures, samplers, surfaces, global memory buffers */ + + nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE); + + nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp); + if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf))) + return FALSE; + if (unlikely(nvc0->state.flushed)) + nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE); + + return TRUE; + +} + +static void +nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + struct nvc0_program *cp = nvc0->compprog; + + if (cp->parm_size) { + BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); + PUSH_DATA (push, align(cp->parm_size, 0x100)); + PUSH_DATAh(push, screen->parm->offset); + PUSH_DATA (push, screen->parm->offset); + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); + PUSH_DATA (push, (0 << 8) | 1); + /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */ + BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4); + PUSH_DATA (push, 0); + PUSH_DATAp(push, input, cp->parm_size / 4); + + BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); + } +} + +void +nvc0_launch_grid(struct pipe_context *pipe, + const uint *block_layout, const uint *grid_layout, + uint32_t label, + const void *input) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *cp = nvc0->compprog; + unsigned s, i; + int ret; + + ret = !nvc0_compute_state_validate(nvc0); + if (ret) + goto out; + + nvc0_compute_upload_input(nvc0, input); + + BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1); + PUSH_DATA (push, nvc0_program_symbol_offset(cp, label)); + + BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3); + PUSH_DATA (push, 
align(cp->cp.lmem_size, 0x10)); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */ + + BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3); + PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); + PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]); + PUSH_DATA (push, cp->num_barriers); + BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1); + PUSH_DATA (push, cp->num_gprs); + + /* grid/block setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); + PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]); + PUSH_DATA (push, grid_layout[2]); + BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); + PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]); + PUSH_DATA (push, block_layout[2]); + + /* launch preliminary setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1); + PUSH_DATA (push, 0x1); + BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); + + /* kernel launching */ + BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1); + PUSH_DATA (push, 0x1000); + BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1); + PUSH_DATA (push, 0x1); + + /* rebind all the 3D constant buffers + * (looks like binding a CB on COMPUTE clobbers 3D state) */ + nvc0->dirty |= NVC0_NEW_CONSTBUF; + for (s = 0; s < 6; s++) { + for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++) + if (nvc0->constbuf[s][i].u.buf) + nvc0->constbuf_dirty[s] |= 1 << i; + } + memset(nvc0->state.uniform_buffer_bound, 0, + sizeof(nvc0->state.uniform_buffer_bound)); + +out: + if (ret) + NOUVEAU_ERR("Failed to launch grid !\n"); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h new file mode 100644 index 
00000000000..9a1a71760d7 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h @@ -0,0 +1,10 @@ +#ifndef NVC0_COMPUTE_H +#define NVC0_COMPUTE_H + +#include "nv50/nv50_defs.xml.h" +#include "nvc0/nvc0_compute.xml.h" + +boolean +nvc0_compute_validate_program(struct nvc0_context *nvc0); + +#endif /* NVC0_COMPUTE_H */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h new file mode 100644 index 00000000000..35e6bfdbea2 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h @@ -0,0 +1,410 @@ +#ifndef NVC0_COMPUTE_XML +#define NVC0_COMPUTE_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_compute.xml ( 11145 bytes, from 2013-04-27 14:00:13) +- copyright.xml ( 6452 bytes, from 2013-02-27 22:13:22) +- nvchipsets.xml ( 3954 bytes, from 2013-04-27 14:00:13) +- nv_object.xml ( 14395 bytes, from 2013-04-27 14:00:13) +- nv_defs.xml ( 4437 bytes, from 2013-02-27 22:13:22) +- nv50_defs.xml ( 16652 bytes, from 2013-06-20 13:45:33) + +Copyright (C) 2006-2013 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice 
(including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NVC0_COMPUTE_LOCAL_POS_ALLOC 0x00000204 + +#define NVC0_COMPUTE_LOCAL_NEG_ALLOC 0x00000208 + +#define NVC0_COMPUTE_WARP_CSTACK_SIZE 0x0000020c + +#define NVC0_COMPUTE_TEX_LIMITS 0x00000210 +#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f +#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0 +#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000 +#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004 +#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0 +#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4 +#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000 +#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007 + +#define NVC0_COMPUTE_SHARED_BASE 0x00000214 + +#define NVC0_COMPUTE_MEM_BARRIER 0x0000021c +#define NVC0_COMPUTE_MEM_BARRIER_UNK0 0x00000001 +#define NVC0_COMPUTE_MEM_BARRIER_UNK1 0x00000002 +#define NVC0_COMPUTE_MEM_BARRIER_UNK2 0x00000004 +#define NVC0_COMPUTE_MEM_BARRIER_UNK4 0x00000010 +#define NVC0_COMPUTE_MEM_BARRIER_UNK8 0x00000100 +#define NVC0_COMPUTE_MEM_BARRIER_UNK12 0x00001000 + +#define NVC0_COMPUTE_BIND_TSC 0x00000228 +#define NVC0_COMPUTE_BIND_TSC_ACTIVE 0x00000001 +#define NVC0_COMPUTE_BIND_TSC_SAMPLER__MASK 0x00000ff0 +#define NVC0_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4 +#define NVC0_COMPUTE_BIND_TSC_TSC__MASK 0x01fff000 +#define NVC0_COMPUTE_BIND_TSC_TSC__SHIFT 12 + +#define 
NVC0_COMPUTE_BIND_TIC 0x0000022c +#define NVC0_COMPUTE_BIND_TIC_ACTIVE 0x00000001 +#define NVC0_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NVC0_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1 +#define NVC0_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NVC0_COMPUTE_BIND_TIC_TIC__SHIFT 9 + +#define NVC0_COMPUTE_BIND_TSC2 0x00000230 +#define NVC0_COMPUTE_BIND_TSC2_ACTIVE 0x00000001 +#define NVC0_COMPUTE_BIND_TSC2_SAMPLER__MASK 0x00000010 +#define NVC0_COMPUTE_BIND_TSC2_SAMPLER__SHIFT 4 +#define NVC0_COMPUTE_BIND_TSC2_TSC__MASK 0x01fff000 +#define NVC0_COMPUTE_BIND_TSC2_TSC__SHIFT 12 + +#define NVC0_COMPUTE_BIND_TIC2 0x00000234 +#define NVC0_COMPUTE_BIND_TIC2_ACTIVE 0x00000001 +#define NVC0_COMPUTE_BIND_TIC2_TEXTURE__MASK 0x00000002 +#define NVC0_COMPUTE_BIND_TIC2_TEXTURE__SHIFT 1 +#define NVC0_COMPUTE_BIND_TIC2_TIC__MASK 0x7ffffe00 +#define NVC0_COMPUTE_BIND_TIC2_TIC__SHIFT 9 + +#define NVC0_COMPUTE_GRIDDIM_YX 0x00000238 +#define NVC0_COMPUTE_GRIDDIM_YX_X__MASK 0x0000ffff +#define NVC0_COMPUTE_GRIDDIM_YX_X__SHIFT 0 +#define NVC0_COMPUTE_GRIDDIM_YX_Y__MASK 0xffff0000 +#define NVC0_COMPUTE_GRIDDIM_YX_Y__SHIFT 16 + +#define NVC0_COMPUTE_GRIDDIM_Z 0x0000023c + +#define NVC0_COMPUTE_UNK244_TIC_FLUSH 0x00000244 + +#define NVC0_COMPUTE_SHARED_SIZE 0x0000024c + +#define NVC0_COMPUTE_THREADS_ALLOC 0x00000250 + +#define NVC0_COMPUTE_BARRIER_ALLOC 0x00000254 + +#define NVC0_COMPUTE_UNK028C 0x0000028c + +#define NVC0_COMPUTE_COMPUTE_BEGIN 0x0000029c +#define NVC0_COMPUTE_COMPUTE_BEGIN_UNK0 0x00000001 + +#define NVC0_COMPUTE_UNK02A0 0x000002a0 + +#define NVC0_COMPUTE_CP_GPR_ALLOC 0x000002c0 + +#define NVC0_COMPUTE_UNK02C4 0x000002c4 + +#define NVC0_COMPUTE_GLOBAL_BASE 0x000002c8 +#define NVC0_COMPUTE_GLOBAL_BASE_HIGH__MASK 0x000000ff +#define NVC0_COMPUTE_GLOBAL_BASE_HIGH__SHIFT 0 +#define NVC0_COMPUTE_GLOBAL_BASE_INDEX__MASK 0x00ff0000 +#define NVC0_COMPUTE_GLOBAL_BASE_INDEX__SHIFT 16 +#define NVC0_COMPUTE_GLOBAL_BASE_READ_OK 0x40000000 +#define NVC0_COMPUTE_GLOBAL_BASE_WRITE_OK 
0x80000000 + +#define NVC8_COMPUTE_UNK02E0 0x000002e0 + +#define NVC0_COMPUTE_CACHE_SPLIT 0x00000308 +#define NVC0_COMPUTE_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001 +#define NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003 + +#define NVC0_COMPUTE_UNK030C 0x0000030c + +#define NVC0_COMPUTE_UNK0360 0x00000360 +#define NVC0_COMPUTE_UNK0360_UNK0 0x00000001 +#define NVC0_COMPUTE_UNK0360_UNK8__MASK 0x00000300 +#define NVC0_COMPUTE_UNK0360_UNK8__SHIFT 8 +#define NVC8_COMPUTE_UNK0360_UNK10__MASK 0x00000c00 +#define NVC8_COMPUTE_UNK0360_UNK10__SHIFT 10 + +#define NVC0_COMPUTE_LAUNCH 0x00000368 + +#define NVC0_COMPUTE_UNK036C 0x0000036c +#define NVC0_COMPUTE_UNK036C_UNK0__MASK 0x00000003 +#define NVC0_COMPUTE_UNK036C_UNK0__SHIFT 0 +#define NVC8_COMPUTE_UNK036C_UNK2__MASK 0x0000000c +#define NVC8_COMPUTE_UNK036C_UNK2__SHIFT 2 + +#define NVC0_COMPUTE_BLOCKDIM_YX 0x000003ac +#define NVC0_COMPUTE_BLOCKDIM_YX_X__MASK 0x0000ffff +#define NVC0_COMPUTE_BLOCKDIM_YX_X__SHIFT 0 +#define NVC0_COMPUTE_BLOCKDIM_YX_Y__MASK 0xffff0000 +#define NVC0_COMPUTE_BLOCKDIM_YX_Y__SHIFT 16 + +#define NVC0_COMPUTE_BLOCKDIM_Z 0x000003b0 + +#define NVC0_COMPUTE_CP_START_ID 0x000003b4 + +#define NVC0_COMPUTE_FIRMWARE(i0) (0x00000500 + 0x4*(i0)) +#define NVC0_COMPUTE_FIRMWARE__ESIZE 0x00000004 +#define NVC0_COMPUTE_FIRMWARE__LEN 0x00000020 + +#define NVC0_COMPUTE_MP_LIMIT 0x00000758 + +#define NVC0_COMPUTE_LOCAL_BASE 0x0000077c + +#define NVC0_COMPUTE_GRIDID 0x00000780 + +#define NVC0_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790 + +#define NVC0_COMPUTE_TEMP_ADDRESS_LOW 0x00000794 + +#define NVC0_COMPUTE_TEMP_SIZE_HIGH 0x00000798 + +#define NVC0_COMPUTE_TEMP_SIZE_LOW 0x0000079c + +#define NVC0_COMPUTE_WARP_TEMP_ALLOC 0x000007a0 + +#define NVC0_COMPUTE_COMPUTE_END 0x00000a04 +#define NVC0_COMPUTE_COMPUTE_END_UNK0 0x00000001 + +#define NVC0_COMPUTE_UNK0A08 0x00000a08 + +#define NVC0_COMPUTE_CALL_LIMIT_LOG 0x00000d64 + +#define NVC0_COMPUTE_UNK0D94 0x00000d94 + +#define NVC0_COMPUTE_WATCHDOG_TIMER 0x00000de4 + 
+#define NVC0_COMPUTE_UNK10F4 0x000010f4 +#define NVC0_COMPUTE_UNK10F4_UNK0 0x00000001 +#define NVC0_COMPUTE_UNK10F4_UNK4 0x00000010 +#define NVC0_COMPUTE_UNK10F4_UNK8 0x00000100 + +#define NVC0_COMPUTE_LINKED_TSC 0x00001234 + +#define NVC0_COMPUTE_UNK1288_TIC_FLUSH 0x00001288 + +#define NVC0_COMPUTE_UNK12AC 0x000012ac + +#define NVC0_COMPUTE_TSC_FLUSH 0x00001330 +#define NVC0_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_COMPUTE_TIC_FLUSH 0x00001334 +#define NVC0_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_COMPUTE_TEX_CACHE_CTL 0x00001338 +#define NVC0_COMPUTE_TEX_CACHE_CTL_UNK0__MASK 0x00000007 +#define NVC0_COMPUTE_TEX_CACHE_CTL_UNK0__SHIFT 0 +#define NVC0_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0 +#define NVC0_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4 + +#define NVC0_COMPUTE_UNK1354 0x00001354 + +#define NVC0_COMPUTE_UNK1424_TSC_FLUSH 0x00001424 + +#define NVC0_COMPUTE_COND_ADDRESS_HIGH 0x00001550 + +#define NVC0_COMPUTE_COND_ADDRESS_LOW 0x00001554 + +#define NVC0_COMPUTE_COND_MODE 0x00001558 +#define NVC0_COMPUTE_COND_MODE_NEVER 0x00000000 +#define NVC0_COMPUTE_COND_MODE_ALWAYS 0x00000001 +#define NVC0_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002 +#define NVC0_COMPUTE_COND_MODE_EQUAL 0x00000003 +#define NVC0_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVC0_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c + +#define NVC0_COMPUTE_TSC_ADDRESS_LOW 0x00001560 + +#define NVC0_COMPUTE_TSC_LIMIT 0x00001564 + +#define NVC0_COMPUTE_TIC_ADDRESS_HIGH 0x00001574 + +#define NVC0_COMPUTE_TIC_ADDRESS_LOW 0x00001578 + +#define NVC0_COMPUTE_TIC_LIMIT 0x0000157c + +#define NVC0_COMPUTE_CODE_ADDRESS_HIGH 0x00001608 + +#define NVC0_COMPUTE_CODE_ADDRESS_LOW 0x0000160c + +#define NVC0_COMPUTE_TEX_MISC 0x00001664 +#define NVC0_COMPUTE_TEX_MISC_UNK 0x00000001 +#define 
NVC0_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002 + +#define NVC0_COMPUTE_UNK1690 0x00001690 +#define NVC0_COMPUTE_UNK1690_ALWAYS_DERIV 0x00000001 +#define NVC0_COMPUTE_UNK1690_UNK16 0x00010000 + +#define NVC0_COMPUTE_CB_BIND 0x00001694 +#define NVC0_COMPUTE_CB_BIND_VALID 0x00000001 +#define NVC0_COMPUTE_CB_BIND_INDEX__MASK 0x00001f00 +#define NVC0_COMPUTE_CB_BIND_INDEX__SHIFT 8 + +#define NVC0_COMPUTE_FLUSH 0x00001698 +#define NVC0_COMPUTE_FLUSH_CODE 0x00000001 +#define NVC0_COMPUTE_FLUSH_GLOBAL 0x00000010 +#define NVC0_COMPUTE_FLUSH_UNK8 0x00000100 +#define NVC0_COMPUTE_FLUSH_CB 0x00001000 + +#define NVC0_COMPUTE_UNK1930 0x00001930 + +#define NVC0_COMPUTE_UNK1944 0x00001944 + +#define NVC0_COMPUTE_DELAY 0x00001a24 + +#define NVC0_COMPUTE_UNK1A2C(i0) (0x00001a2c + 0x4*(i0)) +#define NVC0_COMPUTE_UNK1A2C__ESIZE 0x00000004 +#define NVC0_COMPUTE_UNK1A2C__LEN 0x00000005 + +#define NVC0_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00 + +#define NVC0_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04 + +#define NVC0_COMPUTE_QUERY_SEQUENCE 0x00001b08 + +#define NVC0_COMPUTE_QUERY_GET 0x00001b0c +#define NVC0_COMPUTE_QUERY_GET_MODE__MASK 0x00000003 +#define NVC0_COMPUTE_QUERY_GET_MODE__SHIFT 0 +#define NVC0_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000 +#define NVC0_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003 +#define NVC0_COMPUTE_QUERY_GET_INTR 0x00100000 +#define NVC0_COMPUTE_QUERY_GET_SHORT 0x10000000 + +#define NVC0_COMPUTE_CB_SIZE 0x00002380 + +#define NVC0_COMPUTE_CB_ADDRESS_HIGH 0x00002384 + +#define NVC0_COMPUTE_CB_ADDRESS_LOW 0x00002388 + +#define NVC0_COMPUTE_CB_POS 0x0000238c + +#define NVC0_COMPUTE_CB_DATA(i0) (0x00002390 + 0x4*(i0)) +#define NVC0_COMPUTE_CB_DATA__ESIZE 0x00000004 +#define NVC0_COMPUTE_CB_DATA__LEN 0x00000010 + +#define NVC0_COMPUTE_IMAGE(i0) (0x00002700 + 0x20*(i0)) +#define NVC0_COMPUTE_IMAGE__ESIZE 0x00000020 +#define NVC0_COMPUTE_IMAGE__LEN 0x00000008 + +#define NVC0_COMPUTE_IMAGE_ADDRESS_HIGH(i0) (0x00002700 + 0x20*(i0)) + +#define 
NVC0_COMPUTE_IMAGE_ADDRESS_LOW(i0) (0x00002704 + 0x20*(i0)) + +#define NVC0_COMPUTE_IMAGE_WIDTH(i0) (0x00002708 + 0x20*(i0)) + +#define NVC0_COMPUTE_IMAGE_HEIGHT(i0) (0x0000270c + 0x20*(i0)) +#define NVC0_COMPUTE_IMAGE_HEIGHT_HEIGHT__MASK 0x0000ffff +#define NVC0_COMPUTE_IMAGE_HEIGHT_HEIGHT__SHIFT 0 +#define NVC0_COMPUTE_IMAGE_HEIGHT_UNK16 0x00010000 +#define NVC0_COMPUTE_IMAGE_HEIGHT_LINEAR 0x00100000 + +#define NVC0_COMPUTE_IMAGE_FORMAT(i0) (0x00002710 + 0x20*(i0)) +#define NVC0_COMPUTE_IMAGE_FORMAT_UNK0 0x00000001 +#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_COLOR__MASK 0x00000ff0 +#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_COLOR__SHIFT 4 +#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_ZETA__MASK 0x0001f000 +#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_ZETA__SHIFT 12 + +#define NVC0_COMPUTE_IMAGE_TILE_MODE(i0) (0x00002714 + 0x20*(i0)) + +#define NVC0_COMPUTE_MP_PM_SET(i0) (0x0000335c + 0x4*(i0)) +#define NVC0_COMPUTE_MP_PM_SET__ESIZE 0x00000004 +#define NVC0_COMPUTE_MP_PM_SET__LEN 0x00000008 + +#define NVC0_COMPUTE_MP_PM_SIGSEL(i0) (0x0000337c + 0x4*(i0)) +#define NVC0_COMPUTE_MP_PM_SIGSEL__ESIZE 0x00000004 +#define NVC0_COMPUTE_MP_PM_SIGSEL__LEN 0x00000008 + +#define NVC0_COMPUTE_MP_PM_SRCSEL(i0) (0x0000339c + 0x4*(i0)) +#define NVC0_COMPUTE_MP_PM_SRCSEL__ESIZE 0x00000004 +#define NVC0_COMPUTE_MP_PM_SRCSEL__LEN 0x00000008 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP0__MASK 0x00000007 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP0__SHIFT 0 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG0__MASK 0x00000070 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG0__SHIFT 4 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP1__MASK 0x00000700 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP1__SHIFT 8 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG1__MASK 0x00007000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG1__SHIFT 12 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP2__MASK 0x00070000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP2__SHIFT 16 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG2__MASK 0x00700000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG2__SHIFT 20 +#define 
NVC0_COMPUTE_MP_PM_SRCSEL_GRP3__MASK 0x07000000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP3__SHIFT 24 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG3__MASK 0x70000000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG3__SHIFT 28 + +#define NVC0_COMPUTE_MP_PM_OP(i0) (0x000033bc + 0x4*(i0)) +#define NVC0_COMPUTE_MP_PM_OP__ESIZE 0x00000004 +#define NVC0_COMPUTE_MP_PM_OP__LEN 0x00000008 +#define NVC0_COMPUTE_MP_PM_OP_MODE__MASK 0x00000001 +#define NVC0_COMPUTE_MP_PM_OP_MODE__SHIFT 0 +#define NVC0_COMPUTE_MP_PM_OP_MODE_LOGOP 0x00000000 +#define NVC0_COMPUTE_MP_PM_OP_MODE_LOGOP_PULSE 0x00000001 +#define NVC0_COMPUTE_MP_PM_OP_FUNC__MASK 0x000ffff0 +#define NVC0_COMPUTE_MP_PM_OP_FUNC__SHIFT 4 + +#define NVC0_COMPUTE_MP_PM_UNK33DC 0x000033dc + + +#endif /* NVC0_COMPUTE_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c new file mode 100644 index 00000000000..e0c2b74e196 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -0,0 +1,402 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" +#include "util/u_framebuffer.h" + +#ifdef NVC0_WITH_DRAW_MODULE +#include "draw/draw_context.h" +#endif + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_screen.h" +#include "nvc0/nvc0_resource.h" + +static void +nvc0_flush(struct pipe_context *pipe, + struct pipe_fence_handle **fence, + unsigned flags) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_screen *screen = &nvc0->screen->base; + + if (fence) + nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence); + + PUSH_KICK(nvc0->base.pushbuf); /* fencing handled in kick_notify */ + + nouveau_context_update_frame_stats(&nvc0->base); +} + +static void +nvc0_texture_barrier(struct pipe_context *pipe) +{ + struct nouveau_pushbuf *push = nvc0_context(pipe)->base.pushbuf; + + IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); + IMMED_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 0); +} + +static void +nvc0_context_unreference_resources(struct nvc0_context *nvc0) +{ + unsigned s, i; + + nouveau_bufctx_del(&nvc0->bufctx_3d); + nouveau_bufctx_del(&nvc0->bufctx); + nouveau_bufctx_del(&nvc0->bufctx_cp); + + util_unreference_framebuffer_state(&nvc0->framebuffer); + + for (i = 0; i < nvc0->num_vtxbufs; ++i) + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL); + + pipe_resource_reference(&nvc0->idxbuf.buffer, NULL); + + for (s = 0; s < 6; ++s) { + for (i = 0; i < nvc0->num_textures[s]; ++i) + pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); + + for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; ++i) + if (!nvc0->constbuf[s][i].user) + pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, NULL); + } + + for (s = 0; s < 2; ++s) { + for (i = 0; i < NVC0_MAX_SURFACE_SLOTS; ++i) + 
pipe_surface_reference(&nvc0->surfaces[s][i], NULL); + } + + for (i = 0; i < nvc0->num_tfbbufs; ++i) + pipe_so_target_reference(&nvc0->tfbbuf[i], NULL); + + for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource **res = util_dynarray_element( + &nvc0->global_residents, struct pipe_resource *, i); + pipe_resource_reference(res, NULL); + } + util_dynarray_fini(&nvc0->global_residents); +} + +static void +nvc0_destroy(struct pipe_context *pipe) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + if (nvc0->screen->cur_ctx == nvc0) { + nvc0->base.pushbuf->kick_notify = NULL; + nvc0->screen->cur_ctx = NULL; + nouveau_pushbuf_bufctx(nvc0->base.pushbuf, NULL); + } + nouveau_pushbuf_kick(nvc0->base.pushbuf, nvc0->base.pushbuf->channel); + + nvc0_context_unreference_resources(nvc0); + nvc0_blitctx_destroy(nvc0); + +#ifdef NVC0_WITH_DRAW_MODULE + draw_destroy(nvc0->draw); +#endif + + nouveau_context_destroy(&nvc0->base); +} + +void +nvc0_default_kick_notify(struct nouveau_pushbuf *push) +{ + struct nvc0_screen *screen = push->user_priv; + + if (screen) { + nouveau_fence_next(&screen->base); + nouveau_fence_update(&screen->base, TRUE); + if (screen->cur_ctx) + screen->cur_ctx->state.flushed = TRUE; + } + NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1); +} + +static int +nvc0_invalidate_resource_storage(struct nouveau_context *ctx, + struct pipe_resource *res, + int ref) +{ + struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe); + unsigned s, i; + + if (res->bind & PIPE_BIND_RENDER_TARGET) { + for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) { + if (nvc0->framebuffer.cbufs[i] && + nvc0->framebuffer.cbufs[i]->texture == res) { + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + if (!--ref) + return ref; + } + } + } + if (res->bind & PIPE_BIND_DEPTH_STENCIL) { + if (nvc0->framebuffer.zsbuf && + nvc0->framebuffer.zsbuf->texture == res) { + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; 
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + if (!--ref) + return ref; + } + } + + if (res->bind & PIPE_BIND_VERTEX_BUFFER) { + for (i = 0; i < nvc0->num_vtxbufs; ++i) { + if (nvc0->vtxbuf[i].buffer == res) { + nvc0->dirty |= NVC0_NEW_ARRAYS; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); + if (!--ref) + return ref; + } + } + } + if (res->bind & PIPE_BIND_INDEX_BUFFER) { + if (nvc0->idxbuf.buffer == res) { + nvc0->dirty |= NVC0_NEW_IDXBUF; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX); + if (!--ref) + return ref; + } + } + + if (res->bind & PIPE_BIND_SAMPLER_VIEW) { + for (s = 0; s < 5; ++s) { + for (i = 0; i < nvc0->num_textures[s]; ++i) { + if (nvc0->textures[s][i] && + nvc0->textures[s][i]->texture == res) { + nvc0->textures_dirty[s] |= 1 << i; + nvc0->dirty |= NVC0_NEW_TEXTURES; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); + if (!--ref) + return ref; + } + } + } + } + + if (res->bind & PIPE_BIND_CONSTANT_BUFFER) { + for (s = 0; s < 5; ++s) { + for (i = 0; i < nvc0->num_vtxbufs; ++i) { + if (!nvc0->constbuf[s][i].user && + nvc0->constbuf[s][i].u.buf == res) { + nvc0->dirty |= NVC0_NEW_CONSTBUF; + nvc0->constbuf_dirty[s] |= 1 << i; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i)); + if (!--ref) + return ref; + } + } + } + } + + return ref; +} + +static void +nvc0_context_get_sample_position(struct pipe_context *, unsigned, unsigned, + float *); + +struct pipe_context * +nvc0_create(struct pipe_screen *pscreen, void *priv) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + struct nvc0_context *nvc0; + struct pipe_context *pipe; + int ret; + uint32_t flags; + + nvc0 = CALLOC_STRUCT(nvc0_context); + if (!nvc0) + return NULL; + pipe = &nvc0->base.pipe; + + if (!nvc0_blitctx_create(nvc0)) + goto out_err; + + nvc0->base.pushbuf = screen->base.pushbuf; + nvc0->base.client = screen->base.client; + + ret = nouveau_bufctx_new(screen->base.client, 2, &nvc0->bufctx); + if (!ret) + ret = 
nouveau_bufctx_new(screen->base.client, NVC0_BIND_3D_COUNT, + &nvc0->bufctx_3d); + if (!ret) + ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_CP_COUNT, + &nvc0->bufctx_cp); + if (ret) + goto out_err; + + nvc0->screen = screen; + nvc0->base.screen = &screen->base; + + pipe->screen = pscreen; + pipe->priv = priv; + + pipe->destroy = nvc0_destroy; + + pipe->draw_vbo = nvc0_draw_vbo; + pipe->clear = nvc0_clear; + pipe->launch_grid = (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) ? + nve4_launch_grid : nvc0_launch_grid; + + pipe->flush = nvc0_flush; + pipe->texture_barrier = nvc0_texture_barrier; + pipe->get_sample_position = nvc0_context_get_sample_position; + + if (!screen->cur_ctx) { + screen->cur_ctx = nvc0; + nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx); + } + screen->base.pushbuf->kick_notify = nvc0_default_kick_notify; + + nvc0_init_query_functions(nvc0); + nvc0_init_surface_functions(nvc0); + nvc0_init_state_functions(nvc0); + nvc0_init_transfer_functions(nvc0); + nvc0_init_resource_functions(pipe); + + nvc0->base.invalidate_resource_storage = nvc0_invalidate_resource_storage; + +#ifdef NVC0_WITH_DRAW_MODULE + /* no software fallbacks implemented */ + nvc0->draw = draw_create(pipe); + assert(nvc0->draw); + draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0)); +#endif + + pipe->create_video_codec = nvc0_create_decoder; + pipe->create_video_buffer = nvc0_video_buffer_create; + + /* shader builtin library is per-screen, but we need a context for m2mf */ + nvc0_program_library_upload(nvc0); + + /* add permanently resident buffers to bufctxts */ + + flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + + BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text); + BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo); + BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc); + if (screen->compute) { + BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->text); + BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc); 
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm); + } + + flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR; + + BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache); + if (screen->compute) + BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->tls); + + flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; + + BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->fence.bo); + BCTX_REFN_bo(nvc0->bufctx, FENCE, flags, screen->fence.bo); + if (screen->compute) + BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->fence.bo); + + nvc0->base.scratch.bo_size = 2 << 20; + + memset(nvc0->tex_handles, ~0, sizeof(nvc0->tex_handles)); + + util_dynarray_init(&nvc0->global_residents); + + return pipe; + +out_err: + if (nvc0) { + if (nvc0->bufctx_3d) + nouveau_bufctx_del(&nvc0->bufctx_3d); + if (nvc0->bufctx_cp) + nouveau_bufctx_del(&nvc0->bufctx_cp); + if (nvc0->bufctx) + nouveau_bufctx_del(&nvc0->bufctx); + if (nvc0->blit) + FREE(nvc0->blit); + FREE(nvc0); + } + return NULL; +} + +void +nvc0_bufctx_fence(struct nvc0_context *nvc0, struct nouveau_bufctx *bufctx, + boolean on_flush) +{ + struct nouveau_list *list = on_flush ? 
&bufctx->current : &bufctx->pending; + struct nouveau_list *it; + NOUVEAU_DRV_STAT_IFD(unsigned count = 0); + + for (it = list->next; it != list; it = it->next) { + struct nouveau_bufref *ref = (struct nouveau_bufref *)it; + struct nv04_resource *res = ref->priv; + if (res) + nvc0_resource_validate(res, (unsigned)ref->priv_data); + NOUVEAU_DRV_STAT_IFD(count++); + } + NOUVEAU_DRV_STAT(&nvc0->screen->base, resource_validate_count, count); +} + +static void +nvc0_context_get_sample_position(struct pipe_context *pipe, + unsigned sample_count, unsigned sample_index, + float *xy) +{ + static const uint8_t ms1[1][2] = { { 0x8, 0x8 } }; + static const uint8_t ms2[2][2] = { + { 0x4, 0x4 }, { 0xc, 0xc } }; /* surface coords (0,0), (1,0) */ + static const uint8_t ms4[4][2] = { + { 0x6, 0x2 }, { 0xe, 0x6 }, /* (0,0), (1,0) */ + { 0x2, 0xa }, { 0xa, 0xe } }; /* (0,1), (1,1) */ + static const uint8_t ms8[8][2] = { + { 0x1, 0x7 }, { 0x5, 0x3 }, /* (0,0), (1,0) */ + { 0x3, 0xd }, { 0x7, 0xb }, /* (0,1), (1,1) */ + { 0x9, 0x5 }, { 0xf, 0x1 }, /* (2,0), (3,0) */ + { 0xb, 0xf }, { 0xd, 0x9 } }; /* (2,1), (3,1) */ +#if 0 + /* NOTE: there are alternative modes for MS2 and MS8, currently not used */ + static const uint8_t ms8_alt[8][2] = { + { 0x9, 0x5 }, { 0x7, 0xb }, /* (2,0), (1,1) */ + { 0xd, 0x9 }, { 0x5, 0x3 }, /* (3,1), (1,0) */ + { 0x3, 0xd }, { 0x1, 0x7 }, /* (0,1), (0,0) */ + { 0xb, 0xf }, { 0xf, 0x1 } }; /* (2,1), (3,0) */ +#endif + + const uint8_t (*ptr)[2]; + + switch (sample_count) { + case 0: + case 1: ptr = ms1; break; + case 2: ptr = ms2; break; + case 4: ptr = ms4; break; + case 8: ptr = ms8; break; + default: + assert(0); + return; /* bad sample count -> undefined locations */ + } + xy[0] = ptr[sample_index][0] * 0.0625f; + xy[1] = ptr[sample_index][1] * 0.0625f; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h new file mode 100644 index 00000000000..3fbecdc1391 --- /dev/null +++ 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -0,0 +1,357 @@ +#ifndef __NVC0_CONTEXT_H__ +#define __NVC0_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_inlines.h" +#include "util/u_dynarray.h" + +#ifdef NVC0_WITH_DRAW_MODULE +#include "draw/draw_vertex.h" +#endif + +#include "nv50/nv50_debug.h" +#include "nvc0/nvc0_winsys.h" +#include "nvc0/nvc0_stateobj.h" +#include "nvc0/nvc0_screen.h" +#include "nvc0/nvc0_program.h" +#include "nvc0/nvc0_resource.h" + +#include "nv50/nv50_transfer.h" + +#include "nouveau_context.h" + +#include "nvc0/nvc0_3ddefs.xml.h" +#include "nvc0/nvc0_3d.xml.h" +#include "nvc0/nvc0_2d.xml.h" +#include "nvc0/nvc0_m2mf.xml.h" +#include "nvc0/nve4_p2mf.xml.h" + +/* NOTE: must keep NVC0_NEW_...PROG in consecutive bits in this order */ +#define NVC0_NEW_BLEND (1 << 0) +#define NVC0_NEW_RASTERIZER (1 << 1) +#define NVC0_NEW_ZSA (1 << 2) +#define NVC0_NEW_VERTPROG (1 << 3) +#define NVC0_NEW_TCTLPROG (1 << 4) +#define NVC0_NEW_TEVLPROG (1 << 5) +#define NVC0_NEW_GMTYPROG (1 << 6) +#define NVC0_NEW_FRAGPROG (1 << 7) +#define NVC0_NEW_BLEND_COLOUR (1 << 8) +#define NVC0_NEW_STENCIL_REF (1 << 9) +#define NVC0_NEW_CLIP (1 << 10) +#define NVC0_NEW_SAMPLE_MASK (1 << 11) +#define NVC0_NEW_FRAMEBUFFER (1 << 12) +#define NVC0_NEW_STIPPLE (1 << 13) +#define NVC0_NEW_SCISSOR (1 << 14) +#define NVC0_NEW_VIEWPORT (1 << 15) +#define NVC0_NEW_ARRAYS (1 << 16) +#define NVC0_NEW_VERTEX (1 << 17) +#define NVC0_NEW_CONSTBUF (1 << 18) +#define NVC0_NEW_TEXTURES (1 << 19) +#define NVC0_NEW_SAMPLERS (1 << 20) +#define NVC0_NEW_TFB_TARGETS (1 << 21) +#define NVC0_NEW_IDXBUF (1 << 22) +#define NVC0_NEW_SURFACES (1 << 23) + +#define NVC0_NEW_CP_PROGRAM (1 << 0) +#define NVC0_NEW_CP_SURFACES (1 << 1) +#define NVC0_NEW_CP_TEXTURES (1 << 2) +#define NVC0_NEW_CP_SAMPLERS (1 << 3) +#define NVC0_NEW_CP_CONSTBUF (1 << 4) +#define NVC0_NEW_CP_GLOBALS 
(1 << 5) + +/* 3d bufctx (during draw_vbo, blit_3d) */ +#define NVC0_BIND_FB 0 +#define NVC0_BIND_VTX 1 +#define NVC0_BIND_VTX_TMP 2 +#define NVC0_BIND_IDX 3 +#define NVC0_BIND_TEX(s, i) ( 4 + 32 * (s) + (i)) +#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i)) +#define NVC0_BIND_TFB 244 +#define NVC0_BIND_SUF 245 +#define NVC0_BIND_SCREEN 246 +#define NVC0_BIND_TLS 247 +#define NVC0_BIND_3D_COUNT 248 + +/* compute bufctx (during launch_grid) */ +#define NVC0_BIND_CP_CB(i) ( 0 + (i)) +#define NVC0_BIND_CP_TEX(i) ( 16 + (i)) +#define NVC0_BIND_CP_SUF 48 +#define NVC0_BIND_CP_GLOBAL 49 +#define NVC0_BIND_CP_DESC 50 +#define NVC0_BIND_CP_SCREEN 51 +#define NVC0_BIND_CP_QUERY 52 +#define NVC0_BIND_CP_COUNT 53 + +/* bufctx for other operations */ +#define NVC0_BIND_2D 0 +#define NVC0_BIND_M2MF 0 +#define NVC0_BIND_FENCE 1 + + +struct nvc0_blitctx; + +boolean nvc0_blitctx_create(struct nvc0_context *); +void nvc0_blitctx_destroy(struct nvc0_context *); + +struct nvc0_context { + struct nouveau_context base; + + struct nouveau_bufctx *bufctx_3d; + struct nouveau_bufctx *bufctx; + struct nouveau_bufctx *bufctx_cp; + + struct nvc0_screen *screen; + + void (*m2mf_copy_rect)(struct nvc0_context *, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy); + + uint32_t dirty; + uint32_t dirty_cp; /* dirty flags for compute state */ + + struct { + boolean flushed; + boolean rasterizer_discard; + boolean early_z_forced; + boolean prim_restart; + uint32_t instance_elts; /* bitmask of per-instance elements */ + uint32_t instance_base; + uint32_t constant_vbos; + uint32_t constant_elts; + int32_t index_bias; + uint16_t scissor; + uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */ + uint8_t num_vtxbufs; + uint8_t num_vtxelts; + uint8_t num_textures[6]; + uint8_t num_samplers[6]; + uint8_t tls_required; /* bitmask of shader types using l[] */ + uint8_t c14_bound; /* whether immediate array constbuf is bound */ 
+ uint8_t clip_enable; + uint32_t clip_mode; + uint32_t uniform_buffer_bound[5]; + struct nvc0_transform_feedback_state *tfb; + } state; + + struct nvc0_blend_stateobj *blend; + struct nvc0_rasterizer_stateobj *rast; + struct nvc0_zsa_stateobj *zsa; + struct nvc0_vertex_stateobj *vertex; + + struct nvc0_program *vertprog; + struct nvc0_program *tctlprog; + struct nvc0_program *tevlprog; + struct nvc0_program *gmtyprog; + struct nvc0_program *fragprog; + struct nvc0_program *compprog; + + struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS]; + uint16_t constbuf_dirty[6]; + + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned num_vtxbufs; + struct pipe_index_buffer idxbuf; + uint32_t constant_vbos; + uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ + uint32_t vb_elt_first; /* from pipe_draw_info, for vertex upload */ + uint32_t vb_elt_limit; /* max - min element (count - 1) */ + uint32_t instance_off; /* current base vertex for instanced arrays */ + uint32_t instance_max; /* last instance for current draw call */ + + struct pipe_sampler_view *textures[6][PIPE_MAX_SAMPLERS]; + unsigned num_textures[6]; + uint32_t textures_dirty[6]; + struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS]; + unsigned num_samplers[6]; + uint16_t samplers_dirty[6]; + + uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */ + + struct pipe_framebuffer_state framebuffer; + struct pipe_blend_color blend_colour; + struct pipe_stencil_ref stencil_ref; + struct pipe_poly_stipple stipple; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + struct pipe_clip_state clip; + + unsigned sample_mask; + + boolean vbo_push_hint; + + uint8_t tfbbuf_dirty; + struct pipe_stream_output_target *tfbbuf[4]; + unsigned num_tfbbufs; + + struct pipe_query *cond_query; + boolean cond_cond; + uint cond_mode; + + struct nvc0_blitctx *blit; + + struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS]; + uint16_t surfaces_dirty[2]; + uint16_t 
surfaces_valid[2]; + uint32_t vport_int[2]; + + struct util_dynarray global_residents; + +#ifdef NVC0_WITH_DRAW_MODULE + struct draw_context *draw; +#endif +}; + +static INLINE struct nvc0_context * +nvc0_context(struct pipe_context *pipe) +{ + return (struct nvc0_context *)pipe; +} + +static INLINE unsigned +nvc0_shader_stage(unsigned pipe) +{ + switch (pipe) { + case PIPE_SHADER_VERTEX: return 0; +/* case PIPE_SHADER_TESSELLATION_CONTROL: return 1; */ +/* case PIPE_SHADER_TESSELLATION_EVALUATION: return 2; */ + case PIPE_SHADER_GEOMETRY: return 3; + case PIPE_SHADER_FRAGMENT: return 4; + case PIPE_SHADER_COMPUTE: return 5; + default: + assert(!"invalid PIPE_SHADER type"); + return 0; + } +} + + +/* nvc0_context.c */ +struct pipe_context *nvc0_create(struct pipe_screen *, void *); +void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *, + boolean on_flush); +void nvc0_default_kick_notify(struct nouveau_pushbuf *); + +/* nvc0_draw.c */ +extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); + +/* nvc0_program.c */ +boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset); +boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *); +void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); +void nvc0_program_library_upload(struct nvc0_context *); +uint32_t nvc0_program_symbol_offset(const struct nvc0_program *, + uint32_t label); + +/* nvc0_query.c */ +void nvc0_init_query_functions(struct nvc0_context *); +void nvc0_query_pushbuf_submit(struct nouveau_pushbuf *, + struct pipe_query *, unsigned result_offset); +void nvc0_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *); +void nvc0_so_target_save_offset(struct pipe_context *, + struct pipe_stream_output_target *, unsigned i, + boolean *serialize); + +#define NVC0_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) + +/* nvc0_shader_state.c */ +void nvc0_vertprog_validate(struct nvc0_context *); +void 
nvc0_tctlprog_validate(struct nvc0_context *); +void nvc0_tevlprog_validate(struct nvc0_context *); +void nvc0_gmtyprog_validate(struct nvc0_context *); +void nvc0_fragprog_validate(struct nvc0_context *); + +void nvc0_tfb_validate(struct nvc0_context *); + +/* nvc0_state.c */ +extern void nvc0_init_state_functions(struct nvc0_context *); + +/* nvc0_state_validate.c */ +void nvc0_validate_global_residents(struct nvc0_context *, + struct nouveau_bufctx *, int bin); +extern boolean nvc0_state_validate(struct nvc0_context *, uint32_t state_mask, + unsigned space_words); + +/* nvc0_surface.c */ +extern void nvc0_clear(struct pipe_context *, unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil); +extern void nvc0_init_surface_functions(struct nvc0_context *); + +/* nvc0_tex.c */ +boolean nve4_validate_tsc(struct nvc0_context *nvc0, int s); +void nvc0_validate_textures(struct nvc0_context *); +void nvc0_validate_samplers(struct nvc0_context *); +void nve4_set_tex_handles(struct nvc0_context *); +void nvc0_validate_surfaces(struct nvc0_context *); +void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_surface *, + struct nvc0_screen *); + +struct pipe_sampler_view * +nvc0_create_texture_view(struct pipe_context *, + struct pipe_resource *, + const struct pipe_sampler_view *, + uint32_t flags, + enum pipe_texture_target); +struct pipe_sampler_view * +nvc0_create_sampler_view(struct pipe_context *, + struct pipe_resource *, + const struct pipe_sampler_view *); + +/* nvc0_transfer.c */ +void +nvc0_init_transfer_functions(struct nvc0_context *); + +void +nvc0_m2mf_push_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data); +void +nve4_p2mf_push_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data); +void +nvc0_cb_push(struct nouveau_context *, + struct nouveau_bo *bo, unsigned 
domain, + unsigned base, unsigned size, + unsigned offset, unsigned words, const uint32_t *data); + +/* nvc0_vbo.c */ +void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *); + +void * +nvc0_vertex_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements); +void +nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso); + +void nvc0_vertex_arrays_validate(struct nvc0_context *); + +void nvc0_idxbuf_validate(struct nvc0_context *); + +/* nvc0_video.c */ +struct pipe_video_codec * +nvc0_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ); + +struct pipe_video_buffer * +nvc0_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *templat); + +/* nvc0_push.c */ +void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *); + +/* nve4_compute.c */ +void nve4_launch_grid(struct pipe_context *, + const uint *, const uint *, uint32_t, const void *); + +/* nvc0_compute.c */ +void nvc0_launch_grid(struct pipe_context *, + const uint *, const uint *, uint32_t, const void *); + +#endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_draw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_draw.c new file mode 100644 index 00000000000..e261d5058fc --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_draw.c @@ -0,0 +1,88 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of 
the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "draw/draw_pipe.h" + +#include "nvc0/nvc0_context.h" + +struct nvc0_render_stage { + struct draw_stage stage; + struct nvc0_context *nvc0; +}; + +static INLINE struct nvc0_render_stage * +nvc0_render_stage(struct draw_stage *stage) +{ + return (struct nvc0_render_stage *)stage; +} + +static void +nvc0_render_point(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_line(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_flush(struct draw_stage *stage, unsigned flags) +{ +} + +static void +nvc0_render_reset_stipple_counter(struct draw_stage *stage) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_destroy(struct draw_stage *stage) +{ + FREE(stage); +} + +struct draw_stage * +nvc0_draw_render_stage(struct nvc0_context *nvc0) +{ + struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage); + + rs->nvc0 = nvc0; + rs->stage.draw = nvc0->draw; + rs->stage.destroy = nvc0_render_destroy; + rs->stage.point = nvc0_render_point; + rs->stage.line = nvc0_render_line; + rs->stage.tri = nvc0_render_tri; + rs->stage.flush = nvc0_render_flush; + rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter; + + return &rs->stage; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_formats.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_formats.c new file mode 100644 index 00000000000..2bfdb0e076c --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_formats.c @@ -0,0 +1,25 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define NOUVEAU_DRIVER 0xc0 + +#include "nv50/nv50_formats.c" diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h new file mode 100644 index 00000000000..f009980c629 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h @@ -0,0 +1,236 @@ + +#ifndef __NVC0_PGRAPH_MACROS_H__ +#define __NVC0_PGRAPH_MACROS_H__ + +/* extrinsrt r1, r2, src, size, dst: replace bits [dst:dst+size) in r1 + * with bits [src:src+size) in r2 + * + * bra(n)z annul: no delay slot + */ + +/* Bitfield version of NVC0_3D_VERTEX_ARRAY_PER_INSTANCE[]. 
+ * Args: size, bitfield + */ +static const uint32_t nvc0_9097_per_instance_bf[] = +{ + 0x00000301, /* parm $r3 (the bitfield) */ + 0x00000211, /* mov $r2 0 */ + 0x05880021, /* maddr [NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(0), increment = 4] */ + 0xffffc911, /* mov $r1 (add $r1 -0x1) */ + 0x0040d043, /* send (extrshl $r3 $r2 0x1 0) */ + 0xffff8897, /* exit branz $r1 0x3 */ + 0x00005211 /* mov $r2 (add $r2 0x1) */ +}; + +/* The comments above the macros describe what they *should* be doing, + * but we use less functionality for now. + */ + +/* + * for (i = 0; i < 8; ++i) + * [NVC0_3D_BLEND_ENABLE(i)] = BIT(i of arg); + * + * [3428] = arg; + * + * if (arg == 0 || [NVC0_3D_MULTISAMPLE_ENABLE] == 0) + * [0d9c] = 0; + * else + * [0d9c] = [342c]; + */ +static const uint32_t nvc0_9097_blend_enables[] = +{ + 0x05360021, /* 0x00: maddr [NVC0_3D_BLEND_ENABLE(0), increment = 4] */ + 0x00404042, /* 0x01: send extrinsrt 0 $r1 0 0x1 0 */ + 0x00424042, /* 0x02: send extrinsrt 0 $r1 0x1 0x1 0 */ + 0x00444042, /* 0x03: send extrinsrt 0 $r1 0x2 0x1 0 */ + 0x00464042, /* 0x04: send extrinsrt 0 $r1 0x3 0x1 0 */ + 0x00484042, /* 0x05: send extrinsrt 0 $r1 0x4 0x1 0 */ + 0x004a4042, /* 0x06: send extrinsrt 0 $r1 0x5 0x1 0 */ + 0x004c40c2, /* 0x07: exit send extrinsrt 0 $r1 0x6 0x1 0 */ + 0x004e4042, /* 0x08: send extrinsrt 0 $r1 0x7 0x1 0 */ +}; + +/* + * uint64 limit = (parm(0) << 32) | parm(1); + * uint64 start = (parm(2) << 32); + * + * if (limit) { + * start |= parm(3); + * --limit; + * } else { + * start |= 1; + * } + * + * [0x1c04 + (arg & 0xf) * 16 + 0] = (start >> 32) & 0xff; + * [0x1c04 + (arg & 0xf) * 16 + 4] = start & 0xffffffff; + * [0x1f00 + (arg & 0xf) * 8 + 0] = (limit >> 32) & 0xff; + * [0x1f00 + (arg & 0xf) * 8 + 4] = limit & 0xffffffff; + */ +static const uint32_t nvc0_9097_vertex_array_select[] = +{ + 0x00000201, /* 0x00: parm $r2 */ + 0x00000301, /* 0x01: parm $r3 */ + 0x00000401, /* 0x02: parm $r4 */ + 0x00000501, /* 0x03: parm $r5 */ + 0x11004612, /* 0x04: mov $r6 
extrinsrt 0 $r1 0 4 2 */ + 0x09004712, /* 0x05: mov $r7 extrinsrt 0 $r1 0 4 1 */ + 0x05c07621, /* 0x06: maddr $r6 add $6 0x1701 */ + 0x00002041, /* 0x07: send $r4 */ + 0x00002841, /* 0x08: send $r5 */ + 0x05f03f21, /* 0x09: maddr $r7 add $7 0x17c0 */ + 0x000010c1, /* 0x0a: exit send $r2 */ + 0x00001841, /* 0x0b: send $r3 */ +}; + +/* + * [GL_POLYGON_MODE_FRONT] = arg; + * + * if (BIT(31 of [0x3410])) + * [1a24] = 0x7353; + * + * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41) + * [02ec] = 0; + * else + * if ([GL_POLYGON_MODE_BACK] == GL_LINE || arg == GL_LINE) + * [02ec] = BYTE(1 of [0x3410]) << 4; + * else + * [02ec] = BYTE(0 of [0x3410]) << 4; + */ +static const uint32_t nvc0_9097_poly_mode_front[] = +{ + 0x00db0215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_BACK] */ + 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */ + 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */ + 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */ + 0x00004211, /* 0x04: mov $r2 0x1 */ + 0x00180611, /* 0x05: mov $r6 0x60 */ + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x07: braz $r7 0xa */ + 0x00dac021, /* 0x08: maddr 0x36b */ + 0x00800611, /* 0x09: mov $r6 0x200 */ + 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */ + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x0c: braz $r7 0xf */ + 0x00000841, /* 0x0d: send $r1 */ + 0x00000611, /* 0x0e: mov $r6 0 */ + 0x002ec0a1, /* 0x0f: exit maddr [02ec] */ + 0x00003041 /* 0x10: send $r6 */ +}; + +/* + * [GL_POLYGON_MODE_BACK] = arg; + * + * if (BIT(31 of [0x3410])) + * [1a24] = 0x7353; + * + * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41) + * [02ec] = 0; + * else + * if ([GL_POLYGON_MODE_FRONT] == GL_LINE || arg == GL_LINE) + * [02ec] = BYTE(1 of [0x3410]) << 4; + * else + * [02ec] = BYTE(0 of [0x3410]) << 4; + */ +/* NOTE: 0x3410 = 0x80002006 by default, + * POLYGON_MODE == GL_LINE check replaced by (MODE & 1) + * SP_SELECT(i) == (i << 4) | 1 check replaced by SP_SELECT(i) & 
1 + */ +static const uint32_t nvc0_9097_poly_mode_back[] = +{ + 0x00dac215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_FRONT] */ + 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */ + 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */ + 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */ + 0x00004211, /* 0x04: mov $r2 0x1 */ + 0x00180611, /* 0x05: mov $r6 0x60 */ + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x07: braz $r7 0xa */ + 0x00db0021, /* 0x08: maddr 0x36c */ + 0x00800611, /* 0x09: mov $r6 0x200 */ + 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */ + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x0c: braz $r7 0xf */ + 0x00000841, /* 0x0d: send $r1 */ + 0x00000611, /* 0x0e: mov $r6 0 */ + 0x002ec0a1, /* 0x0f: exit maddr [02ec] */ + 0x00003041 /* 0x10: send $r6 */ +}; + +/* + * [NVC0_3D_SP_SELECT(4)] = arg + * + * if BIT(31 of [0x3410]) == 0 + * [1a24] = 0x7353; + * + * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || arg == 0x41) + * [02ec] = 0 + * else + * if (any POLYGON MODE == LINE) + * [02ec] = BYTE(1 of [3410]) << 4; + * else + * [02ec] = BYTE(0 of [3410]) << 4; // 02ec valid bits are 0xff1 + */ +static const uint32_t nvc0_9097_gp_select[] = /* 0x0f */ +{ + 0x00dac215, /* 0x00: read $r2 0x36b */ + 0x00db0315, /* 0x01: read $r3 0x36c */ + 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */ + 0x020c0415, /* 0x03: read $r4 0x830 */ + 0x00004211, /* 0x04: mov $r2 0x1 */ + 0x00180611, /* 0x05: mov $r6 0x60 */ + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x07: braz $r7 0xa */ + 0x02100021, /* 0x08: maddr 0x840 */ + 0x00800611, /* 0x09: mov $r6 0x200 */ + 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */ + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x0c: braz $r7 0xf */ + 0x00000841, /* 0x0d: send $r1 */ + 0x00000611, /* 0x0e: mov $r6 0 */ + 0x002ec0a1, /* 0x0f: exit maddr 0xbb */ + 0x00003041, /* 0x10: send $r6 */ +}; + +/* + * [NVC0_3D_SP_SELECT(3)] = arg + * + * if BIT(31 of [0x3410]) == 0 + * [1a24] = 0x7353; + * + * if 
(arg == 0x31) { + * if (BIT(2 of [0x3430])) { + * int i = 15; do { --i; } while(i); + * [0x1a2c] = 0; + * } + * } + * + * if ([NVC0_3D_SP_SELECT(4)] == 0x41 || arg == 0x31) + * [02ec] = 0 + * else + * if ([any POLYGON_MODE] == GL_LINE) + * [02ec] = BYTE(1 of [3410]) << 4; + * else + * [02ec] = BYTE(0 of [3410]) << 4; + */ +static const uint32_t nvc0_9097_tep_select[] = /* 0x10 */ +{ + 0x00dac215, /* 0x00: read $r2 0x36b */ + 0x00db0315, /* 0x01: read $r3 0x36c */ + 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */ + 0x02100415, /* 0x03: read $r4 0x840 */ + 0x00004211, /* 0x04: mov $r2 0x1 */ + 0x00180611, /* 0x05: mov $r6 0x60 */ + 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x07: braz $r7 0xa */ + 0x020c0021, /* 0x08: maddr 0x830 */ + 0x00800611, /* 0x09: mov $r6 0x200 */ + 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */ + 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ + 0x0000f807, /* 0x0c: braz $r7 0xf */ + 0x00000841, /* 0x0d: send $r1 */ + 0x00000611, /* 0x0e: mov $r6 0 */ + 0x002ec0a1, /* 0x0f: exit maddr 0xbb */ + 0x00003041, /* 0x10: send $r6 */ +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h new file mode 100644 index 00000000000..3bf628d425e --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h @@ -0,0 +1,138 @@ +#ifndef NVC0_M2MF_XML +#define NVC0_M2MF_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_m2mf.xml ( 2227 bytes, from 2010-10-16 16:10:29) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24) +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) 
+- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NVC0_M2MF_TILING_MODE_IN 0x00000204 + +#define NVC0_M2MF_TILING_PITCH_IN 0x00000208 + +#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c + +#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210 + +#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214 + +#define NVC0_M2MF_TILING_MODE_OUT 0x00000220 + +#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224 + +#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228 + +#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c + +#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230 + +#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238 + +#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c + +#define NVC0_M2MF_EXEC 0x00000300 +#define NVC0_M2MF_EXEC_PUSH 0x00000001 +#define NVC0_M2MF_EXEC_LINEAR_IN 0x00000010 +#define NVC0_M2MF_EXEC_LINEAR_OUT 0x00000100 +#define NVC0_M2MF_EXEC_NOTIFY 0x00002000 +#define NVC0_M2MF_EXEC_INC__MASK 0x00f00000 +#define NVC0_M2MF_EXEC_INC__SHIFT 20 + +#define NVC0_M2MF_DATA 0x00000304 + +#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c + +#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310 + +#define NVC0_M2MF_PITCH_IN 0x00000314 + +#define NVC0_M2MF_PITCH_OUT 0x00000318 + +#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c + +#define NVC0_M2MF_LINE_COUNT 0x00000320 + +#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c + +#define NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330 + +#define NVC0_M2MF_NOTIFY 0x00000334 + +#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344 + +#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348 + +#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c + +#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350 + + +#endif /* NVC0_M2MF_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c new file mode 100644 index 00000000000..79c9390b78f --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c @@ -0,0 +1,358 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation 
files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_resource.h" + +static uint32_t +nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz) +{ + return nv50_tex_choose_tile_dims_helper(nx, ny, nz); +} + +static uint32_t +nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed) +{ + const unsigned ms = util_logbase2(mt->base.base.nr_samples); + + uint32_t tile_flags; + + if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR)) + return 0; + if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR)) + return 0; + + switch (mt->base.base.format) { + case PIPE_FORMAT_Z16_UNORM: + if (compressed) + tile_flags = 0x02 + ms; + else + tile_flags = 0x01; + break; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + if (compressed) + tile_flags = 0x51 + ms; + else + tile_flags = 0x46; + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + if (compressed) + tile_flags = 0x17 + 
ms; + else + tile_flags = 0x11; + break; + case PIPE_FORMAT_Z32_FLOAT: + if (compressed) + tile_flags = 0x86 + ms; + else + tile_flags = 0x7b; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + if (compressed) + tile_flags = 0xce + ms; + else + tile_flags = 0xc3; + break; + default: + switch (util_format_get_blocksizebits(mt->base.base.format)) { + case 128: + if (compressed) + tile_flags = 0xf4 + ms * 2; + else + tile_flags = 0xfe; + break; + case 64: + if (compressed) { + switch (ms) { + case 0: tile_flags = 0xe6; break; + case 1: tile_flags = 0xeb; break; + case 2: tile_flags = 0xed; break; + case 3: tile_flags = 0xf2; break; + default: + return 0; + } + } else { + tile_flags = 0xfe; + } + break; + case 32: + if (compressed && ms) { + switch (ms) { + /* This one makes things blurry: + case 0: tile_flags = 0xdb; break; + */ + case 1: tile_flags = 0xdd; break; + case 2: tile_flags = 0xdf; break; + case 3: tile_flags = 0xe4; break; + default: + return 0; + } + } else { + tile_flags = 0xfe; + } + break; + case 16: + case 8: + tile_flags = 0xfe; + break; + default: + return 0; + } + break; + } + + return tile_flags; +} + +static INLINE boolean +nvc0_miptree_init_ms_mode(struct nv50_miptree *mt) +{ + switch (mt->base.base.nr_samples) { + case 8: + mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS8; + mt->ms_x = 2; + mt->ms_y = 1; + break; + case 4: + mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS4; + mt->ms_x = 1; + mt->ms_y = 1; + break; + case 2: + mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS2; + mt->ms_x = 1; + break; + case 1: + case 0: + mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; + break; + default: + NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples); + return FALSE; + } + return TRUE; +} + +static void +nvc0_miptree_init_layout_video(struct nv50_miptree *mt) +{ + const struct pipe_resource *pt = &mt->base.base; + const unsigned blocksize = util_format_get_blocksize(pt->format); + + assert(pt->last_level == 0); + assert(mt->ms_x == 0 && mt->ms_y == 0); + 
assert(!util_format_is_compressed(pt->format)); + + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + + mt->level[0].tile_mode = 0x10; + mt->level[0].pitch = align(pt->width0 * blocksize, 64); + mt->total_size = align(pt->height0, 16) * mt->level[0].pitch * (mt->layout_3d ? pt->depth0 : 1); + + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, NVC0_TILE_SIZE(0x10)); + mt->total_size = mt->layer_stride * pt->array_size; + } +} + +static void +nvc0_miptree_init_layout_tiled(struct nv50_miptree *mt) +{ + struct pipe_resource *pt = &mt->base.base; + unsigned w, h, d, l; + const unsigned blocksize = util_format_get_blocksize(pt->format); + + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + + w = pt->width0 << mt->ms_x; + h = pt->height0 << mt->ms_y; + + /* For 3D textures, a mipmap is spanned by all the layers, for array + * textures and cube maps, each layer contains its own mipmaps. + */ + d = mt->layout_3d ? pt->depth0 : 1; + + assert(!mt->ms_mode || !pt->last_level); + + for (l = 0; l <= pt->last_level; ++l) { + struct nv50_miptree_level *lvl = &mt->level[l]; + unsigned tsx, tsy, tsz; + unsigned nbx = util_format_get_nblocksx(pt->format, w); + unsigned nby = util_format_get_nblocksy(pt->format, h); + + lvl->offset = mt->total_size; + + lvl->tile_mode = nvc0_tex_choose_tile_dims(nbx, nby, d); + + tsx = NVC0_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */ + tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode); + tsz = NVC0_TILE_SIZE_Z(lvl->tile_mode); + + lvl->pitch = align(nbx * blocksize, tsx); + + mt->total_size += lvl->pitch * align(nby, tsy) * align(d, tsz); + + w = u_minify(w, 1); + h = u_minify(h, 1); + d = u_minify(d, 1); + } + + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, + NVC0_TILE_SIZE(mt->level[0].tile_mode)); + mt->total_size = mt->layer_stride * pt->array_size; + } +} + +const struct u_resource_vtbl nvc0_miptree_vtbl = +{ + nv50_miptree_get_handle, /* get_handle */ + nv50_miptree_destroy, /* resource_destroy 
*/ + nvc0_miptree_transfer_map, /* transfer_map */ + u_default_transfer_flush_region, /* transfer_flush_region */ + nvc0_miptree_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; + +struct pipe_resource * +nvc0_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ) +{ + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); + struct pipe_resource *pt = &mt->base.base; + boolean compressed = dev->drm_version >= 0x01000101; + int ret; + union nouveau_bo_config bo_config; + uint32_t bo_flags; + + if (!mt) + return NULL; + + mt->base.vtbl = &nvc0_miptree_vtbl; + *pt = *templ; + pipe_reference_init(&pt->reference, 1); + pt->screen = pscreen; + + if (pt->usage == PIPE_USAGE_STAGING) { + switch (pt->target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + if (pt->last_level == 0 && + !util_format_is_depth_or_stencil(pt->format) && + pt->nr_samples <= 1) + pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR; + break; + default: + break; + } + } + + if (pt->bind & PIPE_BIND_LINEAR) + pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR; + + bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed); + + if (!nvc0_miptree_init_ms_mode(mt)) { + FREE(mt); + return NULL; + } + + if (unlikely(pt->flags & NVC0_RESOURCE_FLAG_VIDEO)) { + nvc0_miptree_init_layout_video(mt); + } else + if (likely(bo_config.nvc0.memtype)) { + nvc0_miptree_init_layout_tiled(mt); + } else + if (!nv50_miptree_init_layout_linear(mt, 128)) { + FREE(mt); + return NULL; + } + bo_config.nvc0.tile_mode = mt->level[0].tile_mode; + + if (!bo_config.nvc0.memtype && pt->usage == PIPE_USAGE_STAGING) + mt->base.domain = NOUVEAU_BO_GART; + else + mt->base.domain = NOUVEAU_BO_VRAM; + + bo_flags = mt->base.domain | NOUVEAU_BO_NOSNOOP; + + if (mt->base.base.bind & (PIPE_BIND_CURSOR | PIPE_BIND_DISPLAY_TARGET)) + bo_flags |= NOUVEAU_BO_CONTIG; + + ret = 
nouveau_bo_new(dev, bo_flags, 4096, mt->total_size, &bo_config, + &mt->base.bo); + if (ret) { + FREE(mt); + return NULL; + } + mt->base.address = mt->base.bo->offset; + + NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_count, 1); + NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_bytes, + mt->total_size); + + return pt; +} + +/* Offset of zslice @z from start of level @l. */ +INLINE unsigned +nvc0_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z) +{ + const struct pipe_resource *pt = &mt->base.base; + + unsigned tds = NVC0_TILE_SHIFT_Z(mt->level[l].tile_mode); + unsigned ths = NVC0_TILE_SHIFT_Y(mt->level[l].tile_mode); + + unsigned nby = util_format_get_nblocksy(pt->format, + u_minify(pt->height0, l)); + + /* to next 2D tile slice within a 3D tile */ + unsigned stride_2d = NVC0_TILE_SIZE_2D(mt->level[l].tile_mode); + + /* to slice in the next (in z direction) 3D tile */ + unsigned stride_3d = (align(nby, (1 << ths)) * mt->level[l].pitch) << tds; + + return (z & (1 << (tds - 1))) * stride_2d + (z >> tds) * stride_3d; +} + +/* Surface functions. 
+ */ + +struct pipe_surface * +nvc0_miptree_surface_new(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *templ) +{ + struct nv50_surface *ns = nv50_surface_from_miptree(nv50_miptree(pt), templ); + if (!ns) + return NULL; + ns->base.context = pipe; + return &ns->base; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c new file mode 100644 index 00000000000..71deb3485d5 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -0,0 +1,811 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" + +#include "nvc0/nvc0_context.h" + +#include "codegen/nv50_ir_driver.h" +#include "nvc0/nve4_compute.h" + +/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than + * 124 scalar varying values. 
+ */ +static uint32_t +nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase) +{ + switch (sn) { + case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; + case TGSI_SEMANTIC_PRIMID: return 0x060; + case TGSI_SEMANTIC_PSIZE: return 0x06c; + case TGSI_SEMANTIC_POSITION: return 0x070; + case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; + case TGSI_SEMANTIC_FOG: return 0x2e8; + case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; + case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; + case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4; + case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; + case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; + case TGSI_SEMANTIC_PCOORD: return 0x2e0; + case NV50_SEMANTIC_TESSCOORD: return 0x2f0; + case TGSI_SEMANTIC_INSTANCEID: return 0x2f8; + case TGSI_SEMANTIC_VERTEXID: return 0x2fc; + case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; + case TGSI_SEMANTIC_FACE: return 0x3fc; + case NV50_SEMANTIC_INVOCATIONID: return ~0; + default: + assert(!"invalid TGSI input semantic"); + return ~0; + } +} + +static uint32_t +nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase) +{ + switch (sn) { + case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; + case TGSI_SEMANTIC_PRIMID: return 0x060; + case NV50_SEMANTIC_LAYER: return 0x064; + case NV50_SEMANTIC_VIEWPORTINDEX: return 0x068; + case TGSI_SEMANTIC_PSIZE: return 0x06c; + case TGSI_SEMANTIC_POSITION: return 0x070; + case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; + case TGSI_SEMANTIC_FOG: return 0x2e8; + case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; + case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; + case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4; + case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; + case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; + case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; + case TGSI_SEMANTIC_EDGEFLAG: return ~0; + default: + assert(!"invalid TGSI output semantic"); + return ~0; + } +} + 
+static int +nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info) +{ + unsigned i, c, n; + + for (n = 0, i = 0; i < info->numInputs; ++i) { + switch (info->in[i].sn) { + case TGSI_SEMANTIC_INSTANCEID: /* for SM4 only, in TGSI they're SVs */ + case TGSI_SEMANTIC_VERTEXID: + info->in[i].mask = 0x1; + info->in[i].slot[0] = + nvc0_shader_input_address(info->in[i].sn, 0, 0) / 4; + continue; + default: + break; + } + for (c = 0; c < 4; ++c) + info->in[i].slot[c] = (0x80 + n * 0x10 + c * 0x4) / 4; + ++n; + } + + return 0; +} + +static int +nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info) +{ + unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10); + unsigned offset; + unsigned i, c; + + for (i = 0; i < info->numInputs; ++i) { + offset = nvc0_shader_input_address(info->in[i].sn, + info->in[i].si, ubase); + if (info->in[i].patch && offset >= 0x20) + offset = 0x20 + info->in[i].si * 0x10; + + if (info->in[i].sn == NV50_SEMANTIC_TESSCOORD) + info->in[i].mask &= 3; + + for (c = 0; c < 4; ++c) + info->in[i].slot[c] = (offset + c * 0x4) / 4; + } + + return 0; +} + +static int +nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) +{ + unsigned count = info->prop.fp.numColourResults * 4; + unsigned i, c; + + for (i = 0; i < info->numOutputs; ++i) + if (info->out[i].sn == TGSI_SEMANTIC_COLOR) + for (c = 0; c < 4; ++c) + info->out[i].slot[c] = info->out[i].si * 4 + c; + + if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) + info->out[info->io.sampleMask].slot[0] = count++; + else + if (info->target >= 0xe0) + count++; /* on Kepler, depth is always last colour reg + 2 */ + + if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) + info->out[info->io.fragDepth].slot[2] = count; + + return 0; +} + +static int +nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info) +{ + unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10); + unsigned offset; + unsigned i, c; + + for (i = 0; i < info->numOutputs; ++i) { + offset = 
nvc0_shader_output_address(info->out[i].sn, + info->out[i].si, ubase); + if (info->out[i].patch && offset >= 0x20) + offset = 0x20 + info->out[i].si * 0x10; + + for (c = 0; c < 4; ++c) + info->out[i].slot[c] = (offset + c * 0x4) / 4; + } + + return 0; +} + +static int +nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info) +{ + int ret; + + if (info->type == PIPE_SHADER_VERTEX) + ret = nvc0_vp_assign_input_slots(info); + else + ret = nvc0_sp_assign_input_slots(info); + if (ret) + return ret; + + if (info->type == PIPE_SHADER_FRAGMENT) + ret = nvc0_fp_assign_output_slots(info); + else + ret = nvc0_sp_assign_output_slots(info); + return ret; +} + +static INLINE void +nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot) +{ + uint8_t min = (vp->hdr[4] >> 12) & 0xff; + uint8_t max = (vp->hdr[4] >> 24); + + min = MIN2(min, slot); + max = MAX2(max, slot); + + vp->hdr[4] = (max << 24) | (min << 12); +} + +/* Common part of header generation for VP, TCP, TEP and GP. */ +static int +nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) +{ + unsigned i, c, a; + + for (i = 0; i < info->numInputs; ++i) { + if (info->in[i].patch) + continue; + for (c = 0; c < 4; ++c) { + a = info->in[i].slot[c]; + if (info->in[i].mask & (1 << c)) { + if (info->in[i].sn != NV50_SEMANTIC_TESSCOORD) + vp->hdr[5 + a / 32] |= 1 << (a % 32); + else + nvc0_vtgp_hdr_update_oread(vp, info->in[i].slot[c]); + } + } + } + + for (i = 0; i < info->numOutputs; ++i) { + if (info->out[i].patch) + continue; + for (c = 0; c < 4; ++c) { + if (!(info->out[i].mask & (1 << c))) + continue; + assert(info->out[i].slot[c] >= 0x40 / 4); + a = info->out[i].slot[c] - 0x40 / 4; + vp->hdr[13 + a / 32] |= 1 << (a % 32); + if (info->out[i].oread) + nvc0_vtgp_hdr_update_oread(vp, info->out[i].slot[c]); + } + } + + for (i = 0; i < info->numSysVals; ++i) { + switch (info->sv[i].sn) { + case TGSI_SEMANTIC_PRIMID: + vp->hdr[5] |= 1 << 24; + break; + case TGSI_SEMANTIC_INSTANCEID: + 
vp->hdr[10] |= 1 << 30; + break; + case TGSI_SEMANTIC_VERTEXID: + vp->hdr[10] |= 1 << 31; + break; + default: + break; + } + } + + vp->vp.clip_enable = info->io.clipDistanceMask; + for (i = 0; i < 8; ++i) + if (info->io.cullDistanceMask & (1 << i)) + vp->vp.clip_mode |= 1 << (i * 4); + + if (info->io.genUserClip < 0) + vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */ + + return 0; +} + +static int +nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) +{ + vp->hdr[0] = 0x20061 | (1 << 10); + vp->hdr[4] = 0xff000; + + vp->hdr[18] = info->io.clipDistanceMask; + + return nvc0_vtgp_gen_header(vp, info); +} + +#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN) +static void +nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info) +{ + if (info->prop.tp.outputPrim == PIPE_PRIM_MAX) { + tp->tp.tess_mode = ~0; + return; + } + switch (info->prop.tp.domain) { + case PIPE_PRIM_LINES: + tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_ISOLINES; + break; + case PIPE_PRIM_TRIANGLES: + tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES; + if (info->prop.tp.winding > 0) + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW; + break; + case PIPE_PRIM_QUADS: + tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS; + break; + default: + tp->tp.tess_mode = ~0; + return; + } + if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS) + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED; + + switch (info->prop.tp.partitioning) { + case PIPE_TESS_PART_INTEGER: + case PIPE_TESS_PART_POW2: + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL; + break; + case PIPE_TESS_PART_FRACT_ODD: + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD; + break; + case PIPE_TESS_PART_FRACT_EVEN: + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN; + break; + default: + assert(!"invalid tessellator partitioning"); + break; + } +} +#endif + +#ifdef PIPE_SHADER_HULL +static int +nvc0_tcp_gen_header(struct nvc0_program *tcp, struct 
nv50_ir_prog_info *info) +{ + unsigned opcs = 6; /* output patch constants (at least the TessFactors) */ + + tcp->tp.input_patch_size = info->prop.tp.inputPatchSize; + + if (info->numPatchConstants) + opcs = 8 + info->numPatchConstants * 4; + + tcp->hdr[0] = 0x20061 | (2 << 10); + + tcp->hdr[1] = opcs << 24; + tcp->hdr[2] = info->prop.tp.outputPatchSize << 24; + + tcp->hdr[4] = 0xff000; /* initial min/max parallel output read address */ + + nvc0_vtgp_gen_header(tcp, info); + + nvc0_tp_get_tess_mode(tcp, info); + + return 0; +} +#endif + +#ifdef PIPE_SHADER_DOMAIN +static int +nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info) +{ + tep->tp.input_patch_size = ~0; + + tep->hdr[0] = 0x20061 | (3 << 10); + tep->hdr[4] = 0xff000; + + nvc0_vtgp_gen_header(tep, info); + + nvc0_tp_get_tess_mode(tep, info); + + tep->hdr[18] |= 0x3 << 12; /* ? */ + + return 0; +} +#endif + +static int +nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info) +{ + gp->hdr[0] = 0x20061 | (4 << 10); + + gp->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24; + + switch (info->prop.gp.outputPrim) { + case PIPE_PRIM_POINTS: + gp->hdr[3] = 0x01000000; + gp->hdr[0] |= 0xf0000000; + break; + case PIPE_PRIM_LINE_STRIP: + gp->hdr[3] = 0x06000000; + gp->hdr[0] |= 0x10000000; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + gp->hdr[3] = 0x07000000; + gp->hdr[0] |= 0x10000000; + break; + default: + assert(0); + break; + } + + gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff; + + return nvc0_vtgp_gen_header(gp, info); +} + +#define NVC0_INTERP_FLAT (1 << 0) +#define NVC0_INTERP_PERSPECTIVE (2 << 0) +#define NVC0_INTERP_LINEAR (3 << 0) +#define NVC0_INTERP_CENTROID (1 << 2) + +static uint8_t +nvc0_hdr_interp_mode(const struct nv50_ir_varying *var) +{ + if (var->linear) + return NVC0_INTERP_LINEAR; + if (var->flat) + return NVC0_INTERP_FLAT; + return NVC0_INTERP_PERSPECTIVE; +} + +static int +nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) 
+{ + unsigned i, c, a, m; + + /* just 00062 on Kepler */ + fp->hdr[0] = 0x20062 | (5 << 10); + fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ + + if (info->prop.fp.usesDiscard) + fp->hdr[0] |= 0x8000; + if (info->prop.fp.numColourResults > 1) + fp->hdr[0] |= 0x4000; + if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) + fp->hdr[19] |= 0x1; + if (info->prop.fp.writesDepth) { + fp->hdr[19] |= 0x2; + fp->flags[0] = 0x11; /* deactivate ZCULL */ + } + + for (i = 0; i < info->numInputs; ++i) { + m = nvc0_hdr_interp_mode(&info->in[i]); + for (c = 0; c < 4; ++c) { + if (!(info->in[i].mask & (1 << c))) + continue; + a = info->in[i].slot[c]; + if (info->in[i].slot[0] >= (0x060 / 4) && + info->in[i].slot[0] <= (0x07c / 4)) { + fp->hdr[5] |= 1 << (24 + (a - 0x060 / 4)); + } else + if (info->in[i].slot[0] >= (0x2c0 / 4) && + info->in[i].slot[0] <= (0x2fc / 4)) { + fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000; + } else { + if (info->in[i].slot[c] < (0x040 / 4) || + info->in[i].slot[c] > (0x380 / 4)) + continue; + a *= 2; + if (info->in[i].slot[0] >= (0x300 / 4)) + a -= 32; + fp->hdr[4 + a / 32] |= m << (a % 32); + } + } + } + + for (i = 0; i < info->numOutputs; ++i) { + if (info->out[i].sn == TGSI_SEMANTIC_COLOR) + fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0]; + } + + fp->fp.early_z = info->prop.fp.earlyFragTests; + + return 0; +} + +static struct nvc0_transform_feedback_state * +nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info, + const struct pipe_stream_output_info *pso) +{ + struct nvc0_transform_feedback_state *tfb; + unsigned b, i, c; + + tfb = MALLOC_STRUCT(nvc0_transform_feedback_state); + if (!tfb) + return NULL; + for (b = 0; b < 4; ++b) { + tfb->stride[b] = pso->stride[b] * 4; + tfb->varying_count[b] = 0; + } + memset(tfb->varying_index, 0xff, sizeof(tfb->varying_index)); /* = skip */ + + for (i = 0; i < pso->num_outputs; ++i) { + unsigned s = pso->output[i].start_component; + unsigned p = 
pso->output[i].dst_offset; + b = pso->output[i].output_buffer; + + for (c = 0; c < pso->output[i].num_components; ++c) + tfb->varying_index[b][p++] = + info->out[pso->output[i].register_index].slot[s + c]; + + tfb->varying_count[b] = MAX2(tfb->varying_count[b], p); + } + for (b = 0; b < 4; ++b) // zero unused indices (looks nicer) + for (c = tfb->varying_count[b]; c & 3; ++c) + tfb->varying_index[b][c] = 0; + + return tfb; +} + +#ifdef DEBUG +static void +nvc0_program_dump(struct nvc0_program *prog) +{ + unsigned pos; + + if (prog->type != PIPE_SHADER_COMPUTE) { + for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos) + debug_printf("HDR[%02lx] = 0x%08x\n", + pos * sizeof(prog->hdr[0]), prog->hdr[pos]); + } + debug_printf("shader binary code (0x%x bytes):", prog->code_size); + for (pos = 0; pos < prog->code_size / 4; ++pos) { + if ((pos % 8) == 0) + debug_printf("\n"); + debug_printf("%08x ", prog->code[pos]); + } + debug_printf("\n"); +} +#endif + +boolean +nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) +{ + struct nv50_ir_prog_info *info; + int ret; + + info = CALLOC_STRUCT(nv50_ir_prog_info); + if (!info) + return FALSE; + + info->type = prog->type; + info->target = chipset; + info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; + info->bin.source = (void *)prog->pipe.tokens; + + info->io.genUserClip = prog->vp.num_ucps; + info->io.ucpBase = 256; + info->io.ucpCBSlot = 15; + + if (prog->type == PIPE_SHADER_COMPUTE) { + if (chipset >= NVISA_GK104_CHIPSET) { + info->io.resInfoCBSlot = 0; + info->io.texBindBase = NVE4_CP_INPUT_TEX(0); + info->io.suInfoBase = NVE4_CP_INPUT_SUF(0); + info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0); + } + info->io.msInfoCBSlot = 0; + info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS; + } else { + if (chipset >= NVISA_GK104_CHIPSET) { + info->io.resInfoCBSlot = 15; + info->io.texBindBase = 0x20; + info->io.suInfoBase = 0; /* TODO */ + } + info->io.msInfoCBSlot = 15; + info->io.msInfoBase = 0; /* TODO */ + 
} + + info->assignSlots = nvc0_program_assign_varying_slots; + +#ifdef DEBUG + info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); + info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); +#else + info->optLevel = 3; +#endif + + ret = nv50_ir_generate_code(info); + if (ret) { + NOUVEAU_ERR("shader translation failed: %i\n", ret); + goto out; + } + if (prog->type != PIPE_SHADER_COMPUTE) + FREE(info->bin.syms); + + prog->code = info->bin.code; + prog->code_size = info->bin.codeSize; + prog->immd_data = info->immd.buf; + prog->immd_size = info->immd.bufSize; + prog->relocs = info->bin.relocData; + prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); + prog->num_barriers = info->numBarriers; + + prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; + + if (info->io.edgeFlagOut < PIPE_MAX_ATTRIBS) + info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */ + prog->vp.edgeflag = info->io.edgeFlagIn; + + switch (prog->type) { + case PIPE_SHADER_VERTEX: + ret = nvc0_vp_gen_header(prog, info); + break; +#ifdef PIPE_SHADER_HULL + case PIPE_SHADER_HULL: + ret = nvc0_tcp_gen_header(prog, info); + break; +#endif +#ifdef PIPE_SHADER_DOMAIN + case PIPE_SHADER_DOMAIN: + ret = nvc0_tep_gen_header(prog, info); + break; +#endif + case PIPE_SHADER_GEOMETRY: + ret = nvc0_gp_gen_header(prog, info); + break; + case PIPE_SHADER_FRAGMENT: + ret = nvc0_fp_gen_header(prog, info); + break; + case PIPE_SHADER_COMPUTE: + prog->cp.syms = info->bin.syms; + prog->cp.num_syms = info->bin.numSyms; + break; + default: + ret = -1; + NOUVEAU_ERR("unknown program type: %u\n", prog->type); + break; + } + if (ret) + goto out; + + if (info->bin.tlsSpace) { + assert(info->bin.tlsSpace < (1 << 24)); + prog->hdr[0] |= 1 << 26; + prog->hdr[1] |= info->bin.tlsSpace; /* l[] size */ + prog->need_tls = TRUE; + } + /* TODO: factor 2 only needed where joinat/precont is used, + * and we only have to count non-uniform branches + */ + /* + if ((info->maxCFDepth * 2) > 16) { + 
prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200; + prog->need_tls = TRUE; + } + */ + if (info->io.globalAccess) + prog->hdr[0] |= 1 << 16; + + if (prog->pipe.stream_output.num_outputs) + prog->tfb = nvc0_program_create_tfb_state(info, + &prog->pipe.stream_output); + +out: + FREE(info); + return !ret; +} + +boolean +nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + struct nvc0_screen *screen = nvc0->screen; + const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE; + int ret; + uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); + uint32_t lib_pos = screen->lib_code->start; + uint32_t code_pos; + + /* c[] bindings need to be aligned to 0x100, but we could use relocations + * to save space. */ + if (prog->immd_size) { + prog->immd_base = size; + size = align(size, 0x40); + size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */ + } + /* On Fermi, SP_START_ID must be aligned to 0x40. + * On Kepler, the first instruction must be aligned to 0x80 because + * latency information is expected only at certain positions. + */ + if (screen->base.class_3d >= NVE4_3D_CLASS) + size = size + (is_cp ? 
0x40 : 0x70); + size = align(size, 0x40); + + ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem); + if (ret) { + struct nouveau_heap *heap = screen->text_heap; + struct nouveau_heap *iter; + for (iter = heap; iter && iter->next != heap; iter = iter->next) { + struct nvc0_program *evict = iter->priv; + if (evict) + nouveau_heap_free(&evict->mem); + } + debug_printf("WARNING: out of code space, evicting all shaders.\n"); + ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); + if (ret) { + NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size); + return FALSE; + } + IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0); + } + prog->code_base = prog->mem->start; + prog->immd_base = align(prog->mem->start + prog->immd_base, 0x100); + assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <= + prog->mem->start + prog->mem->size)); + + if (!is_cp) { + if (screen->base.class_3d >= NVE4_3D_CLASS) { + switch (prog->mem->start & 0xff) { + case 0x40: prog->code_base += 0x70; break; + case 0x80: prog->code_base += 0x30; break; + case 0xc0: prog->code_base += 0x70; break; + default: + prog->code_base += 0x30; + assert((prog->mem->start & 0xff) == 0x00); + break; + } + } + code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE; + } else { + if (screen->base.class_3d >= NVE4_3D_CLASS) { + if (prog->mem->start & 0x40) + prog->code_base += 0x40; + assert((prog->code_base & 0x7f) == 0x00); + } + code_pos = prog->code_base; + } + + if (prog->relocs) + nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0); + +#ifdef DEBUG + if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE)) + nvc0_program_dump(prog); +#endif + + if (!is_cp) + nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base, + NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); + nvc0->base.push_data(&nvc0->base, screen->text, code_pos, + NOUVEAU_BO_VRAM, prog->code_size, prog->code); + if (prog->immd_size) + nvc0->base.push_data(&nvc0->base, + 
screen->text, prog->immd_base, NOUVEAU_BO_VRAM, + prog->immd_size, prog->immd_data); + + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1); + PUSH_DATA (nvc0->base.pushbuf, 0x1011); + + return TRUE; +} + +/* Upload code for builtin functions like integer division emulation. */ +void +nvc0_program_library_upload(struct nvc0_context *nvc0) +{ + struct nvc0_screen *screen = nvc0->screen; + int ret; + uint32_t size; + const uint32_t *code; + + if (screen->lib_code) + return; + + nv50_ir_get_target_library(screen->base.device->chipset, &code, &size); + if (!size) + return; + + ret = nouveau_heap_alloc(screen->text_heap, align(size, 0x100), NULL, + &screen->lib_code); + if (ret) + return; + + nvc0->base.push_data(&nvc0->base, + screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, + size, code); + /* no need for a memory barrier, will be emitted with first program */ +} + +void +nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + const struct pipe_shader_state pipe = prog->pipe; + const ubyte type = prog->type; + + if (prog->mem) + nouveau_heap_free(&prog->mem); + if (prog->code) + FREE(prog->code); /* may be 0 for hardcoded shaders */ + FREE(prog->immd_data); + FREE(prog->relocs); + if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms) + FREE(prog->cp.syms); + if (prog->tfb) { + if (nvc0->state.tfb == prog->tfb) + nvc0->state.tfb = NULL; + FREE(prog->tfb); + } + + memset(prog, 0, sizeof(*prog)); + + prog->pipe = pipe; + prog->type = type; +} + +uint32_t +nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label) +{ + const struct nv50_ir_prog_symbol *syms = + (const struct nv50_ir_prog_symbol *)prog->cp.syms; + unsigned base = 0; + unsigned i; + if (prog->type != PIPE_SHADER_COMPUTE) + base = NVC0_SHADER_HEADER_SIZE; + for (i = 0; i < prog->cp.num_syms; ++i) + if (syms[i].label == label) + return prog->code_base + base + syms[i].offset; + return prog->code_base; /* no symbols or symbol not found */ +} diff --git 
a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h new file mode 100644 index 00000000000..9c184d1f1d5 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h @@ -0,0 +1,68 @@ + +#ifndef __NVC0_PROGRAM_H__ +#define __NVC0_PROGRAM_H__ + +#include "pipe/p_state.h" + +#define NVC0_CAP_MAX_PROGRAM_TEMPS 128 + + +struct nvc0_transform_feedback_state { + uint32_t stride[4]; + uint8_t varying_count[4]; + uint8_t varying_index[4][128]; +}; + + +#define NVC0_SHADER_HEADER_SIZE (20 * 4) + +struct nvc0_program { + struct pipe_shader_state pipe; + + ubyte type; + boolean translated; + boolean need_tls; + uint8_t num_gprs; + + uint32_t *code; + uint32_t *immd_data; + unsigned code_base; + unsigned code_size; + unsigned immd_base; + unsigned immd_size; /* size of immediate array data */ + unsigned parm_size; /* size of non-bindable uniforms (c0[]) */ + + uint32_t hdr[20]; + uint32_t flags[2]; + + struct { + uint32_t clip_mode; /* clip/cull selection */ + uint8_t clip_enable; /* mask of defined clip planes */ + uint8_t num_ucps; /* also set to max if ClipDistance is used */ + uint8_t edgeflag; /* attribute index of edgeflag input */ + boolean need_vertex_id; + } vp; + struct { + uint8_t early_z; + uint8_t in_pos[PIPE_MAX_SHADER_INPUTS]; + } fp; + struct { + uint32_t tess_mode; /* ~0 if defined by the other stage */ + uint32_t input_patch_size; + } tp; + struct { + uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */ + uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */ + void *syms; + unsigned num_syms; + } cp; + uint8_t num_barriers; + + void *relocs; + + struct nvc0_transform_feedback_state *tfb; + + struct nouveau_heap *mem; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_push.c b/src/gallium/drivers/nouveau/nvc0/nvc0_push.c new file mode 100644 index 00000000000..15e8be6968d --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_push.c @@ -0,0 +1,409 @@ + +#include 
"pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_resource.h" + +#include "nvc0/nvc0_3d.xml.h" + +struct push_context { + struct nouveau_pushbuf *push; + + void *idxbuf; + + uint32_t vertex_words; + uint32_t packet_vertex_limit; + + struct translate *translate; + + boolean primitive_restart; + boolean need_vertex_id; + uint32_t prim; + uint32_t restart_index; + uint32_t instance_id; + + struct { + int buffer; + float value; + uint8_t *data; + unsigned offset; + unsigned stride; + } edgeflag; +}; + +static void +init_push_context(struct nvc0_context *nvc0, struct push_context *ctx) +{ + struct pipe_vertex_element *ve; + + ctx->push = nvc0->base.pushbuf; + ctx->translate = nvc0->vertex->translate; + + if (likely(nvc0->vertex->num_elements < 32)) + ctx->need_vertex_id = nvc0->vertprog->vp.need_vertex_id; + else + ctx->need_vertex_id = FALSE; + + ctx->edgeflag.buffer = -1; + ctx->edgeflag.value = 0.5f; + + if (unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) { + ve = &nvc0->vertex->element[nvc0->vertprog->vp.edgeflag].pipe; + ctx->edgeflag.buffer = ve->vertex_buffer_index; + ctx->edgeflag.offset = ve->src_offset; + ctx->packet_vertex_limit = 1; + } else { + ctx->packet_vertex_limit = nvc0->vertex->vtx_per_packet_max; + if (unlikely(ctx->need_vertex_id)) + ctx->packet_vertex_limit = 1; + } + + ctx->vertex_words = nvc0->vertex->vtx_size; +} + +static INLINE void +set_edgeflag(struct push_context *ctx, unsigned vtx_id) +{ + float f = *(float *)(ctx->edgeflag.data + vtx_id * ctx->edgeflag.stride); + + if (ctx->edgeflag.value != f) { + ctx->edgeflag.value = f; + IMMED_NVC0(ctx->push, NVC0_3D(EDGEFLAG), f ? 
1 : 0); + } +} + +static INLINE void +set_vertexid(struct push_context *ctx, uint32_t vtx_id) +{ +#if 0 + BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_ID), 1); /* broken on nvc0 */ +#else + BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_DATA), 1); /* as last attribute */ +#endif + PUSH_DATA (ctx->push, vtx_id); +} + +static INLINE unsigned +prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static INLINE unsigned +prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static INLINE unsigned +prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static void +emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) +{ + uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i08(elts, push, ctx->restart_index); + + if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr)) + set_edgeflag(ctx, elts[0]); + + size = ctx->vertex_words * nr; + + BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size); + + ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id, + ctx->push->cur); + ctx->push->cur += size; + + if (unlikely(ctx->need_vertex_id) && likely(size)) + set_vertexid(ctx, elts[0]); + + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_END_GL), 2); + PUSH_DATA (ctx->push, 0); + PUSH_DATA (ctx->push, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); + } + } +} + +static void +emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) +{ 
+ uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i16(elts, push, ctx->restart_index); + + if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr)) + set_edgeflag(ctx, elts[0]); + + size = ctx->vertex_words * nr; + + BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size); + + ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id, + ctx->push->cur); + ctx->push->cur += size; + + if (unlikely(ctx->need_vertex_id)) + set_vertexid(ctx, elts[0]); + + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_END_GL), 2); + PUSH_DATA (ctx->push, 0); + PUSH_DATA (ctx->push, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); + } + } +} + +static void +emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) +{ + uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i32(elts, push, ctx->restart_index); + + if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr)) + set_edgeflag(ctx, elts[0]); + + size = ctx->vertex_words * nr; + + BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size); + + ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id, + ctx->push->cur); + ctx->push->cur += size; + + if (unlikely(ctx->need_vertex_id)) + set_vertexid(ctx, elts[0]); + + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_END_GL), 2); + PUSH_DATA (ctx->push, 0); + PUSH_DATA (ctx->push, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); + } + } +} + +static void +emit_vertices_seq(struct push_context 
*ctx, unsigned start, unsigned count) +{ + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size = ctx->vertex_words * push; + + if (unlikely(ctx->edgeflag.buffer >= 0)) + set_edgeflag(ctx, start); + + BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size); + + ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id, + ctx->push->cur); + ctx->push->cur += size; + + if (unlikely(ctx->need_vertex_id)) + set_vertexid(ctx, start); + + count -= push; + start += push; + } +} + + +#define NVC0_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ + switch (prim) { + NVC0_PRIM_GL_CASE(POINTS); + NVC0_PRIM_GL_CASE(LINES); + NVC0_PRIM_GL_CASE(LINE_LOOP); + NVC0_PRIM_GL_CASE(LINE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLES); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLE_FAN); + NVC0_PRIM_GL_CASE(QUADS); + NVC0_PRIM_GL_CASE(QUAD_STRIP); + NVC0_PRIM_GL_CASE(POLYGON); + NVC0_PRIM_GL_CASE(LINES_ADJACENCY); + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NVC0_PRIM_GL_CASE(PATCHES); */ + default: + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + break; + } +} + +void +nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) +{ + struct push_context ctx; + unsigned i, index_size; + unsigned inst_count = info->instance_count; + unsigned vert_count = info->count; + boolean apply_bias = info->indexed && info->index_bias; + + init_push_context(nvc0, &ctx); + + for (i = 0; i < nvc0->num_vtxbufs; ++i) { + uint8_t *data; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; + struct nv04_resource *res = nv04_resource(vb->buffer); + + data = nouveau_resource_map_offset(&nvc0->base, res, + vb->buffer_offset, NOUVEAU_BO_RD); + + if (apply_bias && likely(!(nvc0->vertex->instance_bufs & (1 << i)))) + data += info->index_bias * vb->stride; + + 
ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); + + if (unlikely(i == ctx.edgeflag.buffer)) { + ctx.edgeflag.data = data + ctx.edgeflag.offset; + ctx.edgeflag.stride = vb->stride; + } + } + + if (info->indexed) { + ctx.idxbuf = + nouveau_resource_map_offset(&nvc0->base, + nv04_resource(nvc0->idxbuf.buffer), + nvc0->idxbuf.offset, NOUVEAU_BO_RD); + if (!ctx.idxbuf) + return; + index_size = nvc0->idxbuf.index_size; + ctx.primitive_restart = info->primitive_restart; + ctx.restart_index = info->restart_index; + } else { + ctx.idxbuf = NULL; + index_size = 0; + ctx.primitive_restart = FALSE; + ctx.restart_index = 0; + + if (info->count_from_stream_output) { + struct pipe_context *pipe = &nvc0->base.pipe; + struct nvc0_so_target *targ; + targ = nvc0_so_target(info->count_from_stream_output); + pipe->get_query_result(pipe, targ->pq, TRUE, (void*)&vert_count); + vert_count /= targ->stride; + } + } + + ctx.instance_id = info->start_instance; + ctx.prim = nvc0_prim_gl(info->mode); + + if (unlikely(ctx.need_vertex_id)) { + const unsigned a = nvc0->vertex->num_elements; + BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1); + PUSH_DATA (ctx.push, (a << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) | + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32); + BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 1); + PUSH_DATA (ctx.push, (((0x80 + a * 0x10) / 4) << 4) | 1); + } + + while (inst_count--) { + BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (ctx.push, ctx.prim); + switch (index_size) { + case 0: + emit_vertices_seq(&ctx, info->start, vert_count); + break; + case 1: + emit_vertices_i08(&ctx, info->start, vert_count); + break; + case 2: + emit_vertices_i16(&ctx, info->start, vert_count); + break; + case 4: + emit_vertices_i32(&ctx, info->start, vert_count); + break; + default: + assert(0); + break; + } + IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0); + + ctx.instance_id++; + ctx.prim |= 
NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + + if (unlikely(ctx.edgeflag.value == 0.0f)) + IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1); + + if (unlikely(ctx.need_vertex_id)) { + const unsigned a = nvc0->vertex->num_elements; + IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0); + BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1); + PUSH_DATA (ctx.push, + NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST | + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32); + } + + if (info->indexed) + nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer)); + + for (i = 0; i < nvc0->num_vtxbufs; ++i) + nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer)); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c new file mode 100644 index 00000000000..21aa3580e7c --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -0,0 +1,1448 @@ +/* + * Copyright 2011 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christoph Bumiller + */ + +#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING + +#include "nvc0/nvc0_context.h" +#include "nv_object.xml.h" +#include "nvc0/nve4_compute.xml.h" +#include "nvc0/nvc0_compute.xml.h" + +#define NVC0_QUERY_STATE_READY 0 +#define NVC0_QUERY_STATE_ACTIVE 1 +#define NVC0_QUERY_STATE_ENDED 2 +#define NVC0_QUERY_STATE_FLUSHED 3 + +struct nvc0_query { + uint32_t *data; + uint16_t type; + uint16_t index; + int8_t ctr[4]; + uint32_t sequence; + struct nouveau_bo *bo; + uint32_t base; + uint32_t offset; /* base + i * rotate */ + uint8_t state; + boolean is64bit; + uint8_t rotate; + int nesting; /* only used for occlusion queries */ + union { + struct nouveau_mm_allocation *mm; + uint64_t value; + } u; + struct nouveau_fence *fence; +}; + +#define NVC0_QUERY_ALLOC_SPACE 256 + +static void nvc0_mp_pm_query_begin(struct nvc0_context *, struct nvc0_query *); +static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *); +static boolean nvc0_mp_pm_query_result(struct nvc0_context *, + struct nvc0_query *, void *, boolean); + +static INLINE struct nvc0_query * +nvc0_query(struct pipe_query *pipe) +{ + return (struct nvc0_query *)pipe; +} + +static boolean +nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size) +{ + struct nvc0_screen *screen = nvc0->screen; + int ret; + + if (q->bo) { + nouveau_bo_ref(NULL, &q->bo); + if (q->u.mm) { + if (q->state == NVC0_QUERY_STATE_READY) + nouveau_mm_free(q->u.mm); + else + nouveau_fence_work(screen->base.fence.current, + nouveau_mm_free_work, q->u.mm); + } + } + if (size) { + q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base); + if (!q->bo) + return FALSE; + q->offset = 
q->base; + + ret = nouveau_bo_map(q->bo, 0, screen->base.client); + if (ret) { + nvc0_query_allocate(nvc0, q, 0); + return FALSE; + } + q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base); + } + return TRUE; +} + +static void +nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0); + nouveau_fence_ref(NULL, &nvc0_query(pq)->fence); + FREE(nvc0_query(pq)); +} + +static struct pipe_query * +nvc0_query_create(struct pipe_context *pipe, unsigned type) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_query *q; + unsigned space = NVC0_QUERY_ALLOC_SPACE; + + q = CALLOC_STRUCT(nvc0_query); + if (!q) + return NULL; + + switch (type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + q->rotate = 32; + space = NVC0_QUERY_ALLOC_SPACE; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + q->is64bit = TRUE; + space = 512; + break; + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + q->is64bit = TRUE; + space = 64; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + q->is64bit = TRUE; + space = 32; + break; + case PIPE_QUERY_TIME_ELAPSED: + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_GPU_FINISHED: + space = 32; + break; + case NVC0_QUERY_TFB_BUFFER_OFFSET: + space = 16; + break; + default: +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + if (type >= NVC0_QUERY_DRV_STAT(0) && type <= NVC0_QUERY_DRV_STAT_LAST) { + space = 0; + q->is64bit = true; + q->index = type - NVC0_QUERY_DRV_STAT(0); + break; + } else +#endif + if (nvc0->screen->base.device->drm_version >= 0x01000101) { + if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) { + /* for each MP: + * [00] = WS0.C0 + * [04] = WS0.C1 + * [08] = WS0.C2 + * [0c] = WS0.C3 + * [10] = WS1.C0 + * [14] = WS1.C1 + * [18] = WS1.C2 + * [1c] = WS1.C3 + * [20] = WS2.C0 + * [24] = WS2.C1 + * [28] = WS2.C2 + * [2c] = 
WS2.C3 + * [30] = WS3.C0 + * [34] = WS3.C1 + * [38] = WS3.C2 + * [3c] = WS3.C3 + * [40] = MP.C4 + * [44] = MP.C5 + * [48] = MP.C6 + * [4c] = MP.C7 + * [50] = WS0.sequence + * [54] = WS1.sequence + * [58] = WS2.sequence + * [5c] = WS3.sequence + */ + space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t); + break; + } else + if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) { + /* for each MP: + * [00] = MP.C0 + * [04] = MP.C1 + * [08] = MP.C2 + * [0c] = MP.C3 + * [10] = MP.C4 + * [14] = MP.C5 + * [18] = MP.C6 + * [1c] = MP.C7 + * [20] = MP.sequence + */ + space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t); + break; + } + } + debug_printf("invalid query type: %u\n", type); + FREE(q); + return NULL; + } + if (!nvc0_query_allocate(nvc0, q, space)) { + FREE(q); + return NULL; + } + + q->type = type; + + if (q->rotate) { + /* we advance before query_begin ! */ + q->offset -= q->rotate; + q->data -= q->rotate / sizeof(*q->data); + } else + if (!q->is64bit) + q->data[0] = 0; /* initialize sequence */ + + return (struct pipe_query *)q; +} + +static void +nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q, + unsigned offset, uint32_t get) +{ + offset += q->offset; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4); + PUSH_DATAh(push, q->bo->offset + offset); + PUSH_DATA (push, q->bo->offset + offset); + PUSH_DATA (push, q->sequence); + PUSH_DATA (push, get); +} + +static void +nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q) +{ + q->offset += q->rotate; + q->data += q->rotate / sizeof(*q->data); + if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE) + nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE); +} + +static void +nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_query *q = 
nvc0_query(pq); + + /* For occlusion queries we have to change the storage, because a previous + * query might set the initial render conition to FALSE even *after* we re- + * initialized it to TRUE. + */ + if (q->rotate) { + nvc0_query_rotate(nvc0, q); + + /* XXX: can we do this with the GPU, and sync with respect to a previous + * query ? + */ + q->data[0] = q->sequence; /* initialize sequence */ + q->data[1] = 1; /* initial render condition = TRUE */ + q->data[4] = q->sequence + 1; /* for comparison COND_MODE */ + q->data[5] = 0; + } + q->sequence++; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + q->nesting = nvc0->screen->num_occlusion_queries_active++; + if (q->nesting) { + nvc0_query_get(push, q, 0x10, 0x0100f002); + } else { + PUSH_SPACE(push, 3); + BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1); + PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT); + IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1); + } + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5)); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5)); + break; + case PIPE_QUERY_SO_STATISTICS: + nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5)); + nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5)); + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5)); + break; + case PIPE_QUERY_TIME_ELAPSED: + nvc0_query_get(push, q, 0x10, 0x00005002); + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */ + nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */ + nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */ + nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */ + nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */ + nvc0_query_get(push, q, 
0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */ + nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */ + nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */ + nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */ + nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */ + break; + default: +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + if (q->type >= NVC0_QUERY_DRV_STAT(0) && + q->type <= NVC0_QUERY_DRV_STAT_LAST) { + if (q->index >= 5) + q->u.value = nvc0->screen->base.stats.v[q->index]; + else + q->u.value = 0; + } else +#endif + if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) || + (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) { + nvc0_mp_pm_query_begin(nvc0, q); + } + break; + } + q->state = NVC0_QUERY_STATE_ACTIVE; +} + +static void +nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_query *q = nvc0_query(pq); + + if (q->state != NVC0_QUERY_STATE_ACTIVE) { + /* some queries don't require 'begin' to be called (e.g. 
GPU_FINISHED) */ + if (q->rotate) + nvc0_query_rotate(nvc0, q); + q->sequence++; + } + q->state = NVC0_QUERY_STATE_ENDED; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + nvc0_query_get(push, q, 0, 0x0100f002); + if (--nvc0->screen->num_occlusion_queries_active == 0) { + PUSH_SPACE(push, 1); + IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0); + } + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5)); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5)); + break; + case PIPE_QUERY_SO_STATISTICS: + nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5)); + nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5)); + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + /* TODO: How do we sum over all streams for render condition ? */ + /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */ + nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5)); + nvc0_query_get(push, q, 0x20, 0x00005002); + break; + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIME_ELAPSED: + nvc0_query_get(push, q, 0, 0x00005002); + break; + case PIPE_QUERY_GPU_FINISHED: + nvc0_query_get(push, q, 0, 0x1000f010); + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */ + nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */ + nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */ + nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */ + nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */ + nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */ + nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */ + nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ + nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */ + nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */ + break; + case 
NVC0_QUERY_TFB_BUFFER_OFFSET: + /* indexed by TFB buffer instead of by vertex stream */ + nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5)); + break; + default: +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + if (q->type >= NVC0_QUERY_DRV_STAT(0) && + q->type <= NVC0_QUERY_DRV_STAT_LAST) { + q->u.value = nvc0->screen->base.stats.v[q->index] - q->u.value; + return; + } else +#endif + if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) || + (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) { + nvc0_mp_pm_query_end(nvc0, q); + } + break; + } + if (q->is64bit) + nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence); +} + +static INLINE void +nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q) +{ + if (q->is64bit) { + if (nouveau_fence_signalled(q->fence)) + q->state = NVC0_QUERY_STATE_READY; + } else { + if (q->data[0] == q->sequence) + q->state = NVC0_QUERY_STATE_READY; + } +} + +static boolean +nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, union pipe_query_result *result) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_query *q = nvc0_query(pq); + uint64_t *res64 = (uint64_t*)result; + uint32_t *res32 = (uint32_t*)result; + boolean *res8 = (boolean*)result; + uint64_t *data64 = (uint64_t *)q->data; + unsigned i; + +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + if (q->type >= NVC0_QUERY_DRV_STAT(0) && + q->type <= NVC0_QUERY_DRV_STAT_LAST) { + res64[0] = q->u.value; + return TRUE; + } else +#endif + if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) || + (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) { + return nvc0_mp_pm_query_result(nvc0, q, result, wait); + } + + if (q->state != NVC0_QUERY_STATE_READY) + nvc0_query_update(nvc0->screen->base.client, q); + + if (q->state != NVC0_QUERY_STATE_READY) { + if (!wait) { + if (q->state != NVC0_QUERY_STATE_FLUSHED) { + q->state = NVC0_QUERY_STATE_FLUSHED; + /* flush for silly apps 
that spin on GL_QUERY_RESULT_AVAILABLE */ + PUSH_KICK(nvc0->base.pushbuf); + } + return FALSE; + } + if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client)) + return FALSE; + NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1); + } + q->state = NVC0_QUERY_STATE_READY; + + switch (q->type) { + case PIPE_QUERY_GPU_FINISHED: + res8[0] = TRUE; + break; + case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ + res64[0] = q->data[1] - q->data[5]; + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + res8[0] = q->data[1] != q->data[5]; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ + case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ + res64[0] = data64[0] - data64[2]; + break; + case PIPE_QUERY_SO_STATISTICS: + res64[0] = data64[0] - data64[4]; + res64[1] = data64[2] - data64[6]; + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + res8[0] = data64[0] != data64[2]; + break; + case PIPE_QUERY_TIMESTAMP: + res64[0] = data64[1]; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + res64[0] = 1000000000; + res8[8] = FALSE; + break; + case PIPE_QUERY_TIME_ELAPSED: + res64[0] = data64[1] - data64[3]; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + for (i = 0; i < 10; ++i) + res64[i] = data64[i * 2] - data64[24 + i * 2]; + break; + case NVC0_QUERY_TFB_BUFFER_OFFSET: + res32[0] = q->data[1]; + break; + default: + assert(0); /* can't happen, we don't create queries with invalid type */ + return FALSE; + } + + return TRUE; +} + +void +nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq) +{ + struct nvc0_query *q = nvc0_query(pq); + unsigned offset = q->offset; + + if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); + PUSH_DATAh(push, q->bo->offset + offset); + PUSH_DATA (push, q->bo->offset + offset); + PUSH_DATA (push, 
q->sequence); + PUSH_DATA (push, (1 << 12) | + NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); +} + +static void +nvc0_render_condition(struct pipe_context *pipe, + struct pipe_query *pq, + boolean condition, uint mode) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_query *q; + uint32_t cond; + boolean negated = FALSE; + boolean wait = + mode != PIPE_RENDER_COND_NO_WAIT && + mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; + + nvc0->cond_query = pq; + nvc0->cond_cond = condition; + nvc0->cond_mode = mode; + + if (!pq) { + PUSH_SPACE(push, 1); + IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + return; + } + q = nvc0_query(pq); + + /* NOTE: comparison of 2 queries only works if both have completed */ + switch (q->type) { + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + cond = negated ? NVC0_3D_COND_MODE_EQUAL : + NVC0_3D_COND_MODE_NOT_EQUAL; + wait = TRUE; + break; + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + if (likely(!negated)) { + if (unlikely(q->nesting)) + cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL : + NVC0_3D_COND_MODE_ALWAYS; + else + cond = NVC0_3D_COND_MODE_RES_NON_ZERO; + } else { + cond = wait ? 
NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS; + } + break; + default: + assert(!"render condition query not a predicate"); + mode = NVC0_3D_COND_MODE_ALWAYS; + break; + } + + if (wait) + nvc0_query_fifo_wait(push, pq); + + PUSH_SPACE(push, 4); + PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3); + PUSH_DATAh(push, q->bo->offset + q->offset); + PUSH_DATA (push, q->bo->offset + q->offset); + PUSH_DATA (push, cond); +} + +void +nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push, + struct pipe_query *pq, unsigned result_offset) +{ + struct nvc0_query *q = nvc0_query(pq); + +#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8)) + + nouveau_pushbuf_space(push, 0, 0, 1); + nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 | + NVC0_IB_ENTRY_1_NO_PREFETCH); +} + +void +nvc0_so_target_save_offset(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg, + unsigned index, boolean *serialize) +{ + struct nvc0_so_target *targ = nvc0_so_target(ptarg); + + if (*serialize) { + *serialize = FALSE; + PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1); + IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0); + + NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1); + } + + nvc0_query(targ->pq)->index = index; + + nvc0_query_end(pipe, targ->pq); +} + + +/* === DRIVER STATISTICS === */ + +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + +static const char *nvc0_drv_stat_names[] = +{ + "drv-tex_obj_current_count", + "drv-tex_obj_current_bytes", + "drv-buf_obj_current_count", + "drv-buf_obj_current_bytes_vid", + "drv-buf_obj_current_bytes_sys", + "drv-tex_transfers_rd", + "drv-tex_transfers_wr", + "drv-tex_copy_count", + "drv-tex_blit_count", + "drv-tex_cache_flush_count", + "drv-buf_transfers_rd", + "drv-buf_transfers_wr", + "drv-buf_read_bytes_staging_vid", + "drv-buf_write_bytes_direct", + "drv-buf_write_bytes_staging_vid", + "drv-buf_write_bytes_staging_sys", + 
"drv-buf_copy_bytes", + "drv-buf_non_kernel_fence_sync_count", + "drv-any_non_kernel_fence_sync_count", + "drv-query_sync_count", + "drv-gpu_serialize_count", + "drv-draw_calls_array", + "drv-draw_calls_indexed", + "drv-draw_calls_fallback_count", + "drv-user_buffer_upload_bytes", + "drv-constbuf_upload_count", + "drv-constbuf_upload_bytes", + "drv-pushbuf_count", + "drv-resource_validate_count" +}; + +#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */ + + +/* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */ + +/* Code to read out MP counters: They are accessible via mmio, too, but let's + * just avoid mapping registers in userspace. We'd have to know which MPs are + * enabled/present, too, and that information is not presently exposed. + * We could add a kernel interface for it, but reading the counters like this + * has the advantage of being async (if get_result isn't called immediately). + */ +static const uint64_t nve4_read_mp_pm_counters_code[] = +{ + /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20 + * mov b32 $r8 $tidx + * mov b32 $r12 $physid + * mov b32 $r0 $pm0 + * mov b32 $r1 $pm1 + * mov b32 $r2 $pm2 + * mov b32 $r3 $pm3 + * mov b32 $r4 $pm4 + * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b + * mov b32 $r5 $pm5 + * mov b32 $r6 $pm6 + * mov b32 $r7 $pm7 + * set $p0 0x1 eq u32 $r8 0x0 + * mov b32 $r10 c0[0x0] + * ext u32 $r8 $r12 0x414 + * mov b32 $r11 c0[0x4] + * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04 + * ext u32 $r9 $r12 0x208 + * (not $p0) exit + * set $p1 0x1 eq u32 $r9 0x0 + * mul $r8 u32 $r8 u32 96 + * mul $r12 u32 $r9 u32 16 + * mul $r13 u32 $r9 u32 4 + * add b32 $r9 $r8 $r13 + * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c + * add b32 $r8 $r8 $r12 + * mov b32 $r12 $r10 + * add b32 $r10 $c $r10 $r8 + * mov b32 $r13 $r11 + * add b32 $r11 $r11 0x0 $c + * add b32 $r12 $c $r12 $r9 + * st b128 wt g[$r10d] $r0q + * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00 + * mov b32 $r0 c0[0x8] + * add b32 $r13 $r13 0x0 $c + * $p1 st b128 wt g[$r12d+0x40] $r4q + * st b32 wt 
g[$r12d+0x50] $r0 + * exit */ + 0x2202020202020207ULL, + 0x2c00000084021c04ULL, + 0x2c0000000c031c04ULL, + 0x2c00000010001c04ULL, + 0x2c00000014005c04ULL, + 0x2c00000018009c04ULL, + 0x2c0000001c00dc04ULL, + 0x2c00000020011c04ULL, + 0x22b0420042320207ULL, + 0x2c00000024015c04ULL, + 0x2c00000028019c04ULL, + 0x2c0000002c01dc04ULL, + 0x190e0000fc81dc03ULL, + 0x2800400000029de4ULL, + 0x7000c01050c21c03ULL, + 0x280040001002dde4ULL, + 0x204282020042e047ULL, + 0x7000c00820c25c03ULL, + 0x80000000000021e7ULL, + 0x190e0000fc93dc03ULL, + 0x1000000180821c02ULL, + 0x1000000040931c02ULL, + 0x1000000010935c02ULL, + 0x4800000034825c03ULL, + 0x22c042c042c04287ULL, + 0x4800000030821c03ULL, + 0x2800000028031de4ULL, + 0x4801000020a29c03ULL, + 0x280000002c035de4ULL, + 0x0800000000b2dc42ULL, + 0x4801000024c31c03ULL, + 0x9400000000a01fc5ULL, + 0x200002e04202c047ULL, + 0x2800400020001de4ULL, + 0x0800000000d35c42ULL, + 0x9400000100c107c5ULL, + 0x9400000140c01f85ULL, + 0x8000000000001de7ULL +}; + +/* NOTE: intentionally using the same names as NV */ +static const char *nve4_pm_query_names[] = +{ + /* MP counters */ + "prof_trigger_00", + "prof_trigger_01", + "prof_trigger_02", + "prof_trigger_03", + "prof_trigger_04", + "prof_trigger_05", + "prof_trigger_06", + "prof_trigger_07", + "warps_launched", + "threads_launched", + "sm_cta_launched", + "inst_issued1", + "inst_issued2", + "inst_executed", + "local_load", + "local_store", + "shared_load", + "shared_store", + "l1_local_load_hit", + "l1_local_load_miss", + "l1_local_store_hit", + "l1_local_store_miss", + "gld_request", + "gst_request", + "l1_global_load_hit", + "l1_global_load_miss", + "uncached_global_load_transaction", + "global_store_transaction", + "branch", + "divergent_branch", + "active_warps", + "active_cycles", + "inst_issued", + "atom_count", + "gred_count", + "shared_load_replay", + "shared_store_replay", + "local_load_transactions", + "local_store_transactions", + "l1_shared_load_transactions", + 
"l1_shared_store_transactions", + "global_ld_mem_divergence_replays", + "global_st_mem_divergence_replays", + /* metrics, i.e. functions of the MP counters */ + "metric-ipc", /* inst_executed, clock */ + "metric-ipac", /* inst_executed, active_cycles */ + "metric-ipec", /* inst_executed, (bool)inst_executed */ + "metric-achieved_occupancy", /* active_warps, active_cycles */ + "metric-sm_efficiency", /* active_cycles, clock */ + "metric-inst_replay_overhead" /* inst_issued, inst_executed */ +}; + +/* For simplicity, we will allocate as many group slots as we allocate counter + * slots. This means that a single counter which wants to source from 2 groups + * will have to be declared as using 2 counter slots. This shouldn't really be + * a problem because such queries don't make much sense ... (unless someone is + * really creative). + */ +struct nvc0_mp_counter_cfg +{ + uint32_t func : 16; /* mask or 4-bit logic op (depending on mode) */ + uint32_t mode : 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */ + uint32_t num_src : 3; /* number of sources (1 - 6, only for NVC0:NVE4) */ + uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */ + uint32_t sig_sel : 8; /* signal group */ + uint64_t src_sel; /* signal selection for up to 6 sources (48 bit) */ +}; + +#define NVC0_COUNTER_OPn_SUM 0 +#define NVC0_COUNTER_OPn_OR 1 +#define NVC0_COUNTER_OPn_AND 2 +#define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */ +#define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */ +#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */ +#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */ + +struct nvc0_mp_pm_query_cfg +{ + struct nvc0_mp_counter_cfg ctr[4]; + uint8_t num_counters; + uint8_t op; + uint8_t norm[2]; /* normalization num,denom */ +}; + +#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, 
NVC0_COUNTER_OPn_SUM, { nu, dn } } +#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } } +#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \ + { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \ + { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \ + {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } } +#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \ + { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \ + { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \ + {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } } +#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \ + { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \ + { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \ + {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } } + +/* NOTES: + * active_warps: bit 0 alternates btw 0 and 1 for odd nr of warps + * inst_executed etc.: we only count a single warp scheduler + * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers; + * this is inaccurate ! 
+ */ +static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] = +{ + _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1), + _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1), + _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1), + _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1), + _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1), + _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1), + _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1), + _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1), + _Q1A(LAUNCHED_WARPS, 0x0001, B6, LAUNCH, 0x00000004, 1, 1), + _Q1A(LAUNCHED_THREADS, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1), + _Q1B(LAUNCHED_CTA, 0x0001, B6, WARP, 0x0000001c, 1, 1), + _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1), + _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1), + _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1), + _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1), + _Q1A(LD_SHARED, 0x0001, B6, LDST, 0x00000000, 1, 1), + _Q1A(ST_SHARED, 0x0001, B6, LDST, 0x00000004, 1, 1), + _Q1A(LD_LOCAL, 0x0001, B6, LDST, 0x00000008, 1, 1), + _Q1A(ST_LOCAL, 0x0001, B6, LDST, 0x0000000c, 1, 1), + _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1), + _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1), + _Q1B(L1_LOCAL_LOAD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1), + _Q1B(L1_LOCAL_LOAD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1), + _Q1B(L1_LOCAL_STORE_HIT, 0x0001, B6, L1, 0x00000008, 1, 1), + _Q1B(L1_LOCAL_STORE_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1), + _Q1B(L1_GLOBAL_LOAD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1), + _Q1B(L1_GLOBAL_LOAD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1), + _Q1B(GLD_TRANSACTIONS_UNCACHED, 0x0001, B6, MEM, 0x00000000, 1, 1), + _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1), + _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1), + _Q1A(BRANCH_DIVERGENT, 0x0001, B6, BRANCH, 0x00000010, 1, 1), + _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1), + 
_Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1), + _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1), + _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1), + _Q1B(LD_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1), + _Q1B(ST_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1), + _Q1B(LD_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1), + _Q1B(ST_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1), + _Q1B(L1_LD_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1), + _Q1B(L1_ST_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1), + _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1), + _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1), + _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1), + _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1), + _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1), + _M2A(INST_REPLAY_OHEAD, 0x3, B6, ISSUE, 0x104, 0x3, B6, EXEC, 0x398, REL_SUM_MM, 100, 1), + _M2B(MP_OCCUPANCY, 0x3f, B6, WARP, 0x31483104, 0x01, B6, WARP, 0x0, AVG_DIV_MM, 200, 64), + _M2B(MP_EFFICIENCY, 0x01, B6, WARP, 0x0, 0xffff, LOGOP, WARP, 0x0, AVG_DIV_M0, 100, 1), +}; + +#undef _Q1A +#undef _Q1B +#undef _M2A +#undef _M2B + +/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */ +static const uint64_t nvc0_read_mp_pm_counters_code[] = +{ + /* mov b32 $r8 $tidx + * mov b32 $r9 $physid + * mov b32 $r0 $pm0 + * mov b32 $r1 $pm1 + * mov b32 $r2 $pm2 + * mov b32 $r3 $pm3 + * mov b32 $r4 $pm4 + * mov b32 $r5 $pm5 + * mov b32 $r6 $pm6 + * mov b32 $r7 $pm7 + * set $p0 0x1 eq u32 $r8 0x0 + * mov b32 $r10 c0[0x0] + * mov b32 $r11 c0[0x4] + * ext u32 $r8 $r9 0x414 + * (not $p0) exit + * mul $r8 u32 $r8 u32 36 + * add b32 $r10 $c $r10 $r8 + * add b32 $r11 $r11 0x0 $c + * mov b32 $r8 c0[0x8] + * st b128 wt g[$r10d+0x00] $r0q + * st b128 wt g[$r10d+0x10] $r4q + * st b32 wt g[$r10d+0x20] $r8 + * exit */ + 
0x2c00000084021c04ULL, + 0x2c0000000c025c04ULL, + 0x2c00000010001c04ULL, + 0x2c00000014005c04ULL, + 0x2c00000018009c04ULL, + 0x2c0000001c00dc04ULL, + 0x2c00000020011c04ULL, + 0x2c00000024015c04ULL, + 0x2c00000028019c04ULL, + 0x2c0000002c01dc04ULL, + 0x190e0000fc81dc03ULL, + 0x2800400000029de4ULL, + 0x280040001002dde4ULL, + 0x7000c01050921c03ULL, + 0x80000000000021e7ULL, + 0x1000000090821c02ULL, + 0x4801000020a29c03ULL, + 0x0800000000b2dc42ULL, + 0x2800400020021de4ULL, + 0x9400000000a01fc5ULL, + 0x9400000040a11fc5ULL, + 0x9400000080a21f85ULL, + 0x8000000000001de7ULL +}; + +static const char *nvc0_pm_query_names[] = +{ + /* MP counters */ + "inst_executed", + "branch", + "divergent_branch", + "active_warps", + "active_cycles", + "warps_launched", + "threads_launched", + "shared_load", + "shared_store", + "local_load", + "local_store", + "gred_count", + "atom_count", + "gld_request", + "gst_request", + "inst_issued1_0", + "inst_issued1_1", + "inst_issued2_0", + "inst_issued2_1", + "thread_inst_executed_0", + "thread_inst_executed_1", + "thread_inst_executed_2", + "thread_inst_executed_3", + "prof_trigger_00", + "prof_trigger_01", + "prof_trigger_02", + "prof_trigger_03", + "prof_trigger_04", + "prof_trigger_05", + "prof_trigger_06", + "prof_trigger_07", +}; + +#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } } + +static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] = +{ + _Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00), + _Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00), + _Q(BRANCH_DIVERGENT, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00), + _Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), + _Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + 
_Q(LAUNCHED_WARPS, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(LAUNCHED_THREADS, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), + _Q(LD_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(ST_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(LD_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(ST_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(ATOM_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(GLD_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(GST_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(INST_ISSUED1_0, 0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(INST_ISSUED1_1, 0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(INST_ISSUED2_0, 0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(INST_ISSUED2_1, 0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(PROF_TRIGGER_0, 0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_1, 0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_2, 0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_3, 0xaaaa, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_4, 0xaaaa, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_5, 0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_6, 0xaaaa, LOGOP, 0x01, 1, 0x60, 
0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_7, 0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00), +}; + +#undef _Q + +static const struct nvc0_mp_pm_query_cfg * +nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q) +{ + struct nvc0_screen *screen = nvc0->screen; + + if (screen->base.class_3d >= NVE4_3D_CLASS) + return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC]; + return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)]; +} + +void +nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q) +{ + struct nvc0_screen *screen = nvc0->screen; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const struct nvc0_mp_pm_query_cfg *cfg; + unsigned i, c; + unsigned num_ab[2] = { 0, 0 }; + + cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + + /* check if we have enough free counter slots */ + for (i = 0; i < cfg->num_counters; ++i) + num_ab[cfg->ctr[i].sig_dom]++; + + if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 || + screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) { + NOUVEAU_ERR("Not enough free MP counter slots !\n"); + return; + } + + assert(cfg->num_counters <= 4); + PUSH_SPACE(push, 4 * 8 + 6); + + if (!screen->pm.mp_counters_enabled) { + screen->pm.mp_counters_enabled = TRUE; + BEGIN_NVC0(push, SUBC_SW(0x06ac), 1); + PUSH_DATA (push, 0x1fcb); + } + + /* set sequence field to 0 (used to check if result is available) */ + for (i = 0; i < screen->mp_count; ++i) + q->data[i * 10 + 10] = 0; + + for (i = 0; i < cfg->num_counters; ++i) { + const unsigned d = cfg->ctr[i].sig_dom; + + if (!screen->pm.num_mp_pm_active[d]) { + uint32_t m = (1 << 22) | (1 << (7 + (8 * !d))); + if (screen->pm.num_mp_pm_active[!d]) + m |= 1 << (7 + (8 * d)); + BEGIN_NVC0(push, SUBC_SW(0x0600), 1); + PUSH_DATA (push, m); + } + screen->pm.num_mp_pm_active[d]++; + + for (c = d * 4; c < (d * 4 + 4); ++c) { + if (!screen->pm.mp_counter[c]) { + q->ctr[i] = c; + screen->pm.mp_counter[c] = (struct pipe_query *)q; + break; + } + } + assert(c <= (d 
* 4 + 3)); /* must succeed, already checked for space */ + + /* configure and reset the counter(s) */ + if (screen->base.class_3d >= NVE4_3D_CLASS) { + if (d == 0) + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1); + else + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1); + PUSH_DATA (push, cfg->ctr[i].sig_sel); + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1); + PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3)); + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1); + PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1); + PUSH_DATA (push, 0); + } else { + unsigned s; + + for (s = 0; s < cfg->ctr[i].num_src; s++) { + BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(s)), 1); + PUSH_DATA (push, cfg->ctr[i].sig_sel); + BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(s)), 1); + PUSH_DATA (push, (cfg->ctr[i].src_sel >> (s * 8)) & 0xff); + BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(s)), 1); + PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); + BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(s)), 1); + PUSH_DATA (push, 0); + } + } + } +} + +static void +nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) +{ + struct nvc0_screen *screen = nvc0->screen; + struct pipe_context *pipe = &nvc0->base.pipe; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS; + uint32_t mask; + uint32_t input[3]; + const uint block[3] = { 32, is_nve4 ? 
4 : 1, 1 }; + const uint grid[3] = { screen->mp_count, 1, 1 }; + unsigned c; + const struct nvc0_mp_pm_query_cfg *cfg; + + cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + + if (unlikely(!screen->pm.prog)) { + struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program); + prog->type = PIPE_SHADER_COMPUTE; + prog->translated = TRUE; + prog->num_gprs = 14; + prog->parm_size = 12; + if (is_nve4) { + prog->code = (uint32_t *)nve4_read_mp_pm_counters_code; + prog->code_size = sizeof(nve4_read_mp_pm_counters_code); + } else { + prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code; + prog->code_size = sizeof(nvc0_read_mp_pm_counters_code); + } + screen->pm.prog = prog; + } + + /* disable all counting */ + PUSH_SPACE(push, 8); + for (c = 0; c < 8; ++c) + if (screen->pm.mp_counter[c]) { + if (is_nve4) { + IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0); + } else { + IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0); + } + } + /* release counters for this query */ + for (c = 0; c < 8; ++c) { + if (nvc0_query(screen->pm.mp_counter[c]) == q) { + screen->pm.num_mp_pm_active[c / 4]--; + screen->pm.mp_counter[c] = NULL; + } + } + + BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR, + q->bo); + + PUSH_SPACE(push, 1); + IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0); + + pipe->bind_compute_state(pipe, screen->pm.prog); + input[0] = (q->bo->offset + q->base); + input[1] = (q->bo->offset + q->base) >> 32; + input[2] = q->sequence; + pipe->launch_grid(pipe, block, grid, 0, input); + + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY); + + /* re-activate other counters */ + PUSH_SPACE(push, 16); + mask = 0; + for (c = 0; c < 8; ++c) { + unsigned i; + q = nvc0_query(screen->pm.mp_counter[c]); + if (!q) + continue; + cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + for (i = 0; i < cfg->num_counters; ++i) { + if (mask & (1 << q->ctr[i])) + break; + mask |= 1 << q->ctr[i]; + if (is_nve4) { + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1); + } else { + 
BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(q->ctr[i])), 1); + } + PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); + } + } +} + +static INLINE boolean +nvc0_mp_pm_query_read_data(uint32_t count[32][4], + struct nvc0_context *nvc0, boolean wait, + struct nvc0_query *q, + const struct nvc0_mp_pm_query_cfg *cfg, + unsigned mp_count) +{ + unsigned p, c; + + for (p = 0; p < mp_count; ++p) { + const unsigned b = (0x24 / 4) * p; + + for (c = 0; c < cfg->num_counters; ++c) { + if (q->data[b + 8] != q->sequence) { + if (!wait) + return FALSE; + if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client)) + return FALSE; + } + count[p][c] = q->data[b + q->ctr[c]]; + } + } + return TRUE; +} + +static INLINE boolean +nve4_mp_pm_query_read_data(uint32_t count[32][4], + struct nvc0_context *nvc0, boolean wait, + struct nvc0_query *q, + const struct nvc0_mp_pm_query_cfg *cfg, + unsigned mp_count) +{ + unsigned p, c, d; + + for (p = 0; p < mp_count; ++p) { + const unsigned b = (0x60 / 4) * p; + + for (c = 0; c < cfg->num_counters; ++c) { + count[p][c] = 0; + for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) { + if (q->data[b + 20 + d] != q->sequence) { + if (!wait) + return FALSE; + if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client)) + return FALSE; + } + if (q->ctr[c] & ~0x3) + count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)]; + else + count[p][c] += q->data[b + d * 4 + q->ctr[c]]; + } + } + } + return TRUE; +} + +/* Metric calculations: + * sum(x) ... sum of x over all MPs + * avg(x) ... average of x over all MPs + * + * IPC : sum(inst_executed) / clock + * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued) + * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles) + * MP_EFFICIENCY : avg(active_cycles / clock) + * + * NOTE: Interpretation of IPC requires knowledge of MP count. 
+ */ +static boolean +nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q, + void *result, boolean wait) +{ + uint32_t count[32][4]; + uint64_t value = 0; + unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32); + unsigned p, c; + const struct nvc0_mp_pm_query_cfg *cfg; + boolean ret; + + cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + + if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) + ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count); + else + ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count); + if (!ret) + return FALSE; + + if (cfg->op == NVC0_COUNTER_OPn_SUM) { + for (c = 0; c < cfg->num_counters; ++c) + for (p = 0; p < mp_count; ++p) + value += count[p][c]; + value = (value * cfg->norm[0]) / cfg->norm[1]; + } else + if (cfg->op == NVC0_COUNTER_OPn_OR) { + uint32_t v = 0; + for (c = 0; c < cfg->num_counters; ++c) + for (p = 0; p < mp_count; ++p) + v |= count[p][c]; + value = (v * cfg->norm[0]) / cfg->norm[1]; + } else + if (cfg->op == NVC0_COUNTER_OPn_AND) { + uint32_t v = ~0; + for (c = 0; c < cfg->num_counters; ++c) + for (p = 0; p < mp_count; ++p) + v &= count[p][c]; + value = (v * cfg->norm[0]) / cfg->norm[1]; + } else + if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) { + uint64_t v[2] = { 0, 0 }; + for (p = 0; p < mp_count; ++p) { + v[0] += count[p][0]; + v[1] += count[p][1]; + } + if (v[0]) + value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]); + } else + if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) { + for (p = 0; p < mp_count; ++p) + value += count[p][0]; + if (count[0][1]) + value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]); + else + value = 0; + } else + if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) { + unsigned mp_used = 0; + for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0]) + if (count[p][1]) + value += (count[p][0] * cfg->norm[0]) / count[p][1]; + if (mp_used) + value /= mp_used * cfg->norm[1]; + } else + if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) { + unsigned 
mp_used = 0; + for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0]) + value += count[p][0]; + if (count[0][1] && mp_used) { + value *= cfg->norm[0]; + value /= count[0][1] * mp_used * cfg->norm[1]; + } else { + value = 0; + } + } + + *(uint64_t *)result = value; + return TRUE; +} + +int +nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, + unsigned id, + struct pipe_driver_query_info *info) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + int count = 0; + + count += NVC0_QUERY_DRV_STAT_COUNT; + + if (screen->base.device->drm_version >= 0x01000101) { + if (screen->base.class_3d >= NVE4_3D_CLASS) { + count += NVE4_PM_QUERY_COUNT; + } else + if (screen->compute) { + count += NVC0_PM_QUERY_COUNT; /* NVC0_COMPUTE is not always enabled */ + } + } + + if (!info) + return count; + +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + if (id < NVC0_QUERY_DRV_STAT_COUNT) { + info->name = nvc0_drv_stat_names[id]; + info->query_type = NVC0_QUERY_DRV_STAT(id); + info->max_value = ~0ULL; + info->uses_byte_units = !!strstr(info->name, "bytes"); + return 1; + } else +#endif + if (id < count) { + if (screen->base.class_3d >= NVE4_3D_CLASS) { + info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT]; + info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); + info->max_value = (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 
+ ~0ULL : 100; + info->uses_byte_units = FALSE; + return 1; + } else + if (screen->compute) { + info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT]; + info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); + info->max_value = ~0ULL; + info->uses_byte_units = FALSE; + return 1; + } + } + /* user asked for info about non-existing query */ + info->name = "this_is_not_the_query_you_are_looking_for"; + info->query_type = 0xdeadd01d; + info->max_value = 0; + info->uses_byte_units = FALSE; + return 0; +} + +void +nvc0_init_query_functions(struct nvc0_context *nvc0) +{ + struct pipe_context *pipe = &nvc0->base.pipe; + + pipe->create_query = nvc0_query_create; + pipe->destroy_query = nvc0_query_destroy; + pipe->begin_query = nvc0_query_begin; + pipe->end_query = nvc0_query_end; + pipe->get_query_result = nvc0_query_result; + pipe->render_condition = nvc0_render_condition; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c new file mode 100644 index 00000000000..4e70903b538 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c @@ -0,0 +1,62 @@ + +#include "pipe/p_context.h" +#include "nvc0/nvc0_resource.h" +#include "nouveau_screen.h" + + +static struct pipe_resource * +nvc0_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + switch (templ->target) { + case PIPE_BUFFER: + return nouveau_buffer_create(screen, templ); + default: + return nvc0_miptree_create(screen, templ); + } +} + +static struct pipe_resource * +nvc0_resource_from_handle(struct pipe_screen * screen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) +{ + if (templ->target == PIPE_BUFFER) { + return NULL; + } else { + struct pipe_resource *res = nv50_miptree_from_handle(screen, + templ, whandle); + nv04_resource(res)->vtbl = &nvc0_miptree_vtbl; + return res; + } +} + +static struct pipe_surface * +nvc0_surface_create(struct pipe_context *pipe, + struct 
pipe_resource *pres, + const struct pipe_surface *templ) +{ + if (unlikely(pres->target == PIPE_BUFFER)) + return nv50_surface_from_buffer(pipe, pres, templ); + return nvc0_miptree_surface_new(pipe, pres, templ); +} + +void +nvc0_init_resource_functions(struct pipe_context *pcontext) +{ + pcontext->transfer_map = u_transfer_map_vtbl; + pcontext->transfer_flush_region = u_transfer_flush_region_vtbl; + pcontext->transfer_unmap = u_transfer_unmap_vtbl; + pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; + pcontext->create_surface = nvc0_surface_create; + pcontext->surface_destroy = nv50_surface_destroy; +} + +void +nvc0_screen_init_resource_functions(struct pipe_screen *pscreen) +{ + pscreen->resource_create = nvc0_resource_create; + pscreen->resource_from_handle = nvc0_resource_from_handle; + pscreen->resource_get_handle = u_resource_get_handle_vtbl; + pscreen->resource_destroy = u_resource_destroy_vtbl; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h new file mode 100644 index 00000000000..0d5f026d6e1 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h @@ -0,0 +1,58 @@ + +#ifndef __NVC0_RESOURCE_H__ +#define __NVC0_RESOURCE_H__ + +#include "nv50/nv50_resource.h" + +#define NVC0_RESOURCE_FLAG_VIDEO (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 0) + + +#define NVC0_TILE_SHIFT_X(m) ((((m) >> 0) & 0xf) + 6) +#define NVC0_TILE_SHIFT_Y(m) ((((m) >> 4) & 0xf) + 3) +#define NVC0_TILE_SHIFT_Z(m) ((((m) >> 8) & 0xf) + 0) + +#define NVC0_TILE_SIZE_X(m) (64 << (((m) >> 0) & 0xf)) +#define NVC0_TILE_SIZE_Y(m) ( 8 << (((m) >> 4) & 0xf)) +#define NVC0_TILE_SIZE_Z(m) ( 1 << (((m) >> 8) & 0xf)) + +/* it's ok to mask only in the end because max value is 3 * 5 */ + +#define NVC0_TILE_SIZE_2D(m) ((64 * 8) << (((m) + ((m) >> 4)) & 0xf)) + +#define NVC0_TILE_SIZE(m) ((64 * 8) << (((m) + ((m) >> 4) + ((m) >> 8)) & 0xf)) + + +void +nvc0_init_resource_functions(struct pipe_context *pcontext); + +void 
+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen); + +/* Internal functions: + */ +struct pipe_resource * +nvc0_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmp); + +const struct u_resource_vtbl nvc0_miptree_vtbl; + +struct pipe_surface * +nvc0_miptree_surface_new(struct pipe_context *, + struct pipe_resource *, + const struct pipe_surface *templ); + +unsigned +nvc0_mt_zslice_offset(const struct nv50_miptree *, unsigned l, unsigned z); + +void * +nvc0_miptree_transfer_map(struct pipe_context *pctx, + struct pipe_resource *res, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer); +void +nvc0_miptree_transfer_unmap(struct pipe_context *pcontext, + struct pipe_transfer *ptx); + +#endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c new file mode 100644 index 00000000000..ff7890bbac4 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -0,0 +1,967 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "util/u_format_s3tc.h" +#include "pipe/p_screen.h" + +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" + +#include "nouveau_vp3_video.h" + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_screen.h" + +#include "nvc0/nvc0_graph_macros.h" + +#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS +# define NOUVEAU_GETPARAM_GRAPH_UNITS 13 +#endif + +static boolean +nvc0_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bindings) +{ + if (sample_count > 8) + return FALSE; + if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */ + return FALSE; + + if (!util_format_is_supported(format, bindings)) + return FALSE; + + if ((bindings & PIPE_BIND_SAMPLER_VIEW) && (target != PIPE_BUFFER)) + if (util_format_get_blocksizebits(format) == 3 * 32) + return FALSE; + + /* transfers & shared are always supported */ + bindings &= ~(PIPE_BIND_TRANSFER_READ | + PIPE_BIND_TRANSFER_WRITE | + PIPE_BIND_SHARED); + + return (nvc0_format_table[format].usage & bindings) == bindings; +} + +static int +nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; + + switch (param) { + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 16 * 5; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 15; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return (class_3d >= NVE4_3D_CLASS) ? 
13 : 12; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 2048; + case PIPE_CAP_MIN_TEXEL_OFFSET: + return -8; + case PIPE_CAP_MAX_TEXEL_OFFSET: + return 7; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_TEXTURE_SHADOW_MAP: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_CUBE_MAP_ARRAY: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return 1; + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return 65536; + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_TGSI_TEXCOORD: + return 1; + case PIPE_CAP_SM3: + return 1; + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 150; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 1; + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_VERTEX_COLOR_CLAMPED: + return 1; + case PIPE_CAP_QUERY_TIMESTAMP: + case PIPE_CAP_QUERY_TIME_ELAPSED: + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + return 1; + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return 4; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return 128; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 0; + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_TGSI_INSTANCEID: + case 
PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_TEXTURE_BARRIER: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_START_INSTANCE: + return 1; + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + return 0; /* state trackers will know better */ + case PIPE_CAP_USER_CONSTANT_BUFFERS: + case PIPE_CAP_USER_INDEX_BUFFERS: + case PIPE_CAP_USER_VERTEX_BUFFERS: + return 1; + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 256; + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return 1; /* 256 for binding as RT, but that's not possible in GL */ + case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + return NOUVEAU_MIN_BUFFER_MAP_ALIGN; + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + return 0; + case PIPE_CAP_COMPUTE: + return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 1; + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50; + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static int +nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, + enum pipe_shader_cap param) +{ + const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; + + switch (shader) { + case PIPE_SHADER_VERTEX: + /* + case PIPE_SHADER_TESSELLATION_CONTROL: + case PIPE_SHADER_TESSELLATION_EVALUATION: + */ + case PIPE_SHADER_GEOMETRY: + case PIPE_SHADER_FRAGMENT: + break; + case PIPE_SHADER_COMPUTE: + if (class_3d < NVE4_3D_CLASS) + return 0; + break; + default: + return 0; + } + + switch (param) { + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case 
PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 16; + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_VERTEX) + return 32; + /* NOTE: These only count our slots for GENERIC varyings. + * The address space may be larger, but the actual hard limit seems to be + * less than what the address space layout permits, so don't add TEXCOORD, + * COLOR, etc. here. + */ + if (shader == PIPE_SHADER_FRAGMENT) + return 0x1f0 / 16; + /* Actually this counts CLIPVERTEX, which occupies the last generic slot, + * and excludes 0x60 per-patch inputs. + */ + return 0x200 / 16; + case PIPE_SHADER_CAP_MAX_CONSTS: + return 65536 / 16; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + if (shader == PIPE_SHADER_COMPUTE && class_3d >= NVE4_3D_CLASS) + return NVE4_MAX_PIPE_CONSTBUFS_COMPUTE; + return NVC0_MAX_PIPE_CONSTBUFS; + case PIPE_SHADER_CAP_MAX_ADDRS: + return 1; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + return shader != PIPE_SHADER_FRAGMENT; + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; + case PIPE_SHADER_CAP_MAX_TEMPS: + return NVC0_CAP_MAX_PROGRAM_TEMPS; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 1; + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + return 0; + case PIPE_SHADER_CAP_SUBROUTINES: + return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 1; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + return 16; /* would be 32 in linked (OpenGL-style) mode */ + /* + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLER_VIEWS: + return 32; + */ + default: + NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); + return 0; + } +} + +static float +nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 10.0f; + case 
PIPE_CAPF_MAX_POINT_WIDTH: + return 63.0f; + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 63.375f; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 15.0f; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0.0f; + } +} + +static int +nvc0_screen_get_compute_param(struct pipe_screen *pscreen, + enum pipe_compute_cap param, void *data) +{ + uint64_t *data64 = (uint64_t *)data; + const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass; + + switch (param) { + case PIPE_COMPUTE_CAP_GRID_DIMENSION: + data64[0] = 3; + return 8; + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fffffff : 65535; + data64[1] = 65535; + data64[2] = 65535; + return 24; + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + data64[0] = 1024; + data64[1] = 1024; + data64[2] = 64; + return 24; + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + data64[0] = 1024; + return 8; + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */ + data64[0] = (uint64_t)1 << 40; + return 8; + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ + data64[0] = 48 << 10; + return 8; + case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ + data64[0] = 512 << 10; + return 8; + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ + data64[0] = 4096; + return 8; + default: + return 0; + } +} + +static void +nvc0_screen_destroy(struct pipe_screen *pscreen) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + + if (screen->base.fence.current) { + nouveau_fence_wait(screen->base.fence.current); + nouveau_fence_ref(NULL, &screen->base.fence.current); + } + if (screen->base.pushbuf) + screen->base.pushbuf->user_priv = NULL; + + if (screen->blitter) + nvc0_blitter_destroy(screen); + if (screen->pm.prog) { + screen->pm.prog->code = NULL; /* hardcoded, don't FREE */ + nvc0_program_destroy(NULL, screen->pm.prog); + } + + nouveau_bo_ref(NULL, &screen->text); + nouveau_bo_ref(NULL, &screen->uniform_bo); + 
nouveau_bo_ref(NULL, &screen->tls); + nouveau_bo_ref(NULL, &screen->txc); + nouveau_bo_ref(NULL, &screen->fence.bo); + nouveau_bo_ref(NULL, &screen->poly_cache); + nouveau_bo_ref(NULL, &screen->parm); + + nouveau_heap_destroy(&screen->lib_code); + nouveau_heap_destroy(&screen->text_heap); + + FREE(screen->tic.entries); + + nouveau_mm_destroy(screen->mm_VRAM_fe0); + + nouveau_object_del(&screen->eng3d); + nouveau_object_del(&screen->eng2d); + nouveau_object_del(&screen->m2mf); + nouveau_object_del(&screen->compute); + + nouveau_screen_fini(&screen->base); + + FREE(screen); +} + +static int +nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, + unsigned size, const uint32_t *data) +{ + struct nouveau_pushbuf *push = screen->base.pushbuf; + + size /= 4; + + BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2); + PUSH_DATA (push, (m - 0x3800) / 8); + PUSH_DATA (push, pos); + BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); + PUSH_DATA (push, pos); + PUSH_DATAp(push, data, size); + + return pos + size; +} + +static void +nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) +{ + BEGIN_NVC0(push, SUBC_3D(0x10cc), 1); + PUSH_DATA (push, 0xff); + BEGIN_NVC0(push, SUBC_3D(0x10e0), 2); + PUSH_DATA (push, 0xff); + PUSH_DATA (push, 0xff); + BEGIN_NVC0(push, SUBC_3D(0x10ec), 2); + PUSH_DATA (push, 0xff); + PUSH_DATA (push, 0xff); + BEGIN_NVC0(push, SUBC_3D(0x074c), 1); + PUSH_DATA (push, 0x3f); + + BEGIN_NVC0(push, SUBC_3D(0x16a8), 1); + PUSH_DATA (push, (3 << 16) | 3); + BEGIN_NVC0(push, SUBC_3D(0x1794), 1); + PUSH_DATA (push, (2 << 16) | 2); + BEGIN_NVC0(push, SUBC_3D(0x0de8), 1); + PUSH_DATA (push, 1); + + BEGIN_NVC0(push, SUBC_3D(0x12ac), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_3D(0x0218), 1); + PUSH_DATA (push, 0x10); + BEGIN_NVC0(push, SUBC_3D(0x10fc), 1); + PUSH_DATA (push, 0x10); + BEGIN_NVC0(push, SUBC_3D(0x1290), 1); + PUSH_DATA (push, 0x10); + BEGIN_NVC0(push, SUBC_3D(0x12d8), 2); + PUSH_DATA (push, 
0x10); + PUSH_DATA (push, 0x10); + BEGIN_NVC0(push, SUBC_3D(0x1140), 1); + PUSH_DATA (push, 0x10); + BEGIN_NVC0(push, SUBC_3D(0x1610), 1); + PUSH_DATA (push, 0xe); + + BEGIN_NVC0(push, SUBC_3D(0x164c), 1); + PUSH_DATA (push, 1 << 12); + BEGIN_NVC0(push, SUBC_3D(0x030c), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_3D(0x0300), 1); + PUSH_DATA (push, 3); + + BEGIN_NVC0(push, SUBC_3D(0x02d0), 1); + PUSH_DATA (push, 0x3fffff); + BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, SUBC_3D(0x19c0), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, SUBC_3D(0x075c), 1); + PUSH_DATA (push, 3); + + if (obj_class >= NVE4_3D_CLASS) { + BEGIN_NVC0(push, SUBC_3D(0x07fc), 1); + PUSH_DATA (push, 1); + } + + /* TODO: find out what software methods 0x1528, 0x1280 and (on nve4) 0x02dc + * are supposed to do */ +} + +static void +nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + struct nouveau_pushbuf *push = screen->base.pushbuf; + + /* we need to do it after possible flush in MARK_RING */ + *sequence = ++screen->base.fence.sequence; + + BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4); + PUSH_DATAh(push, screen->fence.bo->offset); + PUSH_DATA (push, screen->fence.bo->offset); + PUSH_DATA (push, *sequence); + PUSH_DATA (push, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT | + (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT)); +} + +static u32 +nvc0_screen_fence_update(struct pipe_screen *pscreen) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + return screen->fence.map[0]; +} + +static int +nvc0_screen_init_compute(struct nvc0_screen *screen) +{ + screen->base.base.get_compute_param = nvc0_screen_get_compute_param; + + switch (screen->base.device->chipset & 0xf0) { + case 0xc0: + case 0xd0: + /* Using COMPUTE has weird effects on 3D state, we need to + * investigate this further before enabling it by default. 
+ */ + if (debug_get_bool_option("NVC0_COMPUTE", FALSE)) + return nvc0_screen_compute_setup(screen, screen->base.pushbuf); + return 0; + case 0xe0: + case 0xf0: + return nve4_screen_compute_setup(screen, screen->base.pushbuf); + default: + return -1; + } +} + +boolean +nvc0_screen_resize_tls_area(struct nvc0_screen *screen, + uint32_t lpos, uint32_t lneg, uint32_t cstack) +{ + struct nouveau_bo *bo = NULL; + int ret; + uint64_t size = (lpos + lneg) * 32 + cstack; + + if (size >= (1 << 20)) { + NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size); + return FALSE; + } + + size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */ + size = align(size, 0x8000); + size *= screen->mp_count; + + size = align(size, 1 << 17); + + ret = nouveau_bo_new(screen->base.device, NOUVEAU_BO_VRAM, 1 << 17, size, + NULL, &bo); + if (ret) { + NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size); + return FALSE; + } + nouveau_bo_ref(NULL, &screen->tls); + screen->tls = bo; + return TRUE; +} + +#define FAIL_SCREEN_INIT(str, err) \ + do { \ + NOUVEAU_ERR(str, err); \ + nvc0_screen_destroy(pscreen); \ + return NULL; \ + } while(0) + +struct pipe_screen * +nvc0_screen_create(struct nouveau_device *dev) +{ + struct nvc0_screen *screen; + struct pipe_screen *pscreen; + struct nouveau_object *chan; + struct nouveau_pushbuf *push; + uint64_t value; + uint32_t obj_class; + int ret; + unsigned i; + union nouveau_bo_config mm_config; + + switch (dev->chipset & ~0xf) { + case 0xc0: + case 0xd0: + case 0xe0: + case 0xf0: + break; + default: + return NULL; + } + + screen = CALLOC_STRUCT(nvc0_screen); + if (!screen) + return NULL; + pscreen = &screen->base.base; + + ret = nouveau_screen_init(&screen->base, dev); + if (ret) { + nvc0_screen_destroy(pscreen); + return NULL; + } + chan = screen->base.channel; + push = screen->base.pushbuf; + push->user_priv = screen; + push->rsvd_kick = 5; + + screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER | + 
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER; + screen->base.sysmem_bindings |= + PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER; + + pscreen->destroy = nvc0_screen_destroy; + pscreen->context_create = nvc0_create; + pscreen->is_format_supported = nvc0_screen_is_format_supported; + pscreen->get_param = nvc0_screen_get_param; + pscreen->get_shader_param = nvc0_screen_get_shader_param; + pscreen->get_paramf = nvc0_screen_get_paramf; + pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info; + + nvc0_screen_init_resource_functions(pscreen); + + screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param; + screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, NULL, + &screen->fence.bo); + if (ret) + goto fail; + nouveau_bo_map(screen->fence.bo, 0, NULL); + screen->fence.map = screen->fence.bo->map; + screen->base.fence.emit = nvc0_screen_fence_emit; + screen->base.fence.update = nvc0_screen_fence_update; + + switch (dev->chipset & 0xf0) { + case 0xf0: + obj_class = NVF0_P2MF_CLASS; + break; + case 0xe0: + obj_class = NVE4_P2MF_CLASS; + break; + default: + obj_class = NVC0_M2MF_CLASS; + break; + } + ret = nouveau_object_new(chan, 0xbeef323f, obj_class, NULL, 0, + &screen->m2mf); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret); + + BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->m2mf->oclass); + if (screen->m2mf->oclass == NVE4_P2MF_CLASS) { + BEGIN_NVC0(push, SUBC_COPY(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, 0xa0b5); + } + + ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0, + &screen->eng2d); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret); + + BEGIN_NVC0(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->eng2d->oclass); + BEGIN_NVC0(push, NVC0_2D(SINGLE_GPC), 1); + PUSH_DATA (push, 0); + 
BEGIN_NVC0(push, NVC0_2D(OPERATION), 1); + PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY); + BEGIN_NVC0(push, NVC0_2D(CLIP_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_2D(COLOR_KEY_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_2D(0x0884), 1); + PUSH_DATA (push, 0x3f); + BEGIN_NVC0(push, SUBC_2D(0x0888), 1); + PUSH_DATA (push, 1); + + BEGIN_NVC0(push, SUBC_2D(NVC0_GRAPH_NOTIFY_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->fence.bo->offset + 16); + PUSH_DATA (push, screen->fence.bo->offset + 16); + + switch (dev->chipset & 0xf0) { + case 0xf0: + obj_class = NVF0_3D_CLASS; + break; + case 0xe0: + obj_class = NVE4_3D_CLASS; + break; + case 0xd0: + case 0xc0: + default: + switch (dev->chipset) { + case 0xd9: + case 0xc8: + obj_class = NVC8_3D_CLASS; + break; + case 0xc1: + obj_class = NVC1_3D_CLASS; + break; + default: + obj_class = NVC0_3D_CLASS; + break; + } + break; + } + ret = nouveau_object_new(chan, 0xbeef003d, obj_class, NULL, 0, + &screen->eng3d); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret); + screen->base.class_3d = obj_class; + + BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->eng3d->oclass); + + BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1); + PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS); + + if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) { + /* kill shaders after about 1 second (at 100 MHz) */ + BEGIN_NVC0(push, NVC0_3D(WATCHDOG_TIMER), 1); + PUSH_DATA (push, 0x17); + } + + IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE), dev->drm_version >= 0x01000101); + BEGIN_NVC0(push, NVC0_3D(RT_COMP_ENABLE(0)), 8); + for (i = 0; i < 8; ++i) + PUSH_DATA(push, dev->drm_version >= 0x01000101); + + BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); + PUSH_DATA (push, 1); + + BEGIN_NVC0(push, NVC0_3D(CSAA_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 1); + PUSH_DATA (push, 
NVC0_3D_MULTISAMPLE_MODE_MS1); + BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_CTRL), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(LINE_WIDTH_SEPARATE), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(LINE_LAST_PIXEL), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(BLEND_SEPARATE_ALPHA), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1); + PUSH_DATA (push, 0); + if (screen->eng3d->oclass < NVE4_3D_CLASS) { + BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1); + PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); + } else { + BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1); + PUSH_DATA (push, 15); + } + BEGIN_NVC0(push, NVC0_3D(CALL_LIMIT_LOG), 1); + PUSH_DATA (push, 8); /* 128 */ + BEGIN_NVC0(push, NVC0_3D(ZCULL_STATCTRS_ENABLE), 1); + PUSH_DATA (push, 1); + if (screen->eng3d->oclass >= NVC1_3D_CLASS) { + BEGIN_NVC0(push, NVC0_3D(CACHE_SPLIT), 1); + PUSH_DATA (push, NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1); + } + + nvc0_magic_3d_init(push, screen->eng3d->oclass); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL, + &screen->text); + if (ret) + goto fail; + + /* XXX: getting a page fault at the end of the code buffer every few + * launches, don't use the last 256 bytes to work around them - prefetch ? 
+ */ + nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, NULL, + &screen->uniform_bo); + if (ret) + goto fail; + + for (i = 0; i < 5; ++i) { + /* TIC and TSC entries for each unit (nve4+ only) */ + /* auxiliary constants (6 user clip planes, base instance id) */ + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 512); + PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9)); + PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9)); + BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); + PUSH_DATA (push, (15 << 4) | 1); + if (screen->eng3d->oclass >= NVE4_3D_CLASS) { + unsigned j; + BEGIN_1IC0(push, NVC0_3D(CB_POS), 9); + PUSH_DATA (push, 0); + for (j = 0; j < 8; ++j) + PUSH_DATA(push, j); + } else { + BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1); + PUSH_DATA (push, 0x54); + } + } + BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1); + PUSH_DATA (push, 0); + + /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */ + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 256); + PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); + PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 5); + PUSH_DATA (push, 0); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); + PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); + + if (dev->drm_version >= 0x01000101) { + ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); + if (ret) { + NOUVEAU_ERR("NOUVEAU_GETPARAM_GRAPH_UNITS failed.\n"); + goto fail; + } + } else { + if (dev->chipset >= 0xe0 && dev->chipset < 0xf0) + value = (8 << 8) | 4; + else + value = (16 << 8) | 4; + } + screen->mp_count = value >> 8; + screen->mp_count_compute = 
screen->mp_count; + + nvc0_screen_resize_tls_area(screen, 128 * 16, 0, 0x200); + + BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->text->offset); + PUSH_DATA (push, screen->text->offset); + BEGIN_NVC0(push, NVC0_3D(TEMP_ADDRESS_HIGH), 4); + PUSH_DATAh(push, screen->tls->offset); + PUSH_DATA (push, screen->tls->offset); + PUSH_DATA (push, screen->tls->size >> 32); + PUSH_DATA (push, screen->tls->size); + BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1); + PUSH_DATA (push, 0); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL, + &screen->poly_cache); + if (ret) + goto fail; + + BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->poly_cache->offset); + PUSH_DATA (push, screen->poly_cache->offset); + PUSH_DATA (push, 3); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, NULL, + &screen->txc); + if (ret) + goto fail; + + BEGIN_NVC0(push, NVC0_3D(TIC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset); + PUSH_DATA (push, screen->txc->offset); + PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); + + BEGIN_NVC0(push, NVC0_3D(TSC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset + 65536); + PUSH_DATA (push, screen->txc->offset + 65536); + PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); + + BEGIN_NVC0(push, NVC0_3D(SCREEN_Y_CONTROL), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(WINDOW_OFFSET_X), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1); /* deactivate ZCULL */ + PUSH_DATA (push, 0x3f); + + BEGIN_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), 1); + PUSH_DATA (push, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY); + BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), 8 * 2); + for (i = 0; i < 8 * 2; ++i) + PUSH_DATA(push, 0); + BEGIN_NVC0(push, NVC0_3D(CLIP_RECTS_EN), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(CLIPID_ENABLE), 1); + PUSH_DATA (push, 0); + + /* neither 
scissors, viewport nor stencil mask should affect clears */ + BEGIN_NVC0(push, NVC0_3D(CLEAR_FLAGS), 1); + PUSH_DATA (push, 0); + + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(0)), 2); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 1.0f); + BEGIN_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 1); + PUSH_DATA (push, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1); + + /* We use scissors instead of exact view volume clipping, + * so they're always enabled. + */ + BEGIN_NVC0(push, NVC0_3D(SCISSOR_ENABLE(0)), 3); + PUSH_DATA (push, 1); + PUSH_DATA (push, 8192 << 16); + PUSH_DATA (push, 8192 << 16); + +#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); + + i = 0; + MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, nvc0_9097_per_instance_bf); + MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, nvc0_9097_blend_enables); + MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select); + MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, nvc0_9097_tep_select); + MK_MACRO(NVC0_3D_MACRO_GP_SELECT, nvc0_9097_gp_select); + MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front); + MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back); + + BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(RT_SEPARATE_FRAG_DATA), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); + PUSH_DATA (push, 0x40); + BEGIN_NVC0(push, NVC0_3D(LAYER), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1); + PUSH_DATA (push, 0x30); + BEGIN_NVC0(push, NVC0_3D(PATCH_VERTICES), 1); + PUSH_DATA (push, 3); + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); + PUSH_DATA (push, 0x20); + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(0)), 1); + PUSH_DATA (push, 0x00); + + BEGIN_NVC0(push, NVC0_3D(POINT_COORD_REPLACE), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(POINT_RASTER_RULES), 1); + PUSH_DATA (push, 
NVC0_3D_POINT_RASTER_RULES_OGL); + + IMMED_NVC0(push, NVC0_3D(EDGEFLAG), 1); + + if (nvc0_screen_init_compute(screen)) + goto fail; + + PUSH_KICK (push); + + screen->tic.entries = CALLOC(4096, sizeof(void *)); + screen->tsc.entries = screen->tic.entries + 2048; + + mm_config.nvc0.tile_mode = 0; + mm_config.nvc0.memtype = 0xfe0; + screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config); + + if (!nvc0_blitter_create(screen)) + goto fail; + + nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE); + + return pscreen; + +fail: + nvc0_screen_destroy(pscreen); + return NULL; +} + +int +nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry) +{ + int i = screen->tic.next; + + while (screen->tic.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); + + screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); + + if (screen->tic.entries[i]) + nv50_tic_entry(screen->tic.entries[i])->id = -1; + + screen->tic.entries[i] = entry; + return i; +} + +int +nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry) +{ + int i = screen->tsc.next; + + while (screen->tsc.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); + + screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); + + if (screen->tsc.entries[i]) + nv50_tsc_entry(screen->tsc.entries[i])->id = -1; + + screen->tsc.entries[i] = entry; + return i; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h new file mode 100644 index 00000000000..27a0c5f784d --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -0,0 +1,325 @@ +#ifndef __NVC0_SCREEN_H__ +#define __NVC0_SCREEN_H__ + +#include "nouveau_screen.h" +#include "nouveau_mm.h" +#include "nouveau_fence.h" +#include "nouveau_heap.h" + +#include "nv_object.xml.h" + +#include "nvc0/nvc0_winsys.h" +#include "nvc0/nvc0_stateobj.h" + +#define NVC0_TIC_MAX_ENTRIES 2048 +#define NVC0_TSC_MAX_ENTRIES 2048 + +/* doesn't 
count reserved slots (for auxiliary constants, immediates, etc.) */ +#define NVC0_MAX_PIPE_CONSTBUFS 14 +#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE 7 + +#define NVC0_MAX_SURFACE_SLOTS 16 + +struct nvc0_context; + +struct nvc0_blitter; + +struct nvc0_screen { + struct nouveau_screen base; + + struct nvc0_context *cur_ctx; + + int num_occlusion_queries_active; + + struct nouveau_bo *text; + struct nouveau_bo *parm; /* for COMPUTE */ + struct nouveau_bo *uniform_bo; /* for 3D */ + struct nouveau_bo *tls; + struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ + struct nouveau_bo *poly_cache; + + uint16_t mp_count; + uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */ + + struct nouveau_heap *text_heap; + struct nouveau_heap *lib_code; /* allocated from text_heap */ + + struct nvc0_blitter *blitter; + + struct { + void **entries; + int next; + uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32]; + } tic; + + struct { + void **entries; + int next; + uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32]; + } tsc; + + struct { + struct nouveau_bo *bo; + uint32_t *map; + } fence; + + struct { + struct nvc0_program *prog; /* compute state object to read MP counters */ + struct pipe_query *mp_counter[8]; /* counter to query allocation */ + uint8_t num_mp_pm_active[2]; + boolean mp_counters_enabled; + } pm; + + struct nouveau_mman *mm_VRAM_fe0; + + struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */ + struct nouveau_object *eng2d; + struct nouveau_object *m2mf; + struct nouveau_object *compute; +}; + +static INLINE struct nvc0_screen * +nvc0_screen(struct pipe_screen *screen) +{ + return (struct nvc0_screen *)screen; +} + + +/* Performance counter queries: + */ +#define NVE4_PM_QUERY_COUNT 49 +#define NVE4_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i)) +#define NVE4_PM_QUERY_LAST NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1) +#define NVE4_PM_QUERY_PROF_TRIGGER_0 0 +#define NVE4_PM_QUERY_PROF_TRIGGER_1 1 +#define NVE4_PM_QUERY_PROF_TRIGGER_2 2 +#define 
NVE4_PM_QUERY_PROF_TRIGGER_3 3 +#define NVE4_PM_QUERY_PROF_TRIGGER_4 4 +#define NVE4_PM_QUERY_PROF_TRIGGER_5 5 +#define NVE4_PM_QUERY_PROF_TRIGGER_6 6 +#define NVE4_PM_QUERY_PROF_TRIGGER_7 7 +#define NVE4_PM_QUERY_LAUNCHED_WARPS 8 +#define NVE4_PM_QUERY_LAUNCHED_THREADS 9 +#define NVE4_PM_QUERY_LAUNCHED_CTA 10 +#define NVE4_PM_QUERY_INST_ISSUED1 11 +#define NVE4_PM_QUERY_INST_ISSUED2 12 +#define NVE4_PM_QUERY_INST_EXECUTED 13 +#define NVE4_PM_QUERY_LD_LOCAL 14 +#define NVE4_PM_QUERY_ST_LOCAL 15 +#define NVE4_PM_QUERY_LD_SHARED 16 +#define NVE4_PM_QUERY_ST_SHARED 17 +#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT 18 +#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS 19 +#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT 20 +#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS 21 +#define NVE4_PM_QUERY_GLD_REQUEST 22 +#define NVE4_PM_QUERY_GST_REQUEST 23 +#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT 24 +#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS 25 +#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26 +#define NVE4_PM_QUERY_GST_TRANSACTIONS 27 +#define NVE4_PM_QUERY_BRANCH 28 +#define NVE4_PM_QUERY_BRANCH_DIVERGENT 29 +#define NVE4_PM_QUERY_ACTIVE_WARPS 30 +#define NVE4_PM_QUERY_ACTIVE_CYCLES 31 +#define NVE4_PM_QUERY_INST_ISSUED 32 +#define NVE4_PM_QUERY_ATOM_COUNT 33 +#define NVE4_PM_QUERY_GRED_COUNT 34 +#define NVE4_PM_QUERY_LD_SHARED_REPLAY 35 +#define NVE4_PM_QUERY_ST_SHARED_REPLAY 36 +#define NVE4_PM_QUERY_LD_LOCAL_TRANSACTIONS 37 +#define NVE4_PM_QUERY_ST_LOCAL_TRANSACTIONS 38 +#define NVE4_PM_QUERY_L1_LD_SHARED_TRANSACTIONS 39 +#define NVE4_PM_QUERY_L1_ST_SHARED_TRANSACTIONS 40 +#define NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY 41 +#define NVE4_PM_QUERY_GST_MEM_DIV_REPLAY 42 +#define NVE4_PM_QUERY_METRIC_IPC 43 +#define NVE4_PM_QUERY_METRIC_IPAC 44 +#define NVE4_PM_QUERY_METRIC_IPEC 45 +#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY 46 +#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY 47 +#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD 48 + +/* +#define NVE4_PM_QUERY_GR_IDLE 50 +#define NVE4_PM_QUERY_BSP_IDLE 51 +#define 
NVE4_PM_QUERY_VP_IDLE 52 +#define NVE4_PM_QUERY_PPP_IDLE 53 +#define NVE4_PM_QUERY_CE0_IDLE 54 +#define NVE4_PM_QUERY_CE1_IDLE 55 +#define NVE4_PM_QUERY_CE2_IDLE 56 +*/ +/* L2 queries (PCOUNTER) */ +/* +#define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57 +... +*/ +/* TEX queries (PCOUNTER) */ +/* +#define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58 +... +*/ + +#define NVC0_PM_QUERY_COUNT 31 +#define NVC0_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i)) +#define NVC0_PM_QUERY_LAST NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1) +#define NVC0_PM_QUERY_INST_EXECUTED 0 +#define NVC0_PM_QUERY_BRANCH 1 +#define NVC0_PM_QUERY_BRANCH_DIVERGENT 2 +#define NVC0_PM_QUERY_ACTIVE_WARPS 3 +#define NVC0_PM_QUERY_ACTIVE_CYCLES 4 +#define NVC0_PM_QUERY_LAUNCHED_WARPS 5 +#define NVC0_PM_QUERY_LAUNCHED_THREADS 6 +#define NVC0_PM_QUERY_LD_SHARED 7 +#define NVC0_PM_QUERY_ST_SHARED 8 +#define NVC0_PM_QUERY_LD_LOCAL 9 +#define NVC0_PM_QUERY_ST_LOCAL 10 +#define NVC0_PM_QUERY_GRED_COUNT 11 +#define NVC0_PM_QUERY_ATOM_COUNT 12 +#define NVC0_PM_QUERY_GLD_REQUEST 13 +#define NVC0_PM_QUERY_GST_REQUEST 14 +#define NVC0_PM_QUERY_INST_ISSUED1_0 15 +#define NVC0_PM_QUERY_INST_ISSUED1_1 16 +#define NVC0_PM_QUERY_INST_ISSUED2_0 17 +#define NVC0_PM_QUERY_INST_ISSUED2_1 18 +#define NVC0_PM_QUERY_TH_INST_EXECUTED_0 19 +#define NVC0_PM_QUERY_TH_INST_EXECUTED_1 20 +#define NVC0_PM_QUERY_TH_INST_EXECUTED_2 21 +#define NVC0_PM_QUERY_TH_INST_EXECUTED_3 22 +#define NVC0_PM_QUERY_PROF_TRIGGER_0 23 +#define NVC0_PM_QUERY_PROF_TRIGGER_1 24 +#define NVC0_PM_QUERY_PROF_TRIGGER_2 25 +#define NVC0_PM_QUERY_PROF_TRIGGER_3 26 +#define NVC0_PM_QUERY_PROF_TRIGGER_4 27 +#define NVC0_PM_QUERY_PROF_TRIGGER_5 28 +#define NVC0_PM_QUERY_PROF_TRIGGER_6 29 +#define NVC0_PM_QUERY_PROF_TRIGGER_7 30 + +/* Driver statistics queries: + */ +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + +#define NVC0_QUERY_DRV_STAT(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i)) +#define NVC0_QUERY_DRV_STAT_COUNT 29 +#define NVC0_QUERY_DRV_STAT_LAST 
NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1) +#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT 0 +#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES 1 +#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT 2 +#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID 3 +#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS 4 +#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ 5 +#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE 6 +#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT 7 +#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT 8 +#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT 9 +#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ 10 +#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE 11 +#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID 12 +#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT 13 +#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID 14 +#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS 15 +#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES 16 +#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT 17 +#define NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT 18 +#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT 19 +#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT 20 +#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY 21 +#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED 22 +#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT 23 +#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES 24 +#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT 25 +#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES 26 +#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT 27 +#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT 28 + +#else + +#define NVC0_QUERY_DRV_STAT_COUNT 0 + +#endif + +int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned, + struct pipe_driver_query_info *); + +boolean nvc0_blitter_create(struct nvc0_screen *); +void nvc0_blitter_destroy(struct nvc0_screen *); + +void nvc0_screen_make_buffers_resident(struct nvc0_screen *); + +int nvc0_screen_tic_alloc(struct 
nvc0_screen *, void *); +int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *); + +int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *); +int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *); + +boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos, + uint32_t lneg, uint32_t cstack); + +static INLINE void +nvc0_resource_fence(struct nv04_resource *res, uint32_t flags) +{ + struct nvc0_screen *screen = nvc0_screen(res->base.screen); + + if (res->mm) { + nouveau_fence_ref(screen->base.fence.current, &res->fence); + if (flags & NOUVEAU_BO_WR) + nouveau_fence_ref(screen->base.fence.current, &res->fence_wr); + } +} + +static INLINE void +nvc0_resource_validate(struct nv04_resource *res, uint32_t flags) +{ + if (likely(res->bo)) { + if (flags & NOUVEAU_BO_WR) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING | + NOUVEAU_BUFFER_STATUS_DIRTY; + if (flags & NOUVEAU_BO_RD) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nvc0_resource_fence(res, flags); + } +} + +struct nvc0_format { + uint32_t rt; + uint32_t tic; + uint32_t vtx; + uint32_t usage; +}; + +extern const struct nvc0_format nvc0_format_table[]; + +static INLINE void +nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic) +{ + if (tic->id >= 0) + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); +} + +static INLINE void +nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc) +{ + if (tsc->id >= 0) + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); +} + +static INLINE void +nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic) +{ + if (tic->id >= 0) { + screen->tic.entries[tic->id] = NULL; + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); + } +} + +static INLINE void +nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc) +{ + if (tsc->id >= 0) { + screen->tsc.entries[tsc->id] = NULL; + screen->tsc.lock[tsc->id / 32] &= 
~(1 << (tsc->id % 32)); + } +} + +#endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c new file mode 100644 index 00000000000..b820ef21df8 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -0,0 +1,278 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" + +#include "nvc0/nvc0_context.h" + +static INLINE void +nvc0_program_update_context_state(struct nvc0_context *nvc0, + struct nvc0_program *prog, int stage) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + if (prog && prog->need_tls) { + const uint32_t flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR; + if (!nvc0->state.tls_required) + BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls); + nvc0->state.tls_required |= 1 << stage; + } else { + if (nvc0->state.tls_required == (1 << stage)) + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TLS); + nvc0->state.tls_required &= ~(1 << stage); + } + + if (prog && prog->immd_size) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + /* NOTE: may overlap code of a different shader */ + PUSH_DATA (push, align(prog->immd_size, 0x100)); + PUSH_DATAh(push, nvc0->screen->text->offset + prog->immd_base); + PUSH_DATA (push, nvc0->screen->text->offset + prog->immd_base); + BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1); + PUSH_DATA (push, (14 << 4) | 1); + + nvc0->state.c14_bound |= 1 << stage; + } else + if (nvc0->state.c14_bound & (1 << stage)) { + BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1); + PUSH_DATA (push, (14 << 4) | 0); + + nvc0->state.c14_bound &= ~(1 << stage); + } +} + +static INLINE boolean +nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + if (prog->mem) + return TRUE; + + if (!prog->translated) { + prog->translated = nvc0_program_translate( + prog, nvc0->screen->base.device->chipset); + if (!prog->translated) + return FALSE; + } + + if (likely(prog->code_size)) + return nvc0_program_upload_code(nvc0, prog); + return TRUE; /* stream output info only */ +} + +void +nvc0_vertprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *vp = nvc0->vertprog; + + if (!nvc0_program_validate(nvc0, vp)) + return; + 
nvc0_program_update_context_state(nvc0, vp, 0); + + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2); + PUSH_DATA (push, 0x11); + PUSH_DATA (push, vp->code_base); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1); + PUSH_DATA (push, vp->num_gprs); + + // BEGIN_NVC0(push, NVC0_3D_(0x163c), 1); + // PUSH_DATA (push, 0); +} + +void +nvc0_fragprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *fp = nvc0->fragprog; + + if (!nvc0_program_validate(nvc0, fp)) + return; + nvc0_program_update_context_state(nvc0, fp, 4); + + if (fp->fp.early_z != nvc0->state.early_z_forced) { + nvc0->state.early_z_forced = fp->fp.early_z; + IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z); + } + + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2); + PUSH_DATA (push, 0x51); + PUSH_DATA (push, fp->code_base); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1); + PUSH_DATA (push, fp->num_gprs); + + BEGIN_NVC0(push, SUBC_3D(0x0360), 2); + PUSH_DATA (push, 0x20164010); + PUSH_DATA (push, 0x20); + BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1); + PUSH_DATA (push, fp->flags[0]); +} + +void +nvc0_tctlprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *tp = nvc0->tctlprog; + + if (tp && nvc0_program_validate(nvc0, tp)) { + if (tp->tp.tess_mode != ~0) { + BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1); + PUSH_DATA (push, tp->tp.tess_mode); + } + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2); + PUSH_DATA (push, 0x21); + PUSH_DATA (push, tp->code_base); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1); + PUSH_DATA (push, tp->num_gprs); + + if (tp->tp.input_patch_size <= 32) + IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size); + } else { + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); + PUSH_DATA (push, 0x20); + } + nvc0_program_update_context_state(nvc0, tp, 1); +} + +void +nvc0_tevlprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = 
nvc0->base.pushbuf; + struct nvc0_program *tp = nvc0->tevlprog; + + if (tp && nvc0_program_validate(nvc0, tp)) { + if (tp->tp.tess_mode != ~0) { + BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1); + PUSH_DATA (push, tp->tp.tess_mode); + } + BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1); + PUSH_DATA (push, 0x31); + BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1); + PUSH_DATA (push, tp->code_base); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1); + PUSH_DATA (push, tp->num_gprs); + } else { + BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1); + PUSH_DATA (push, 0x30); + } + nvc0_program_update_context_state(nvc0, tp, 2); +} + +void +nvc0_gmtyprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *gp = nvc0->gmtyprog; + + if (gp) + nvc0_program_validate(nvc0, gp); + + /* we allow GPs with no code for specifying stream output state only */ + if (gp && gp->code_size) { + const boolean gp_selects_layer = gp->hdr[13] & (1 << 9); + + BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); + PUSH_DATA (push, 0x41); + BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1); + PUSH_DATA (push, gp->code_base); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1); + PUSH_DATA (push, gp->num_gprs); + BEGIN_NVC0(push, NVC0_3D(LAYER), 1); + PUSH_DATA (push, gp_selects_layer ? NVC0_3D_LAYER_USE_GP : 0); + } else { + IMMED_NVC0(push, NVC0_3D(LAYER), 0); + BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); + PUSH_DATA (push, 0x40); + } + nvc0_program_update_context_state(nvc0, gp, 3); +} + +void +nvc0_tfb_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_transform_feedback_state *tfb; + unsigned b; + + if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb; + else + if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb; + else + tfb = nvc0->vertprog->tfb; + + IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 
1 : 0); + + if (tfb && tfb != nvc0->state.tfb) { + for (b = 0; b < 4; ++b) { + if (tfb->varying_count[b]) { + unsigned n = (tfb->varying_count[b] + 3) / 4; + + BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3); + PUSH_DATA (push, 0); + PUSH_DATA (push, tfb->varying_count[b]); + PUSH_DATA (push, tfb->stride[b]); + BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n); + PUSH_DATAp(push, tfb->varying_index[b], n); + + if (nvc0->tfbbuf[b]) + nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b]; + } else { + IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0); + } + } + } + nvc0->state.tfb = tfb; + + if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS)) + return; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB); + + for (b = 0; b < nvc0->num_tfbbufs; ++b) { + struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]); + struct nv04_resource *buf = nv04_resource(targ->pipe.buffer); + + if (tfb) + targ->stride = tfb->stride[b]; + + if (!(nvc0->tfbbuf_dirty & (1 << b))) + continue; + + if (!targ->clean) + nvc0_query_fifo_wait(push, targ->pq); + BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5); + PUSH_DATA (push, 1); + PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset); + PUSH_DATA (push, buf->address + targ->pipe.buffer_offset); + PUSH_DATA (push, targ->pipe.buffer_size); + if (!targ->clean) { + nvc0_query_pushbuf_submit(push, targ->pq, 0x4); + } else { + PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */ + targ->clean = FALSE; + } + BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR); + } + for (; b < 4; ++b) + IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c new file mode 100644 index 00000000000..e56ef0160f2 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -0,0 +1,1247 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files 
(the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" +#include "util/u_helpers.h" +#include "util/u_inlines.h" +#include "util/u_transfer.h" + +#include "tgsi/tgsi_parse.h" + +#include "nvc0/nvc0_stateobj.h" +#include "nvc0/nvc0_context.h" + +#include "nvc0/nvc0_3d.xml.h" +#include "nv50/nv50_texture.xml.h" + +#include "nouveau_gldefs.h" + +static INLINE uint32_t +nvc0_colormask(unsigned mask) +{ + uint32_t ret = 0; + + if (mask & PIPE_MASK_R) + ret |= 0x0001; + if (mask & PIPE_MASK_G) + ret |= 0x0010; + if (mask & PIPE_MASK_B) + ret |= 0x0100; + if (mask & PIPE_MASK_A) + ret |= 0x1000; + + return ret; +} + +#define NVC0_BLEND_FACTOR_CASE(a, b) \ + case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b + +static INLINE uint32_t +nvc0_blend_fac(unsigned factor) +{ + switch (factor) { + NVC0_BLEND_FACTOR_CASE(ONE, ONE); + NVC0_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR); + 
NVC0_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE); + NVC0_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA); + NVC0_BLEND_FACTOR_CASE(ZERO, ZERO); + NVC0_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA); + default: + return NV50_3D_BLEND_FACTOR_ZERO; + } +} + +static void * +nvc0_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj); + int i; + int r; /* reference */ + uint32_t ms; + uint8_t blend_en = 0; + boolean indep_masks = FALSE; + boolean indep_funcs = FALSE; + + so->pipe = *cso; + + /* check which states actually have differing values */ + if (cso->independent_blend_enable) { + for (r = 0; r < 8 && !cso->rt[r].blend_enable; ++r); + blend_en |= 1 << r; + for (i = r + 1; i < 8; ++i) { + if (!cso->rt[i].blend_enable) + continue; + blend_en |= 1 << i; + if (cso->rt[i].rgb_func != cso->rt[r].rgb_func || + cso->rt[i].rgb_src_factor != cso->rt[r].rgb_src_factor || + cso->rt[i].rgb_dst_factor != cso->rt[r].rgb_dst_factor || + cso->rt[i].alpha_func != cso->rt[r].alpha_func || + cso->rt[i].alpha_src_factor != cso->rt[r].alpha_src_factor || + cso->rt[i].alpha_dst_factor != cso->rt[r].alpha_dst_factor) { + indep_funcs = TRUE; + break; + } + } + for (; i < 8; ++i) + blend_en |= (cso->rt[i].blend_enable ? 
1 : 0) << i; + + for (i = 1; i < 8; ++i) { + if (cso->rt[i].colormask != cso->rt[0].colormask) { + indep_masks = TRUE; + break; + } + } + } else { + r = 0; + if (cso->rt[0].blend_enable) + blend_en = 0xff; + } + + if (cso->logicop_enable) { + SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2); + SB_DATA (so, 1); + SB_DATA (so, nvgl_logicop_func(cso->logicop_func)); + + SB_IMMED_3D(so, MACRO_BLEND_ENABLES, 0); + } else { + SB_IMMED_3D(so, LOGIC_OP_ENABLE, 0); + + SB_IMMED_3D(so, BLEND_INDEPENDENT, indep_funcs); + SB_IMMED_3D(so, MACRO_BLEND_ENABLES, blend_en); + if (indep_funcs) { + for (i = 0; i < 8; ++i) { + if (cso->rt[i].blend_enable) { + SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6); + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor)); + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor)); + } + } + } else + if (blend_en) { + SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5); + SB_DATA (so, nvgl_blend_eqn(cso->rt[r].rgb_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[r].rgb_src_factor)); + SB_DATA (so, nvc0_blend_fac(cso->rt[r].rgb_dst_factor)); + SB_DATA (so, nvgl_blend_eqn(cso->rt[r].alpha_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[r].alpha_src_factor)); + SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1); + SB_DATA (so, nvc0_blend_fac(cso->rt[r].alpha_dst_factor)); + } + + SB_IMMED_3D(so, COLOR_MASK_COMMON, !indep_masks); + if (indep_masks) { + SB_BEGIN_3D(so, COLOR_MASK(0), 8); + for (i = 0; i < 8; ++i) + SB_DATA(so, nvc0_colormask(cso->rt[i].colormask)); + } else { + SB_BEGIN_3D(so, COLOR_MASK(0), 1); + SB_DATA (so, nvc0_colormask(cso->rt[0].colormask)); + } + } + + ms = 0; + if (cso->alpha_to_coverage) + ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE; + if (cso->alpha_to_one) + ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE; + + SB_BEGIN_3D(so, 
MULTISAMPLE_CTRL, 1); + SB_DATA (so, ms); + + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); + return so; +} + +static void +nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->blend = hwcso; + nvc0->dirty |= NVC0_NEW_BLEND; +} + +static void +nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +/* NOTE: ignoring line_last_pixel, using FALSE (set on screen init) */ +static void * +nvc0_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nvc0_rasterizer_stateobj *so; + uint32_t reg; + + so = CALLOC_STRUCT(nvc0_rasterizer_stateobj); + if (!so) + return NULL; + so->pipe = *cso; + + /* Scissor enables are handled in scissor state, we will not want to + * always emit 16 commands, one for each scissor rectangle, here. + */ + + SB_BEGIN_3D(so, SHADE_MODEL, 1); + SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT : + NVC0_3D_SHADE_MODEL_SMOOTH); + SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first); + SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside); + + SB_IMMED_3D(so, VERT_COLOR_CLAMP_EN, cso->clamp_vertex_color); + SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1); + SB_DATA (so, cso->clamp_fragment_color ? 
0x11111111 : 0x00000000); + + SB_IMMED_3D(so, MULTISAMPLE_ENABLE, cso->multisample); + + SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth); + if (cso->line_smooth) + SB_BEGIN_3D(so, LINE_WIDTH_SMOOTH, 1); + else + SB_BEGIN_3D(so, LINE_WIDTH_ALIASED, 1); + SB_DATA (so, fui(cso->line_width)); + + SB_IMMED_3D(so, LINE_STIPPLE_ENABLE, cso->line_stipple_enable); + if (cso->line_stipple_enable) { + SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1); + SB_DATA (so, (cso->line_stipple_pattern << 8) | + cso->line_stipple_factor); + + } + + SB_IMMED_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex); + if (!cso->point_size_per_vertex) { + SB_BEGIN_3D(so, POINT_SIZE, 1); + SB_DATA (so, fui(cso->point_size)); + } + + reg = (cso->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) ? + NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT : + NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT; + + SB_BEGIN_3D(so, POINT_COORD_REPLACE, 1); + SB_DATA (so, ((cso->sprite_coord_enable & 0xff) << 3) | reg); + SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization); + SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth); + + SB_BEGIN_3D(so, MACRO_POLYGON_MODE_FRONT, 1); + SB_DATA (so, nvgl_polygon_mode(cso->fill_front)); + SB_BEGIN_3D(so, MACRO_POLYGON_MODE_BACK, 1); + SB_DATA (so, nvgl_polygon_mode(cso->fill_back)); + SB_IMMED_3D(so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth); + + SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3); + SB_DATA (so, cso->cull_face != PIPE_FACE_NONE); + SB_DATA (so, cso->front_ccw ? 
NVC0_3D_FRONT_FACE_CCW : + NVC0_3D_FRONT_FACE_CW); + switch (cso->cull_face) { + case PIPE_FACE_FRONT_AND_BACK: + SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK); + break; + case PIPE_FACE_FRONT: + SB_DATA(so, NVC0_3D_CULL_FACE_FRONT); + break; + case PIPE_FACE_BACK: + default: + SB_DATA(so, NVC0_3D_CULL_FACE_BACK); + break; + } + + SB_IMMED_3D(so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable); + SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3); + SB_DATA (so, cso->offset_point); + SB_DATA (so, cso->offset_line); + SB_DATA (so, cso->offset_tri); + + if (cso->offset_point || cso->offset_line || cso->offset_tri) { + SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1); + SB_DATA (so, fui(cso->offset_scale)); + SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1); + SB_DATA (so, fui(cso->offset_units * 2.0f)); + SB_BEGIN_3D(so, POLYGON_OFFSET_CLAMP, 1); + SB_DATA (so, fui(cso->offset_clamp)); + } + + if (cso->depth_clip) + reg = NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1; + else + reg = + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2; + + SB_BEGIN_3D(so, VIEW_VOLUME_CLIP_CTRL, 1); + SB_DATA (so, reg); + + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); + return (void *)so; +} + +static void +nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->rast = hwcso; + nvc0->dirty |= NVC0_NEW_RASTERIZER; +} + +static void +nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nvc0_zsa_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj); + + so->pipe = *cso; + + SB_IMMED_3D(so, DEPTH_TEST_ENABLE, cso->depth.enabled); + if (cso->depth.enabled) { + SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask); + 
SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1); + SB_DATA (so, nvgl_comparison_op(cso->depth.func)); + } + + if (cso->stencil[0].enabled) { + SB_BEGIN_3D(so, STENCIL_ENABLE, 5); + SB_DATA (so, 1); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func)); + SB_BEGIN_3D(so, STENCIL_FRONT_FUNC_MASK, 2); + SB_DATA (so, cso->stencil[0].valuemask); + SB_DATA (so, cso->stencil[0].writemask); + } else { + SB_IMMED_3D(so, STENCIL_ENABLE, 0); + } + + if (cso->stencil[1].enabled) { + assert(cso->stencil[0].enabled); + SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5); + SB_DATA (so, 1); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func)); + SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2); + SB_DATA (so, cso->stencil[1].writemask); + SB_DATA (so, cso->stencil[1].valuemask); + } else + if (cso->stencil[0].enabled) { + SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0); + } + + SB_IMMED_3D(so, ALPHA_TEST_ENABLE, cso->alpha.enabled); + if (cso->alpha.enabled) { + SB_BEGIN_3D(so, ALPHA_TEST_REF, 2); + SB_DATA (so, fui(cso->alpha.ref_value)); + SB_DATA (so, nvgl_comparison_op(cso->alpha.func)); + } + + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); + return (void *)so; +} + +static void +nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->zsa = hwcso; + nvc0->dirty |= NVC0_NEW_ZSA; +} + +static void +nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +/* ====================== SAMPLERS AND TEXTURES ================================ + */ + +#define NV50_TSC_WRAP_CASE(n) \ + case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n + +static INLINE unsigned 
+nv50_tsc_wrap_mode(unsigned wrap) +{ + switch (wrap) { + NV50_TSC_WRAP_CASE(REPEAT); + NV50_TSC_WRAP_CASE(MIRROR_REPEAT); + NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE); + NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER); + NV50_TSC_WRAP_CASE(CLAMP); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP); + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + return NV50_TSC_WRAP_REPEAT; + } +} + +static void +nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + unsigned s, i; + + for (s = 0; s < 5; ++s) + for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i) + if (nvc0_context(pipe)->samplers[s][i] == hwcso) + nvc0_context(pipe)->samplers[s][i] = NULL; + + nvc0_screen_tsc_free(nvc0_context(pipe)->screen, nv50_tsc_entry(hwcso)); + + FREE(hwcso); +} + +static INLINE void +nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s, + unsigned nr, void **hwcso) +{ + unsigned i; + + for (i = 0; i < nr; ++i) { + struct nv50_tsc_entry *old = nvc0->samplers[s][i]; + + if (hwcso[i] == old) + continue; + nvc0->samplers_dirty[s] |= 1 << i; + + nvc0->samplers[s][i] = nv50_tsc_entry(hwcso[i]); + if (old) + nvc0_screen_tsc_unlock(nvc0->screen, old); + } + for (; i < nvc0->num_samplers[s]; ++i) { + if (nvc0->samplers[s][i]) { + nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]); + nvc0->samplers[s][i] = NULL; + } + } + + nvc0->num_samplers[s] = nr; + + nvc0->dirty |= NVC0_NEW_SAMPLERS; +} + +static void +nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s); +} + +static void +nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s); +} + +static void +nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s); +} + +static void 
+nvc0_stage_sampler_states_bind_range(struct nvc0_context *nvc0, + const unsigned s, + unsigned start, unsigned nr, void **cso) +{ + const unsigned end = start + nr; + int last_valid = -1; + unsigned i; + + if (cso) { + for (i = start; i < end; ++i) { + const unsigned p = i - start; + if (cso[p]) + last_valid = i; + if (cso[p] == nvc0->samplers[s][i]) + continue; + nvc0->samplers_dirty[s] |= 1 << i; + + if (nvc0->samplers[s][i]) + nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]); + nvc0->samplers[s][i] = cso[p]; + } + } else { + for (i = start; i < end; ++i) { + if (nvc0->samplers[s][i]) { + nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]); + nvc0->samplers[s][i] = NULL; + nvc0->samplers_dirty[s] |= 1 << i; + } + } + } + + if (nvc0->num_samplers[s] <= end) { + if (last_valid < 0) { + for (i = start; i && !nvc0->samplers[s][i - 1]; --i); + nvc0->num_samplers[s] = i; + } else { + nvc0->num_samplers[s] = last_valid + 1; + } + } +} + +static void +nvc0_cp_sampler_states_bind(struct pipe_context *pipe, + unsigned start, unsigned nr, void **cso) +{ + nvc0_stage_sampler_states_bind_range(nvc0_context(pipe), 5, start, nr, cso); + + nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SAMPLERS; +} + +/* NOTE: only called when not referenced anywhere, won't be bound */ +static void +nvc0_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + + nvc0_screen_tic_free(nvc0_context(pipe)->screen, nv50_tic_entry(view)); + + FREE(nv50_tic_entry(view)); +} + +static INLINE void +nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, + unsigned nr, + struct pipe_sampler_view **views) +{ + unsigned i; + + for (i = 0; i < nr; ++i) { + struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); + + if (views[i] == nvc0->textures[s][i]) + continue; + nvc0->textures_dirty[s] |= 1 << i; + + if (old) { + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); + 
nvc0_screen_tic_unlock(nvc0->screen, old); + } + + pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]); + } + + for (i = nr; i < nvc0->num_textures[s]; ++i) { + struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); + if (old) { + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); + nvc0_screen_tic_unlock(nvc0->screen, old); + pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); + } + } + + nvc0->num_textures[s] = nr; + + nvc0->dirty |= NVC0_NEW_TEXTURES; +} + +static void +nvc0_vp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views); +} + +static void +nvc0_fp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views(nvc0_context(pipe), 4, nr, views); +} + +static void +nvc0_gp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views); +} + +static void +nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s, + unsigned start, unsigned nr, + struct pipe_sampler_view **views) +{ + struct nouveau_bufctx *bctx = (s == 5) ? nvc0->bufctx_cp : nvc0->bufctx_3d; + const unsigned end = start + nr; + const unsigned bin = (s == 5) ? 
NVC0_BIND_CP_TEX(0) : NVC0_BIND_TEX(s, 0); + int last_valid = -1; + unsigned i; + + if (views) { + for (i = start; i < end; ++i) { + const unsigned p = i - start; + if (views[p]) + last_valid = i; + if (views[p] == nvc0->textures[s][i]) + continue; + nvc0->textures_dirty[s] |= 1 << i; + + if (nvc0->textures[s][i]) { + struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); + nouveau_bufctx_reset(bctx, bin + i); + nvc0_screen_tic_unlock(nvc0->screen, old); + } + pipe_sampler_view_reference(&nvc0->textures[s][i], views[p]); + } + } else { + for (i = start; i < end; ++i) { + struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); + if (!old) + continue; + nvc0->textures_dirty[s] |= 1 << i; + + nvc0_screen_tic_unlock(nvc0->screen, old); + pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); + nouveau_bufctx_reset(bctx, bin + i); + } + } + + if (nvc0->num_textures[s] <= end) { + if (last_valid < 0) { + for (i = start; i && !nvc0->textures[s][i - 1]; --i); + nvc0->num_textures[s] = i; + } else { + nvc0->num_textures[s] = last_valid + 1; + } + } +} + +static void +nvc0_cp_set_sampler_views(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views_range(nvc0_context(pipe), 5, start, nr, views); + + nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_TEXTURES; +} + + +/* ============================= SHADERS ======================================= + */ + +static void * +nvc0_sp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso, unsigned type) +{ + struct nvc0_program *prog; + + prog = CALLOC_STRUCT(nvc0_program); + if (!prog) + return NULL; + + prog->type = type; + + if (cso->tokens) + prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + if (cso->stream_output.num_outputs) + prog->pipe.stream_output = cso->stream_output; + + return (void *)prog; +} + +static void +nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_program *prog = 
(struct nvc0_program *)hwcso; + + nvc0_program_destroy(nvc0_context(pipe), prog); + + FREE((void *)prog->pipe.tokens); + FREE(prog); +} + +static void * +nvc0_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX); +} + +static void +nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->vertprog = hwcso; + nvc0->dirty |= NVC0_NEW_VERTPROG; +} + +static void * +nvc0_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT); +} + +static void +nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->fragprog = hwcso; + nvc0->dirty |= NVC0_NEW_FRAGPROG; +} + +static void * +nvc0_gp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY); +} + +static void +nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->gmtyprog = hwcso; + nvc0->dirty |= NVC0_NEW_GMTYPROG; +} + +static void * +nvc0_cp_state_create(struct pipe_context *pipe, + const struct pipe_compute_state *cso) +{ + struct nvc0_program *prog; + + prog = CALLOC_STRUCT(nvc0_program); + if (!prog) + return NULL; + prog->type = PIPE_SHADER_COMPUTE; + + prog->cp.smem_size = cso->req_local_mem; + prog->cp.lmem_size = cso->req_private_mem; + prog->parm_size = cso->req_input_mem; + + prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog); + + return (void *)prog; +} + +static void +nvc0_cp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->compprog = hwcso; + nvc0->dirty_cp |= NVC0_NEW_CP_PROGRAM; +} + +static void +nvc0_set_constant_buffer(struct pipe_context *pipe, uint 
shader, uint index, + struct pipe_constant_buffer *cb) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct pipe_resource *res = cb ? cb->buffer : NULL; + const unsigned s = nvc0_shader_stage(shader); + const unsigned i = index; + + if (unlikely(shader == PIPE_SHADER_COMPUTE)) { + assert(!cb || !cb->user_buffer); + if (nvc0->constbuf[s][i].u.buf) + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i)); + + nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF; + } else { + if (nvc0->constbuf[s][i].user) + nvc0->constbuf[s][i].u.buf = NULL; + else + if (nvc0->constbuf[s][i].u.buf) + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i)); + + nvc0->dirty |= NVC0_NEW_CONSTBUF; + } + nvc0->constbuf_dirty[s] |= 1 << i; + + pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res); + + nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE; + if (nvc0->constbuf[s][i].user) { + nvc0->constbuf[s][i].u.data = cb->user_buffer; + nvc0->constbuf[s][i].size = cb->buffer_size; + } else + if (cb) { + nvc0->constbuf[s][i].offset = cb->buffer_offset; + nvc0->constbuf[s][i].size = align(cb->buffer_size, 0x100); + } +} + +/* ============================================================================= + */ + +static void +nvc0_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->blend_colour = *bcol; + nvc0->dirty |= NVC0_NEW_BLEND_COLOUR; +} + +static void +nvc0_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *sr) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->stencil_ref = *sr; + nvc0->dirty |= NVC0_NEW_STENCIL_REF; +} + +static void +nvc0_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + memcpy(nvc0->clip.ucp, clip->ucp, sizeof(clip->ucp)); + + nvc0->dirty |= NVC0_NEW_CLIP; +} + +static void +nvc0_set_sample_mask(struct pipe_context *pipe, unsigned 
sample_mask) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->sample_mask = sample_mask; + nvc0->dirty |= NVC0_NEW_SAMPLE_MASK; +} + + +static void +nvc0_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + unsigned i; + + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + + for (i = 0; i < fb->nr_cbufs; ++i) + pipe_surface_reference(&nvc0->framebuffer.cbufs[i], fb->cbufs[i]); + for (; i < nvc0->framebuffer.nr_cbufs; ++i) + pipe_surface_reference(&nvc0->framebuffer.cbufs[i], NULL); + + nvc0->framebuffer.nr_cbufs = fb->nr_cbufs; + + nvc0->framebuffer.width = fb->width; + nvc0->framebuffer.height = fb->height; + + pipe_surface_reference(&nvc0->framebuffer.zsbuf, fb->zsbuf); + + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; +} + +static void +nvc0_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->stipple = *stipple; + nvc0->dirty |= NVC0_NEW_STIPPLE; +} + +static void +nvc0_set_scissor_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissor) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->scissor = *scissor; + nvc0->dirty |= NVC0_NEW_SCISSOR; +} + +static void +nvc0_set_viewport_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *vpt) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->viewport = *vpt; + nvc0->dirty |= NVC0_NEW_VIEWPORT; +} + +static void +nvc0_set_vertex_buffers(struct pipe_context *pipe, + unsigned start_slot, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + unsigned i; + + util_set_vertex_buffers_count(nvc0->vtxbuf, &nvc0->num_vtxbufs, vb, + start_slot, count); + + if (!vb) { + nvc0->vbo_user &= ~(((1ull << count) - 1) << 
start_slot); + nvc0->constant_vbos &= ~(((1ull << count) - 1) << start_slot); + return; + } + + for (i = 0; i < count; ++i) { + unsigned dst_index = start_slot + i; + + if (vb[i].user_buffer) { + nvc0->vbo_user |= 1 << dst_index; + if (!vb[i].stride) + nvc0->constant_vbos |= 1 << dst_index; + else + nvc0->constant_vbos &= ~(1 << dst_index); + } else { + nvc0->vbo_user &= ~(1 << dst_index); + nvc0->constant_vbos &= ~(1 << dst_index); + } + } + + nvc0->dirty |= NVC0_NEW_ARRAYS; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); +} + +static void +nvc0_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + if (nvc0->idxbuf.buffer) + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX); + + if (ib) { + pipe_resource_reference(&nvc0->idxbuf.buffer, ib->buffer); + nvc0->idxbuf.index_size = ib->index_size; + if (ib->buffer) { + nvc0->idxbuf.offset = ib->offset; + nvc0->dirty |= NVC0_NEW_IDXBUF; + } else { + nvc0->idxbuf.user_buffer = ib->user_buffer; + nvc0->dirty &= ~NVC0_NEW_IDXBUF; + } + } else { + nvc0->dirty &= ~NVC0_NEW_IDXBUF; + pipe_resource_reference(&nvc0->idxbuf.buffer, NULL); + } +} + +static void +nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->vertex = hwcso; + nvc0->dirty |= NVC0_NEW_VERTEX; +} + +static struct pipe_stream_output_target * +nvc0_so_target_create(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size) +{ + struct nvc0_so_target *targ = MALLOC_STRUCT(nvc0_so_target); + if (!targ) + return NULL; + + targ->pq = pipe->create_query(pipe, NVC0_QUERY_TFB_BUFFER_OFFSET); + if (!targ->pq) { + FREE(targ); + return NULL; + } + targ->clean = TRUE; + + targ->pipe.buffer_size = size; + targ->pipe.buffer_offset = offset; + targ->pipe.context = pipe; + targ->pipe.buffer = NULL; + pipe_resource_reference(&targ->pipe.buffer, res); + 
pipe_reference_init(&targ->pipe.reference, 1); + + return &targ->pipe; +} + +static void +nvc0_so_target_destroy(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg) +{ + struct nvc0_so_target *targ = nvc0_so_target(ptarg); + pipe->destroy_query(pipe, targ->pq); + pipe_resource_reference(&targ->pipe.buffer, NULL); + FREE(targ); +} + +static void +nvc0_set_transform_feedback_targets(struct pipe_context *pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_mask) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + unsigned i; + boolean serialize = TRUE; + + assert(num_targets <= 4); + + for (i = 0; i < num_targets; ++i) { + if (nvc0->tfbbuf[i] == targets[i] && (append_mask & (1 << i))) + continue; + nvc0->tfbbuf_dirty |= 1 << i; + + if (nvc0->tfbbuf[i] && nvc0->tfbbuf[i] != targets[i]) + nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize); + + if (targets[i] && !(append_mask & (1 << i))) + nvc0_so_target(targets[i])->clean = TRUE; + + pipe_so_target_reference(&nvc0->tfbbuf[i], targets[i]); + } + for (; i < nvc0->num_tfbbufs; ++i) { + nvc0->tfbbuf_dirty |= 1 << i; + nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize); + pipe_so_target_reference(&nvc0->tfbbuf[i], NULL); + } + nvc0->num_tfbbufs = num_targets; + + if (nvc0->tfbbuf_dirty) + nvc0->dirty |= NVC0_NEW_TFB_TARGETS; +} + +static void +nvc0_bind_surfaces_range(struct nvc0_context *nvc0, const unsigned t, + unsigned start, unsigned nr, + struct pipe_surface **psurfaces) +{ + const unsigned end = start + nr; + const unsigned mask = ((1 << nr) - 1) << start; + unsigned i; + + if (psurfaces) { + for (i = start; i < end; ++i) { + const unsigned p = i - start; + if (psurfaces[p]) + nvc0->surfaces_valid[t] |= (1 << i); + else + nvc0->surfaces_valid[t] &= ~(1 << i); + pipe_surface_reference(&nvc0->surfaces[t][i], psurfaces[p]); + } + } else { + for (i = start; i < end; ++i) + pipe_surface_reference(&nvc0->surfaces[t][i], NULL); + 
nvc0->surfaces_valid[t] &= ~mask; + } + nvc0->surfaces_dirty[t] |= mask; + + if (t == 0) + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_SUF); + else + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF); +} + +static void +nvc0_set_compute_resources(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_surface **resources) +{ + nvc0_bind_surfaces_range(nvc0_context(pipe), 1, start, nr, resources); + + nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SURFACES; +} + +static void +nvc0_set_shader_resources(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_surface **resources) +{ + nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, resources); + + nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES; +} + +static INLINE void +nvc0_set_global_handle(uint32_t *phandle, struct pipe_resource *res) +{ + struct nv04_resource *buf = nv04_resource(res); + if (buf) { + uint64_t limit = (buf->address + buf->base.width0) - 1; + if (limit < (1ULL << 32)) { + *phandle = (uint32_t)buf->address; + } else { + NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: " + "resource not contained within 32-bit address space !\n"); + *phandle = 0; + } + } else { + *phandle = 0; + } +} + +static void +nvc0_set_global_bindings(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_resource **resources, + uint32_t **handles) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct pipe_resource **ptr; + unsigned i; + const unsigned end = start + nr; + + if (nvc0->global_residents.size <= (end * sizeof(struct pipe_resource *))) { + const unsigned old_size = nvc0->global_residents.size; + const unsigned req_size = end * sizeof(struct pipe_resource *); + util_dynarray_resize(&nvc0->global_residents, req_size); + memset((uint8_t *)nvc0->global_residents.data + old_size, 0, + req_size - old_size); + } + + if (resources) { + ptr = util_dynarray_element( + &nvc0->global_residents, struct pipe_resource *, start); + for (i = 0; i < nr; ++i) { 
+ pipe_resource_reference(&ptr[i], resources[i]); + nvc0_set_global_handle(handles[i], resources[i]); + } + } else { + ptr = util_dynarray_element( + &nvc0->global_residents, struct pipe_resource *, start); + for (i = 0; i < nr; ++i) + pipe_resource_reference(&ptr[i], NULL); + } + + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL); + + nvc0->dirty_cp = NVC0_NEW_CP_GLOBALS; +} + +void +nvc0_init_state_functions(struct nvc0_context *nvc0) +{ + struct pipe_context *pipe = &nvc0->base.pipe; + + pipe->create_blend_state = nvc0_blend_state_create; + pipe->bind_blend_state = nvc0_blend_state_bind; + pipe->delete_blend_state = nvc0_blend_state_delete; + + pipe->create_rasterizer_state = nvc0_rasterizer_state_create; + pipe->bind_rasterizer_state = nvc0_rasterizer_state_bind; + pipe->delete_rasterizer_state = nvc0_rasterizer_state_delete; + + pipe->create_depth_stencil_alpha_state = nvc0_zsa_state_create; + pipe->bind_depth_stencil_alpha_state = nvc0_zsa_state_bind; + pipe->delete_depth_stencil_alpha_state = nvc0_zsa_state_delete; + + pipe->create_sampler_state = nv50_sampler_state_create; + pipe->delete_sampler_state = nvc0_sampler_state_delete; + pipe->bind_vertex_sampler_states = nvc0_vp_sampler_states_bind; + pipe->bind_fragment_sampler_states = nvc0_fp_sampler_states_bind; + pipe->bind_geometry_sampler_states = nvc0_gp_sampler_states_bind; + pipe->bind_compute_sampler_states = nvc0_cp_sampler_states_bind; + + pipe->create_sampler_view = nvc0_create_sampler_view; + pipe->sampler_view_destroy = nvc0_sampler_view_destroy; + pipe->set_vertex_sampler_views = nvc0_vp_set_sampler_views; + pipe->set_fragment_sampler_views = nvc0_fp_set_sampler_views; + pipe->set_geometry_sampler_views = nvc0_gp_set_sampler_views; + pipe->set_compute_sampler_views = nvc0_cp_set_sampler_views; + + pipe->create_vs_state = nvc0_vp_state_create; + pipe->create_fs_state = nvc0_fp_state_create; + pipe->create_gs_state = nvc0_gp_state_create; + pipe->bind_vs_state = nvc0_vp_state_bind; + 
pipe->bind_fs_state = nvc0_fp_state_bind; + pipe->bind_gs_state = nvc0_gp_state_bind; + pipe->delete_vs_state = nvc0_sp_state_delete; + pipe->delete_fs_state = nvc0_sp_state_delete; + pipe->delete_gs_state = nvc0_sp_state_delete; + + pipe->create_compute_state = nvc0_cp_state_create; + pipe->bind_compute_state = nvc0_cp_state_bind; + pipe->delete_compute_state = nvc0_sp_state_delete; + + pipe->set_blend_color = nvc0_set_blend_color; + pipe->set_stencil_ref = nvc0_set_stencil_ref; + pipe->set_clip_state = nvc0_set_clip_state; + pipe->set_sample_mask = nvc0_set_sample_mask; + pipe->set_constant_buffer = nvc0_set_constant_buffer; + pipe->set_framebuffer_state = nvc0_set_framebuffer_state; + pipe->set_polygon_stipple = nvc0_set_polygon_stipple; + pipe->set_scissor_states = nvc0_set_scissor_states; + pipe->set_viewport_states = nvc0_set_viewport_states; + + pipe->create_vertex_elements_state = nvc0_vertex_state_create; + pipe->delete_vertex_elements_state = nvc0_vertex_state_delete; + pipe->bind_vertex_elements_state = nvc0_vertex_state_bind; + + pipe->set_vertex_buffers = nvc0_set_vertex_buffers; + pipe->set_index_buffer = nvc0_set_index_buffer; + + pipe->create_stream_output_target = nvc0_so_target_create; + pipe->stream_output_target_destroy = nvc0_so_target_destroy; + pipe->set_stream_output_targets = nvc0_set_transform_feedback_targets; + + pipe->set_global_binding = nvc0_set_global_bindings; + pipe->set_compute_resources = nvc0_set_compute_resources; + pipe->set_shader_resources = nvc0_set_shader_resources; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c new file mode 100644 index 00000000000..0ba4bad154a --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -0,0 +1,577 @@ + +#include "util/u_math.h" + +#include "nvc0/nvc0_context.h" + +#if 0 +static void +nvc0_validate_zcull(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + 
struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + struct nv50_surface *sf = nv50_surface(fb->zsbuf); + struct nv50_miptree *mt = nv50_miptree(sf->base.texture); + struct nouveau_bo *bo = mt->base.bo; + uint32_t size; + uint32_t offset = align(mt->total_size, 1 << 17); + unsigned width, height; + + assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2); + + size = mt->total_size * 2; + + height = align(fb->height, 32); + width = fb->width % 224; + if (width) + width = fb->width + (224 - width); + else + width = fb->width; + + BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(ZCULL_ADDRESS_HIGH), 2); + PUSH_DATAh(push, bo->offset + offset); + PUSH_DATA (push, bo->offset + offset); + offset += 1 << 17; + BEGIN_NVC0(push, NVC0_3D(ZCULL_LIMIT_HIGH), 2); + PUSH_DATAh(push, bo->offset + offset); + PUSH_DATA (push, bo->offset + offset); + BEGIN_NVC0(push, SUBC_3D(0x07e0), 2); + PUSH_DATA (push, size); + PUSH_DATA (push, size >> 16); + BEGIN_NVC0(push, SUBC_3D(0x15c8), 1); /* bits 0x3 */ + PUSH_DATA (push, 2); + BEGIN_NVC0(push, NVC0_3D(ZCULL_WIDTH), 4); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(ZCULL_WINDOW_OFFSET_X), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1); + PUSH_DATA (push, 0); +} +#endif + +static void +nvc0_validate_fb(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + unsigned i; + unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; + boolean serialize = FALSE; + + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + + BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); + PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs); + BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); + PUSH_DATA (push, fb->width << 16); + PUSH_DATA (push, fb->height << 16); + + for (i = 0; i < fb->nr_cbufs; 
++i) { + struct nv50_surface *sf = nv50_surface(fb->cbufs[i]); + struct nv04_resource *res = nv04_resource(sf->base.texture); + struct nouveau_bo *bo = res->bo; + + BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9); + PUSH_DATAh(push, res->address + sf->offset); + PUSH_DATA (push, res->address + sf->offset); + if (likely(nouveau_bo_memtype(bo))) { + struct nv50_miptree *mt = nv50_miptree(sf->base.texture); + + assert(sf->base.texture->target != PIPE_BUFFER); + + PUSH_DATA(push, sf->width); + PUSH_DATA(push, sf->height); + PUSH_DATA(push, nvc0_format_table[sf->base.format].rt); + PUSH_DATA(push, (mt->layout_3d << 16) | + mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA(push, sf->base.u.tex.first_layer + sf->depth); + PUSH_DATA(push, mt->layer_stride >> 2); + PUSH_DATA(push, sf->base.u.tex.first_layer); + + ms_mode = mt->ms_mode; + } else { + if (res->base.target == PIPE_BUFFER) { + PUSH_DATA(push, 262144); + PUSH_DATA(push, 1); + } else { + PUSH_DATA(push, nv50_miptree(sf->base.texture)->level[0].pitch); + PUSH_DATA(push, sf->height); + } + PUSH_DATA(push, nvc0_format_table[sf->base.format].rt); + PUSH_DATA(push, 1 << 12); + PUSH_DATA(push, 1); + PUSH_DATA(push, 0); + PUSH_DATA(push, 0); + + nvc0_resource_fence(res, NOUVEAU_BO_WR); + + assert(!fb->zsbuf); + } + + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; + + /* only register for writing, otherwise we'd always serialize here */ + BCTX_REFN(nvc0->bufctx_3d, FB, res, WR); + } + + if (fb->zsbuf) { + struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture); + struct nv50_surface *sf = nv50_surface(fb->zsbuf); + int unk = mt->base.base.target == PIPE_TEXTURE_2D; + + BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5); + PUSH_DATAh(push, mt->base.address + sf->offset); + PUSH_DATA (push, mt->base.address + sf->offset); + PUSH_DATA (push, nvc0_format_table[fb->zsbuf->format].rt); + 
PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, mt->layer_stride >> 2); + BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3); + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + PUSH_DATA (push, (unk << 16) | + (sf->base.u.tex.first_layer + sf->depth)); + BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1); + PUSH_DATA (push, sf->base.u.tex.first_layer); + + ms_mode = mt->ms_mode; + + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; + + BCTX_REFN(nvc0->bufctx_3d, FB, &mt->base, WR); + } else { + BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 0); + } + + IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode); + + if (serialize) + IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); + + NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, serialize); +} + +static void +nvc0_validate_blend_colour(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + BEGIN_NVC0(push, NVC0_3D(BLEND_COLOR(0)), 4); + PUSH_DATAf(push, nvc0->blend_colour.color[0]); + PUSH_DATAf(push, nvc0->blend_colour.color[1]); + PUSH_DATAf(push, nvc0->blend_colour.color[2]); + PUSH_DATAf(push, nvc0->blend_colour.color[3]); +} + +static void +nvc0_validate_stencil_ref(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const ubyte *ref = &nvc0->stencil_ref.ref_value[0]; + + IMMED_NVC0(push, NVC0_3D(STENCIL_FRONT_FUNC_REF), ref[0]); + IMMED_NVC0(push, NVC0_3D(STENCIL_BACK_FUNC_REF), ref[1]); +} + +static void +nvc0_validate_stipple(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned i; + + BEGIN_NVC0(push, NVC0_3D(POLYGON_STIPPLE_PATTERN(0)), 32); + for (i = 0; i < 32; ++i) + PUSH_DATA(push, util_bswap32(nvc0->stipple.stipple[i])); +} + +static void 
+nvc0_validate_scissor(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_scissor_state *s = &nvc0->scissor; + + if (!(nvc0->dirty & NVC0_NEW_SCISSOR) && + nvc0->rast->pipe.scissor == nvc0->state.scissor) + return; + nvc0->state.scissor = nvc0->rast->pipe.scissor; + + BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2); + if (nvc0->rast->pipe.scissor) { + PUSH_DATA(push, (s->maxx << 16) | s->minx); + PUSH_DATA(push, (s->maxy << 16) | s->miny); + } else { + PUSH_DATA(push, (0xffff << 16) | 0); + PUSH_DATA(push, (0xffff << 16) | 0); + } +} + +static void +nvc0_validate_viewport(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_viewport_state *vp = &nvc0->viewport; + int x, y, w, h; + float zmin, zmax; + + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSLATE_X(0)), 3); + PUSH_DATAf(push, vp->translate[0]); + PUSH_DATAf(push, vp->translate[1]); + PUSH_DATAf(push, vp->translate[2]); + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SCALE_X(0)), 3); + PUSH_DATAf(push, vp->scale[0]); + PUSH_DATAf(push, vp->scale[1]); + PUSH_DATAf(push, vp->scale[2]); + + /* now set the viewport rectangle to viewport dimensions for clipping */ + + x = util_iround(MAX2(0.0f, vp->translate[0] - fabsf(vp->scale[0]))); + y = util_iround(MAX2(0.0f, vp->translate[1] - fabsf(vp->scale[1]))); + w = util_iround(vp->translate[0] + fabsf(vp->scale[0])) - x; + h = util_iround(vp->translate[1] + fabsf(vp->scale[1])) - y; + + zmin = vp->translate[2] - fabsf(vp->scale[2]); + zmax = vp->translate[2] + fabsf(vp->scale[2]); + + nvc0->vport_int[0] = (w << 16) | x; + nvc0->vport_int[1] = (h << 16) | y; + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, nvc0->vport_int[0]); + PUSH_DATA (push, nvc0->vport_int[1]); + BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(0)), 2); + PUSH_DATAf(push, zmin); + PUSH_DATAf(push, zmax); +} + +static INLINE void +nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s) +{ + struct 
nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nouveau_bo *bo = nvc0->screen->uniform_bo; + + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 512); + PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9)); + PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1); + PUSH_DATA (push, 256); + PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4); +} + +static INLINE void +nvc0_check_program_ucps(struct nvc0_context *nvc0, + struct nvc0_program *vp, uint8_t mask) +{ + const unsigned n = util_logbase2(mask) + 1; + + if (vp->vp.num_ucps >= n) + return; + nvc0_program_destroy(nvc0, vp); + + vp->vp.num_ucps = n; + if (likely(vp == nvc0->vertprog)) + nvc0_vertprog_validate(nvc0); + else + if (likely(vp == nvc0->gmtyprog)) + nvc0_vertprog_validate(nvc0); + else + nvc0_tevlprog_validate(nvc0); +} + +static void +nvc0_validate_clip(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *vp; + unsigned stage; + uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable; + + if (nvc0->gmtyprog) { + stage = 3; + vp = nvc0->gmtyprog; + } else + if (nvc0->tevlprog) { + stage = 2; + vp = nvc0->tevlprog; + } else { + stage = 0; + vp = nvc0->vertprog; + } + + if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES) + nvc0_check_program_ucps(nvc0, vp, clip_enable); + + if (nvc0->dirty & (NVC0_NEW_CLIP | (NVC0_NEW_VERTPROG << stage))) + if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES) + nvc0_upload_uclip_planes(nvc0, stage); + + clip_enable &= vp->vp.clip_enable; + + if (nvc0->state.clip_enable != clip_enable) { + nvc0->state.clip_enable = clip_enable; + IMMED_NVC0(push, NVC0_3D(CLIP_DISTANCE_ENABLE), clip_enable); + } + if (nvc0->state.clip_mode != vp->vp.clip_mode) { + nvc0->state.clip_mode = vp->vp.clip_mode; + BEGIN_NVC0(push, NVC0_3D(CLIP_DISTANCE_MODE), 1); + PUSH_DATA (push, vp->vp.clip_mode); + } +} + +static void 
+nvc0_validate_blend(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + PUSH_SPACE(push, nvc0->blend->size); + PUSH_DATAp(push, nvc0->blend->state, nvc0->blend->size); +} + +static void +nvc0_validate_zsa(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + PUSH_SPACE(push, nvc0->zsa->size); + PUSH_DATAp(push, nvc0->zsa->state, nvc0->zsa->size); +} + +static void +nvc0_validate_rasterizer(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + PUSH_SPACE(push, nvc0->rast->size); + PUSH_DATAp(push, nvc0->rast->state, nvc0->rast->size); +} + +static void +nvc0_constbufs_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned s; + + for (s = 0; s < 5; ++s) { + while (nvc0->constbuf_dirty[s]) { + int i = ffs(nvc0->constbuf_dirty[s]) - 1; + nvc0->constbuf_dirty[s] &= ~(1 << i); + + if (nvc0->constbuf[s][i].user) { + struct nouveau_bo *bo = nvc0->screen->uniform_bo; + const unsigned base = s << 16; + const unsigned size = nvc0->constbuf[s][0].size; + assert(i == 0); /* we really only want OpenGL uniforms here */ + assert(nvc0->constbuf[s][0].u.data); + + if (nvc0->state.uniform_buffer_bound[s] < size) { + nvc0->state.uniform_buffer_bound[s] = align(size, 0x100); + + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]); + PUSH_DATAh(push, bo->offset + base); + PUSH_DATA (push, bo->offset + base); + BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); + PUSH_DATA (push, (0 << 4) | 1); + } + nvc0_cb_push(&nvc0->base, bo, NOUVEAU_BO_VRAM, + base, nvc0->state.uniform_buffer_bound[s], + 0, (size + 3) / 4, + nvc0->constbuf[s][0].u.data); + } else { + struct nv04_resource *res = + nv04_resource(nvc0->constbuf[s][i].u.buf); + if (res) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, nvc0->constbuf[s][i].size); + PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); + PUSH_DATA (push, 
res->address + nvc0->constbuf[s][i].offset); + BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); + PUSH_DATA (push, (i << 4) | 1); + + BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD); + } else { + BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); + PUSH_DATA (push, (i << 4) | 0); + } + if (i == 0) + nvc0->state.uniform_buffer_bound[s] = 0; + } + } + } +} + +static void +nvc0_validate_sample_mask(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + unsigned mask[4] = + { + nvc0->sample_mask & 0xffff, + nvc0->sample_mask & 0xffff, + nvc0->sample_mask & 0xffff, + nvc0->sample_mask & 0xffff + }; + + BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4); + PUSH_DATA (push, mask[0]); + PUSH_DATA (push, mask[1]); + PUSH_DATA (push, mask[2]); + PUSH_DATA (push, mask[3]); + BEGIN_NVC0(push, NVC0_3D(SAMPLE_SHADING), 1); + PUSH_DATA (push, 0x01); +} + +void +nvc0_validate_global_residents(struct nvc0_context *nvc0, + struct nouveau_bufctx *bctx, int bin) +{ + unsigned i; + + for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource *res = *util_dynarray_element( + &nvc0->global_residents, struct pipe_resource *, i); + if (res) + nvc0_add_resident(bctx, bin, nv04_resource(res), NOUVEAU_BO_RDWR); + } +} + +static void +nvc0_validate_derived_1(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + boolean rasterizer_discard; + + if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) { + rasterizer_discard = TRUE; + } else { + boolean zs = nvc0->zsa && + (nvc0->zsa->pipe.depth.enabled || nvc0->zsa->pipe.stencil[0].enabled); + rasterizer_discard = !zs && + (!nvc0->fragprog || !nvc0->fragprog->hdr[18]); + } + + if (rasterizer_discard != nvc0->state.rasterizer_discard) { + nvc0->state.rasterizer_discard = rasterizer_discard; + IMMED_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), !rasterizer_discard); + } +} + +static void +nvc0_switch_pipe_context(struct nvc0_context *ctx_to) +{ + struct nvc0_context 
*ctx_from = ctx_to->screen->cur_ctx; + unsigned s; + + if (ctx_from) + ctx_to->state = ctx_from->state; + + ctx_to->dirty = ~0; + + for (s = 0; s < 5; ++s) { + ctx_to->samplers_dirty[s] = ~0; + ctx_to->textures_dirty[s] = ~0; + } + + if (!ctx_to->vertex) + ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS); + if (!ctx_to->idxbuf.buffer) + ctx_to->dirty &= ~NVC0_NEW_IDXBUF; + + if (!ctx_to->vertprog) + ctx_to->dirty &= ~NVC0_NEW_VERTPROG; + if (!ctx_to->fragprog) + ctx_to->dirty &= ~NVC0_NEW_FRAGPROG; + + if (!ctx_to->blend) + ctx_to->dirty &= ~NVC0_NEW_BLEND; + if (!ctx_to->rast) + ctx_to->dirty &= ~(NVC0_NEW_RASTERIZER | NVC0_NEW_SCISSOR); + if (!ctx_to->zsa) + ctx_to->dirty &= ~NVC0_NEW_ZSA; + + ctx_to->screen->cur_ctx = ctx_to; +} + +static struct state_validate { + void (*func)(struct nvc0_context *); + uint32_t states; +} validate_list[] = { + { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER }, + { nvc0_validate_blend, NVC0_NEW_BLEND }, + { nvc0_validate_zsa, NVC0_NEW_ZSA }, + { nvc0_validate_sample_mask, NVC0_NEW_SAMPLE_MASK }, + { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER }, + { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR }, + { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF }, + { nvc0_validate_stipple, NVC0_NEW_STIPPLE }, + { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER }, + { nvc0_validate_viewport, NVC0_NEW_VIEWPORT }, + { nvc0_vertprog_validate, NVC0_NEW_VERTPROG }, + { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG }, + { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, + { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, + { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, + { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA | + NVC0_NEW_RASTERIZER }, + { nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER | + NVC0_NEW_VERTPROG | + NVC0_NEW_TEVLPROG | + NVC0_NEW_GMTYPROG }, + { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, + { nvc0_validate_textures, NVC0_NEW_TEXTURES }, + { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, + { 
nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS }, + { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, + { nvc0_validate_surfaces, NVC0_NEW_SURFACES }, + { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF }, + { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG } +}; +#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) + +boolean +nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask, unsigned words) +{ + uint32_t state_mask; + int ret; + unsigned i; + + if (nvc0->screen->cur_ctx != nvc0) + nvc0_switch_pipe_context(nvc0); + + state_mask = nvc0->dirty & mask; + + if (state_mask) { + for (i = 0; i < validate_list_len; ++i) { + struct state_validate *validate = &validate_list[i]; + + if (state_mask & validate->states) + validate->func(nvc0); + } + nvc0->dirty &= ~state_mask; + + nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, FALSE); + } + + nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_3d); + ret = nouveau_pushbuf_validate(nvc0->base.pushbuf); + + if (unlikely(nvc0->state.flushed)) { + nvc0->state.flushed = FALSE; + nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, TRUE); + } + return !ret; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h new file mode 100644 index 00000000000..80c33424032 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h @@ -0,0 +1,77 @@ + +#ifndef __NVC0_STATEOBJ_H__ +#define __NVC0_STATEOBJ_H__ + +#include "pipe/p_state.h" + +#define SB_BEGIN_3D(so, m, s) \ + (so)->state[(so)->size++] = NVC0_FIFO_PKHDR_SQ(NVC0_3D(m), s) + +#define SB_IMMED_3D(so, m, d) \ + (so)->state[(so)->size++] = NVC0_FIFO_PKHDR_IL(NVC0_3D(m), d) + +#define SB_DATA(so, u) (so)->state[(so)->size++] = (u) + +#include "nv50/nv50_stateobj_tex.h" + +struct nvc0_blend_stateobj { + struct pipe_blend_state pipe; + int size; + uint32_t state[72]; +}; + +struct nvc0_rasterizer_stateobj { + struct pipe_rasterizer_state pipe; + int size; + uint32_t 
state[43]; +}; + +struct nvc0_zsa_stateobj { + struct pipe_depth_stencil_alpha_state pipe; + int size; + uint32_t state[26]; +}; + +struct nvc0_constbuf { + union { + struct pipe_resource *buf; + const void *data; + } u; + uint32_t size; + uint32_t offset; + boolean user; /* should only be TRUE if u.data is valid and non-NULL */ +}; + +struct nvc0_vertex_element { + struct pipe_vertex_element pipe; + uint32_t state; + uint32_t state_alt; /* buffer 0 and with source offset (for translate) */ +}; + +struct nvc0_vertex_stateobj { + uint32_t min_instance_div[PIPE_MAX_ATTRIBS]; + uint16_t vb_access_size[PIPE_MAX_ATTRIBS]; + struct translate *translate; + unsigned num_elements; + uint32_t instance_elts; + uint32_t instance_bufs; + boolean shared_slots; + boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */ + unsigned size; /* size of vertex in bytes (when packed) */ + struct nvc0_vertex_element element[0]; +}; + +struct nvc0_so_target { + struct pipe_stream_output_target pipe; + struct pipe_query *pq; + unsigned stride; + boolean clean; +}; + +static INLINE struct nvc0_so_target * +nvc0_so_target(struct pipe_stream_output_target *ptarg) +{ + return (struct nvc0_so_target *)ptarg; +} + +#endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c new file mode 100644 index 00000000000..5070df80671 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -0,0 +1,1265 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <stdint.h> + +#include "pipe/p_defines.h" + +#include "util/u_inlines.h" +#include "util/u_pack_color.h" +#include "util/u_format.h" +#include "util/u_surface.h" + +#include "os/os_thread.h" + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_resource.h" + +#include "nv50/nv50_defs.xml.h" +#include "nv50/nv50_texture.xml.h" + +/* these are used in nv50_blit.h */ +#define NV50_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL +#define NV50_ENG2D_NOCONVERT_FORMATS 0x009cc02000000000ULL +#define NV50_ENG2D_LUMINANCE_FORMATS 0x001cc02000000000ULL +#define NV50_ENG2D_INTENSITY_FORMATS 0x0080000000000000ULL +#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000638000ULL + +#define NOUVEAU_DRIVER 0xc0 +#include "nv50/nv50_blit.h" + +static INLINE uint8_t +nvc0_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal) +{ + uint8_t id = nvc0_format_table[format].rt; + + /* A8_UNORM is treated as I8_UNORM as far as the 2D engine is concerned. */ + if (!dst && unlikely(format == PIPE_FORMAT_I8_UNORM) && !dst_src_equal) + return NV50_SURFACE_FORMAT_A8_UNORM; + + /* Hardware values for color formats range from 0xc0 to 0xff, + * but the 2D engine doesn't support all of them. 
+ */ + if (nv50_2d_format_supported(format)) + return id; + assert(dst_src_equal); + + switch (util_format_get_blocksize(format)) { + case 1: + return NV50_SURFACE_FORMAT_R8_UNORM; + case 2: + return NV50_SURFACE_FORMAT_R16_UNORM; + case 4: + return NV50_SURFACE_FORMAT_BGRA8_UNORM; + case 8: + return NV50_SURFACE_FORMAT_RGBA16_UNORM; + case 16: + return NV50_SURFACE_FORMAT_RGBA32_FLOAT; + default: + assert(0); + return 0; + } +} + +static int +nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst, + struct nv50_miptree *mt, unsigned level, unsigned layer, + enum pipe_format pformat, boolean dst_src_pformat_equal) +{ + struct nouveau_bo *bo = mt->base.bo; + uint32_t width, height, depth; + uint32_t format; + uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT; + uint32_t offset = mt->level[level].offset; + + format = nvc0_2d_format(pformat, dst, dst_src_pformat_equal); + if (!format) { + NOUVEAU_ERR("invalid/unsupported surface format: %s\n", + util_format_name(pformat)); + return 1; + } + + width = u_minify(mt->base.base.width0, level) << mt->ms_x; + height = u_minify(mt->base.base.height0, level) << mt->ms_y; + depth = u_minify(mt->base.base.depth0, level); + + /* layer has to be < depth, and depth > tile depth / 2 */ + + if (!mt->layout_3d) { + offset += mt->layer_stride * layer; + layer = 0; + depth = 1; + } else + if (!dst) { + offset += nvc0_mt_zslice_offset(mt, level, layer); + layer = 0; + } + + if (!nouveau_bo_memtype(bo)) { + BEGIN_NVC0(push, SUBC_2D(mthd), 2); + PUSH_DATA (push, format); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, SUBC_2D(mthd + 0x14), 5); + PUSH_DATA (push, mt->level[level].pitch); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + PUSH_DATAh(push, bo->offset + offset); + PUSH_DATA (push, bo->offset + offset); + } else { + BEGIN_NVC0(push, SUBC_2D(mthd), 5); + PUSH_DATA (push, format); + PUSH_DATA (push, 0); + PUSH_DATA (push, mt->level[level].tile_mode); + PUSH_DATA (push, depth); + PUSH_DATA (push, layer); + 
BEGIN_NVC0(push, SUBC_2D(mthd + 0x18), 4); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + PUSH_DATAh(push, bo->offset + offset); + PUSH_DATA (push, bo->offset + offset); + } + +#if 0 + if (dst) { + BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + } +#endif + return 0; +} + +static int +nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push, + struct nv50_miptree *dst, unsigned dst_level, + unsigned dx, unsigned dy, unsigned dz, + struct nv50_miptree *src, unsigned src_level, + unsigned sx, unsigned sy, unsigned sz, + unsigned w, unsigned h) +{ + const enum pipe_format dfmt = dst->base.base.format; + const enum pipe_format sfmt = src->base.base.format; + int ret; + boolean eqfmt = dfmt == sfmt; + + if (!PUSH_SPACE(push, 2 * 16 + 32)) + return PIPE_ERROR; + + ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt, eqfmt); + if (ret) + return ret; + + ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt, eqfmt); + if (ret) + return ret; + + IMMED_NVC0(push, NVC0_2D(BLIT_CONTROL), 0x00); + BEGIN_NVC0(push, NVC0_2D(BLIT_DST_X), 4); + PUSH_DATA (push, dx << dst->ms_x); + PUSH_DATA (push, dy << dst->ms_y); + PUSH_DATA (push, w << dst->ms_x); + PUSH_DATA (push, h << dst->ms_y); + BEGIN_NVC0(push, NVC0_2D(BLIT_DU_DX_FRACT), 4); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_X_FRACT), 4); + PUSH_DATA (push, 0); + PUSH_DATA (push, sx << src->ms_x); + PUSH_DATA (push, 0); + PUSH_DATA (push, sy << src->ms_x); + + return 0; +} + +static void +nvc0_resource_copy_region(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + int ret; + boolean m2mf; + unsigned dst_layer = 
dstz, src_layer = src_box->z; + + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + nouveau_copy_buffer(&nvc0->base, + nv04_resource(dst), dstx, + nv04_resource(src), src_box->x, src_box->width); + NOUVEAU_DRV_STAT(&nvc0->screen->base, buf_copy_bytes, src_box->width); + return; + } + NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_copy_count, 1); + + /* 0 and 1 are equal, only supporting 0/1, 2, 4 and 8 */ + assert((src->nr_samples | 1) == (dst->nr_samples | 1)); + + m2mf = (src->format == dst->format) || + (util_format_get_blocksizebits(src->format) == + util_format_get_blocksizebits(dst->format)); + + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + + if (m2mf) { + struct nv50_m2mf_rect drect, srect; + unsigned i; + unsigned nx = util_format_get_nblocksx(src->format, src_box->width); + unsigned ny = util_format_get_nblocksy(src->format, src_box->height); + + nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz); + nv50_m2mf_rect_setup(&srect, src, src_level, + src_box->x, src_box->y, src_box->z); + + for (i = 0; i < src_box->depth; ++i) { + nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny); + + if (nv50_miptree(dst)->layout_3d) + drect.z++; + else + drect.base += nv50_miptree(dst)->layer_stride; + + if (nv50_miptree(src)->layout_3d) + srect.z++; + else + srect.base += nv50_miptree(src)->layer_stride; + } + return; + } + + assert(nv50_2d_dst_format_faithful(dst->format)); + assert(nv50_2d_src_format_faithful(src->format)); + + BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(src), RD); + BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(dst), WR); + nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx); + nouveau_pushbuf_validate(nvc0->base.pushbuf); + + for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) { + ret = nvc0_2d_texture_do_copy(nvc0->base.pushbuf, + nv50_miptree(dst), dst_level, + dstx, dsty, dst_layer, + nv50_miptree(src), src_level, + src_box->x, src_box->y, src_layer, + src_box->width, src_box->height); + if 
(ret) + break; + } + nouveau_bufctx_reset(nvc0->bufctx, 0); +} + +static void +nvc0_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const union pipe_color_union *color, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv50_surface *sf = nv50_surface(dst); + struct nv04_resource *res = nv04_resource(sf->base.texture); + unsigned z; + + if (!PUSH_SPACE(push, 32 + sf->depth)) + return; + + PUSH_REFN (push, res->bo, res->domain | NOUVEAU_BO_WR); + + BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4); + PUSH_DATAf(push, color->f[0]); + PUSH_DATAf(push, color->f[1]); + PUSH_DATAf(push, color->f[2]); + PUSH_DATAf(push, color->f[3]); + + BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); + PUSH_DATA (push, ( width << 16) | dstx); + PUSH_DATA (push, (height << 16) | dsty); + + BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9); + PUSH_DATAh(push, res->address + sf->offset); + PUSH_DATA (push, res->address + sf->offset); + if (likely(nouveau_bo_memtype(res->bo))) { + struct nv50_miptree *mt = nv50_miptree(dst->texture); + + PUSH_DATA(push, sf->width); + PUSH_DATA(push, sf->height); + PUSH_DATA(push, nvc0_format_table[dst->format].rt); + PUSH_DATA(push, (mt->layout_3d << 16) | + mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA(push, dst->u.tex.first_layer + sf->depth); + PUSH_DATA(push, mt->layer_stride >> 2); + PUSH_DATA(push, dst->u.tex.first_layer); + } else { + if (res->base.target == PIPE_BUFFER) { + PUSH_DATA(push, 262144); + PUSH_DATA(push, 1); + } else { + PUSH_DATA(push, nv50_miptree(&res->base)->level[0].pitch); + PUSH_DATA(push, sf->height); + } + PUSH_DATA(push, nvc0_format_table[sf->base.format].rt); + PUSH_DATA(push, 1 << 12); + PUSH_DATA(push, 1); + PUSH_DATA(push, 0); + PUSH_DATA(push, 0); + + IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0); 
+ + /* tiled textures don't have to be fenced, they're not mapped directly */ + nvc0_resource_fence(res, NOUVEAU_BO_WR); + } + + BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); + for (z = 0; z < sf->depth; ++z) { + PUSH_DATA (push, 0x3c | + (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); + } + + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; +} + +static void +nvc0_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nv50_surface *sf = nv50_surface(dst); + uint32_t mode = 0; + int unk = mt->base.base.target == PIPE_TEXTURE_2D; + unsigned z; + + if (!PUSH_SPACE(push, 32 + sf->depth)) + return; + + PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR); + + if (clear_flags & PIPE_CLEAR_DEPTH) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1); + PUSH_DATAf(push, depth); + mode |= NVC0_3D_CLEAR_BUFFERS_Z; + } + + if (clear_flags & PIPE_CLEAR_STENCIL) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1); + PUSH_DATA (push, stencil & 0xff); + mode |= NVC0_3D_CLEAR_BUFFERS_S; + } + + BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); + PUSH_DATA (push, ( width << 16) | dstx); + PUSH_DATA (push, (height << 16) | dsty); + + BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5); + PUSH_DATAh(push, mt->base.address + sf->offset); + PUSH_DATA (push, mt->base.address + sf->offset); + PUSH_DATA (push, nvc0_format_table[dst->format].rt); + PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, mt->layer_stride >> 2); + BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3); + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + 
sf->depth)); + BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1); + PUSH_DATA (push, dst->u.tex.first_layer); + + BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); + for (z = 0; z < sf->depth; ++z) { + PUSH_DATA (push, mode | + (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); + } + + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; +} + +void +nvc0_clear(struct pipe_context *pipe, unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + unsigned i; + uint32_t mode = 0; + + /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ + if (!nvc0_state_validate(nvc0, NVC0_NEW_FRAMEBUFFER, 9 + (fb->nr_cbufs * 2))) + return; + + if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4); + PUSH_DATAf(push, color->f[0]); + PUSH_DATAf(push, color->f[1]); + PUSH_DATAf(push, color->f[2]); + PUSH_DATAf(push, color->f[3]); + mode = + NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G | + NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A; + } + + if (buffers & PIPE_CLEAR_DEPTH) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1); + PUSH_DATA (push, fui(depth)); + mode |= NVC0_3D_CLEAR_BUFFERS_Z; + } + + if (buffers & PIPE_CLEAR_STENCIL) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1); + PUSH_DATA (push, stencil & 0xff); + mode |= NVC0_3D_CLEAR_BUFFERS_S; + } + + BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1); + PUSH_DATA (push, mode); + + for (i = 1; i < fb->nr_cbufs; i++) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1); + PUSH_DATA (push, (i << 6) | 0x3c); + } +} + + +/* =============================== BLIT CODE =================================== + */ + +struct nvc0_blitter +{ + struct nvc0_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES]; + struct nvc0_program vp; + + struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */ + + pipe_mutex mutex; + + 
struct nvc0_screen *screen; +}; + +struct nvc0_blitctx +{ + struct nvc0_context *nvc0; + struct nvc0_program *fp; + uint8_t mode; + uint16_t color_mask; + uint8_t filter; + enum pipe_texture_target target; + struct { + struct pipe_framebuffer_state fb; + struct nvc0_rasterizer_stateobj *rast; + struct nvc0_program *vp; + struct nvc0_program *tcp; + struct nvc0_program *tep; + struct nvc0_program *gp; + struct nvc0_program *fp; + unsigned num_textures[5]; + unsigned num_samplers[5]; + struct pipe_sampler_view *texture[2]; + struct nv50_tsc_entry *sampler[2]; + uint32_t dirty; + } saved; + struct nvc0_rasterizer_stateobj rast; +}; + +static void +nvc0_blitter_make_vp(struct nvc0_blitter *blit) +{ + static const uint32_t code_nvc0[] = + { + 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */ + 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */ + 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */ + 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */ + 0x00001de7, 0x80000000, /* exit */ + }; + static const uint32_t code_nve4[] = + { + 0x00000007, 0x20000000, /* sched */ + 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */ + 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */ + 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */ + 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */ + 0x00001de7, 0x80000000, /* exit */ + }; + + blit->vp.type = PIPE_SHADER_VERTEX; + blit->vp.translated = TRUE; + if (blit->screen->base.class_3d >= NVE4_3D_CLASS) { + blit->vp.code = (uint32_t *)code_nve4; /* const_cast */ + blit->vp.code_size = sizeof(code_nve4); + } else { + blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */ + blit->vp.code_size = sizeof(code_nvc0); + } + blit->vp.num_gprs = 6; + blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS; + + blit->vp.hdr[0] = 0x00020461; /* vertprog magic */ + blit->vp.hdr[4] = 0x000ff000; /* no outputs read */ + blit->vp.hdr[6] = 0x00000073; /* a[0x80].xy, a[0x90].xyz */ + blit->vp.hdr[13] = 
0x00073000; /* o[0x70].xy, o[0x80].xyz */ +} + +static void +nvc0_blitter_make_sampler(struct nvc0_blitter *blit) +{ + /* clamp to edge, min/max lod = 0, nearest filtering */ + + blit->sampler[0].id = -1; + + blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED | + (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) | + (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) | + (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT); + blit->sampler[0].tsc[1] = + NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE; + + /* clamp to edge, min/max lod = 0, bilinear filtering */ + + blit->sampler[1].id = -1; + + blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0]; + blit->sampler[1].tsc[1] = + NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE; +} + +static void +nvc0_blit_select_fp(struct nvc0_blitctx *ctx, const struct pipe_blit_info *info) +{ + struct nvc0_blitter *blitter = ctx->nvc0->screen->blitter; + + const enum pipe_texture_target ptarg = + nv50_blit_reinterpret_pipe_texture_target(info->src.resource->target); + + const unsigned targ = nv50_blit_texture_type(ptarg); + const unsigned mode = ctx->mode; + + if (!blitter->fp[targ][mode]) { + pipe_mutex_lock(blitter->mutex); + if (!blitter->fp[targ][mode]) + blitter->fp[targ][mode] = + nv50_blitter_make_fp(&ctx->nvc0->base.pipe, mode, ptarg); + pipe_mutex_unlock(blitter->mutex); + } + ctx->fp = blitter->fp[targ][mode]; +} + +static void +nvc0_blit_set_dst(struct nvc0_blitctx *ctx, + struct pipe_resource *res, unsigned level, unsigned layer, + enum pipe_format format) +{ + struct nvc0_context *nvc0 = ctx->nvc0; + struct pipe_context *pipe = &nvc0->base.pipe; + struct pipe_surface templ; + + if (util_format_is_depth_or_stencil(format)) + templ.format = nv50_blit_zeta_to_colour_format(format); + else + templ.format = format; + + templ.u.tex.level = level; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + + if (layer == -1) { + templ.u.tex.first_layer = 
0; + templ.u.tex.last_layer = + (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1; + } + + nvc0->framebuffer.cbufs[0] = nvc0_miptree_surface_new(pipe, res, &templ); + nvc0->framebuffer.nr_cbufs = 1; + nvc0->framebuffer.zsbuf = NULL; + nvc0->framebuffer.width = nvc0->framebuffer.cbufs[0]->width; + nvc0->framebuffer.height = nvc0->framebuffer.cbufs[0]->height; +} + +static void +nvc0_blit_set_src(struct nvc0_blitctx *ctx, + struct pipe_resource *res, unsigned level, unsigned layer, + enum pipe_format format, const uint8_t filter) +{ + struct nvc0_context *nvc0 = ctx->nvc0; + struct pipe_context *pipe = &nvc0->base.pipe; + struct pipe_sampler_view templ; + uint32_t flags; + unsigned s; + enum pipe_texture_target target; + + target = nv50_blit_reinterpret_pipe_texture_target(res->target); + + templ.format = format; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + templ.u.tex.first_level = templ.u.tex.last_level = level; + templ.swizzle_r = PIPE_SWIZZLE_RED; + templ.swizzle_g = PIPE_SWIZZLE_GREEN; + templ.swizzle_b = PIPE_SWIZZLE_BLUE; + templ.swizzle_a = PIPE_SWIZZLE_ALPHA; + + if (layer == -1) { + templ.u.tex.first_layer = 0; + templ.u.tex.last_layer = + (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1; + } + + flags = res->last_level ? 
0 : NV50_TEXVIEW_SCALED_COORDS; + flags |= NV50_TEXVIEW_ACCESS_RESOLVE; + if (filter && res->nr_samples == 8) + flags |= NV50_TEXVIEW_FILTER_MSAA8; + + nvc0->textures[4][0] = nvc0_create_texture_view( + pipe, res, &templ, flags, target); + nvc0->textures[4][1] = NULL; + + for (s = 0; s <= 3; ++s) + nvc0->num_textures[s] = 0; + nvc0->num_textures[4] = 1; + + templ.format = nv50_zs_to_s_format(format); + if (templ.format != format) { + nvc0->textures[4][1] = nvc0_create_texture_view( + pipe, res, &templ, flags, target); + nvc0->num_textures[4] = 2; + } +} + +static void +nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit) +{ + struct nouveau_pushbuf *push = blit->nvc0->base.pushbuf; + + /* TODO: maybe make this a MACRO (if we need more logic) ? */ + + if (blit->nvc0->cond_query) + IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + + /* blend state */ + BEGIN_NVC0(push, NVC0_3D(COLOR_MASK(0)), 1); + PUSH_DATA (push, blit->color_mask); + IMMED_NVC0(push, NVC0_3D(BLEND_ENABLE(0)), 0); + IMMED_NVC0(push, NVC0_3D(LOGIC_OP_ENABLE), 0); + + /* rasterizer state */ + IMMED_NVC0(push, NVC0_3D(FRAG_COLOR_CLAMP_EN), 0); + IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_ENABLE), 0); + BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4); + PUSH_DATA (push, 0xffff); + PUSH_DATA (push, 0xffff); + PUSH_DATA (push, 0xffff); + PUSH_DATA (push, 0xffff); + BEGIN_NVC0(push, NVC0_3D(MACRO_POLYGON_MODE_FRONT), 1); + PUSH_DATA (push, NVC0_3D_MACRO_POLYGON_MODE_FRONT_FILL); + BEGIN_NVC0(push, NVC0_3D(MACRO_POLYGON_MODE_BACK), 1); + PUSH_DATA (push, NVC0_3D_MACRO_POLYGON_MODE_BACK_FILL); + IMMED_NVC0(push, NVC0_3D(POLYGON_SMOOTH_ENABLE), 0); + IMMED_NVC0(push, NVC0_3D(POLYGON_OFFSET_FILL_ENABLE), 0); + IMMED_NVC0(push, NVC0_3D(POLYGON_STIPPLE_ENABLE), 0); + IMMED_NVC0(push, NVC0_3D(CULL_FACE_ENABLE), 0); + + /* zsa state */ + IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0); + IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0); + IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0); + + /* disable transform 
feedback */ + IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), 0); +} + +static void +nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx) +{ + struct nvc0_context *nvc0 = ctx->nvc0; + struct nvc0_blitter *blitter = nvc0->screen->blitter; + int s; + + ctx->saved.fb.width = nvc0->framebuffer.width; + ctx->saved.fb.height = nvc0->framebuffer.height; + ctx->saved.fb.nr_cbufs = nvc0->framebuffer.nr_cbufs; + ctx->saved.fb.cbufs[0] = nvc0->framebuffer.cbufs[0]; + ctx->saved.fb.zsbuf = nvc0->framebuffer.zsbuf; + + ctx->saved.rast = nvc0->rast; + + ctx->saved.vp = nvc0->vertprog; + ctx->saved.tcp = nvc0->tctlprog; + ctx->saved.tep = nvc0->tevlprog; + ctx->saved.gp = nvc0->gmtyprog; + ctx->saved.fp = nvc0->fragprog; + + nvc0->rast = &ctx->rast; + + nvc0->vertprog = &blitter->vp; + nvc0->tctlprog = NULL; + nvc0->tevlprog = NULL; + nvc0->gmtyprog = NULL; + nvc0->fragprog = ctx->fp; + + for (s = 0; s <= 4; ++s) { + ctx->saved.num_textures[s] = nvc0->num_textures[s]; + ctx->saved.num_samplers[s] = nvc0->num_samplers[s]; + nvc0->textures_dirty[s] = (1 << nvc0->num_textures[s]) - 1; + nvc0->samplers_dirty[s] = (1 << nvc0->num_samplers[s]) - 1; + } + ctx->saved.texture[0] = nvc0->textures[4][0]; + ctx->saved.texture[1] = nvc0->textures[4][1]; + ctx->saved.sampler[0] = nvc0->samplers[4][0]; + ctx->saved.sampler[1] = nvc0->samplers[4][1]; + + nvc0->samplers[4][0] = &blitter->sampler[ctx->filter]; + nvc0->samplers[4][1] = &blitter->sampler[ctx->filter]; + + for (s = 0; s <= 3; ++s) + nvc0->num_samplers[s] = 0; + nvc0->num_samplers[4] = 2; + + ctx->saved.dirty = nvc0->dirty; + + nvc0->textures_dirty[4] |= 3; + nvc0->samplers_dirty[4] |= 3; + + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1)); + + nvc0->dirty = NVC0_NEW_FRAMEBUFFER | + NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG | + NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG | + NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS; 
+} + +static void +nvc0_blitctx_post_blit(struct nvc0_blitctx *blit) +{ + struct nvc0_context *nvc0 = blit->nvc0; + int s; + + pipe_surface_reference(&nvc0->framebuffer.cbufs[0], NULL); + + nvc0->framebuffer.width = blit->saved.fb.width; + nvc0->framebuffer.height = blit->saved.fb.height; + nvc0->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs; + nvc0->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0]; + nvc0->framebuffer.zsbuf = blit->saved.fb.zsbuf; + + nvc0->rast = blit->saved.rast; + + nvc0->vertprog = blit->saved.vp; + nvc0->tctlprog = blit->saved.tcp; + nvc0->tevlprog = blit->saved.tep; + nvc0->gmtyprog = blit->saved.gp; + nvc0->fragprog = blit->saved.fp; + + pipe_sampler_view_reference(&nvc0->textures[4][0], NULL); + pipe_sampler_view_reference(&nvc0->textures[4][1], NULL); + + for (s = 0; s <= 4; ++s) { + nvc0->num_textures[s] = blit->saved.num_textures[s]; + nvc0->num_samplers[s] = blit->saved.num_samplers[s]; + nvc0->textures_dirty[s] = (1 << nvc0->num_textures[s]) - 1; + nvc0->samplers_dirty[s] = (1 << nvc0->num_samplers[s]) - 1; + } + nvc0->textures[4][0] = blit->saved.texture[0]; + nvc0->textures[4][1] = blit->saved.texture[1]; + nvc0->samplers[4][0] = blit->saved.sampler[0]; + nvc0->samplers[4][1] = blit->saved.sampler[1]; + + nvc0->textures_dirty[4] |= 3; + nvc0->samplers_dirty[4] |= 3; + + if (nvc0->cond_query) + nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query, + nvc0->cond_cond, nvc0->cond_mode); + + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1)); + + nvc0->dirty = blit->saved.dirty | + (NVC0_NEW_FRAMEBUFFER | NVC0_NEW_SCISSOR | NVC0_NEW_SAMPLE_MASK | + NVC0_NEW_RASTERIZER | NVC0_NEW_ZSA | NVC0_NEW_BLEND | + NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS | + NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG | + NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG | + NVC0_NEW_TFB_TARGETS); +} + +static void 
+nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) +{ + struct nvc0_blitctx *blit = nvc0->blit; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_resource *src = info->src.resource; + struct pipe_resource *dst = info->dst.resource; + int32_t minx, maxx, miny, maxy; + int32_t i; + float x0, x1, y0, y1, z; + float dz; + float x_range, y_range; + + blit->mode = nv50_blit_select_mode(info); + blit->color_mask = nv50_blit_derive_color_mask(info); + blit->filter = nv50_blit_get_filter(info); + + nvc0_blit_select_fp(blit, info); + nvc0_blitctx_pre_blit(blit); + + nvc0_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format); + nvc0_blit_set_src(blit, src, info->src.level, -1, info->src.format, + blit->filter); + + nvc0_blitctx_prepare_state(blit); + + nvc0_state_validate(nvc0, ~0, 48); + + x_range = (float)info->src.box.width / (float)info->dst.box.width; + y_range = (float)info->src.box.height / (float)info->dst.box.height; + + x0 = (float)info->src.box.x - x_range * (float)info->dst.box.x; + y0 = (float)info->src.box.y - y_range * (float)info->dst.box.y; + + x1 = x0 + 16384.0f * x_range; + y1 = y0 + 16384.0f * y_range; + + x0 *= (float)(1 << nv50_miptree(src)->ms_x); + x1 *= (float)(1 << nv50_miptree(src)->ms_x); + y0 *= (float)(1 << nv50_miptree(src)->ms_y); + y1 *= (float)(1 << nv50_miptree(src)->ms_y); + + if (src->last_level > 0) { + /* If there are mip maps, GPU always assumes normalized coordinates. 
*/ + const unsigned l = info->src.level; + const float fh = u_minify(src->width0 << nv50_miptree(src)->ms_x, l); + const float fv = u_minify(src->height0 << nv50_miptree(src)->ms_y, l); + x0 /= fh; + x1 /= fh; + y0 /= fv; + y1 /= fv; + } + + dz = (float)info->src.box.depth / (float)info->dst.box.depth; + z = (float)info->src.box.z; + if (nv50_miptree(src)->layout_3d) + z += 0.5f * dz; + + IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0); + IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1); + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, nvc0->framebuffer.width << 16); + PUSH_DATA (push, nvc0->framebuffer.height << 16); + + /* Draw a large triangle in screen coordinates covering the whole + * render target, with scissors defining the destination region. + * The vertex is supplied with non-normalized texture coordinates + * arranged in a way to yield the desired offset and scale. + */ + + minx = info->dst.box.x; + maxx = info->dst.box.x + info->dst.box.width; + miny = info->dst.box.y; + maxy = info->dst.box.y + info->dst.box.height; + if (info->scissor_enable) { + minx = MAX2(minx, info->scissor.minx); + maxx = MIN2(maxx, info->scissor.maxx); + miny = MAX2(miny, info->scissor.miny); + maxy = MIN2(maxy, info->scissor.maxy); + } + BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2); + PUSH_DATA (push, (maxx << 16) | minx); + PUSH_DATA (push, (maxy << 16) | miny); + + for (i = 0; i < info->dst.box.depth; ++i, z += dz) { + if (info->dst.box.z + i) { + BEGIN_NVC0(push, NVC0_3D(LAYER), 1); + PUSH_DATA (push, info->dst.box.z + i); + } + + IMMED_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), + NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); + + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 4); + PUSH_DATA (push, 0x74301); + PUSH_DATAf(push, x0); + PUSH_DATAf(push, y0); + PUSH_DATAf(push, z); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 3); + PUSH_DATA (push, 0x74200); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 
0.0f); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 4); + PUSH_DATA (push, 0x74301); + PUSH_DATAf(push, x1); + PUSH_DATAf(push, y0); + PUSH_DATAf(push, z); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 3); + PUSH_DATA (push, 0x74200); + PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_x); + PUSH_DATAf(push, 0.0f); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 4); + PUSH_DATA (push, 0x74301); + PUSH_DATAf(push, x0); + PUSH_DATAf(push, y1); + PUSH_DATAf(push, z); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 3); + PUSH_DATA (push, 0x74200); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_y); + + IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0); + } + if (info->dst.box.z + info->dst.box.depth - 1) + IMMED_NVC0(push, NVC0_3D(LAYER), 0); + + nvc0_blitctx_post_blit(blit); + + /* restore viewport */ + + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, nvc0->vport_int[0]); + PUSH_DATA (push, nvc0->vport_int[1]); + IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1); +} + +static void +nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv50_miptree *dst = nv50_miptree(info->dst.resource); + struct nv50_miptree *src = nv50_miptree(info->src.resource); + const int32_t srcx_adj = info->src.box.width < 0 ? -1 : 0; + const int32_t srcy_adj = info->src.box.height < 0 ? -1 : 0; + const int dz = info->dst.box.z; + const int sz = info->src.box.z; + uint32_t dstw, dsth; + int32_t dstx, dsty; + int64_t srcx, srcy; + int64_t du_dx, dv_dy; + int i; + uint32_t mode; + uint32_t mask = nv50_blit_eng2d_get_mask(info); + boolean b; + + mode = nv50_blit_get_filter(info) ? + NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR : + NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE; + mode |= (src->base.base.nr_samples > dst->base.base.nr_samples) ? 
+ NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER : NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER; + + du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width; + dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height; + + b = info->dst.format == info->src.format; + nvc0_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b); + nvc0_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b); + + if (info->scissor_enable) { + BEGIN_NVC0(push, NVC0_2D(CLIP_X), 5); + PUSH_DATA (push, info->scissor.minx << dst->ms_x); + PUSH_DATA (push, info->scissor.miny << dst->ms_y); + PUSH_DATA (push, (info->scissor.maxx - info->scissor.minx) << dst->ms_x); + PUSH_DATA (push, (info->scissor.maxy - info->scissor.miny) << dst->ms_y); + PUSH_DATA (push, 1); /* enable */ + } + + if (mask != 0xffffffff) { + IMMED_NVC0(push, NVC0_2D(ROP), 0xca); /* DPSDxax */ + IMMED_NVC0(push, NVC0_2D(PATTERN_COLOR_FORMAT), + NVC0_2D_PATTERN_COLOR_FORMAT_32BPP); + BEGIN_NVC0(push, NVC0_2D(PATTERN_COLOR(0)), 4); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, mask); + PUSH_DATA (push, 0xffffffff); + PUSH_DATA (push, 0xffffffff); + IMMED_NVC0(push, NVC0_2D(OPERATION), NVC0_2D_OPERATION_ROP); + } else + if (info->src.format != info->dst.format) { + if (info->src.format == PIPE_FORMAT_R8_UNORM || + info->src.format == PIPE_FORMAT_R8_SNORM || + info->src.format == PIPE_FORMAT_R16_UNORM || + info->src.format == PIPE_FORMAT_R16_SNORM || + info->src.format == PIPE_FORMAT_R16_FLOAT || + info->src.format == PIPE_FORMAT_R32_FLOAT) { + mask = 0xffff0000; /* also makes condition for OPERATION reset true */ + BEGIN_NVC0(push, NVC0_2D(BETA4), 2); + PUSH_DATA (push, mask); + PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT); + } else + if (info->src.format == PIPE_FORMAT_A8_UNORM) { + mask = 0xff000000; + BEGIN_NVC0(push, NVC0_2D(BETA4), 2); + PUSH_DATA (push, mask); + PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT); + } + } + + if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) { 
+ /* ms_x is always >= ms_y */ + du_dx <<= src->ms_x - dst->ms_x; + dv_dy <<= src->ms_y - dst->ms_y; + } else { + du_dx >>= dst->ms_x - src->ms_x; + dv_dy >>= dst->ms_y - src->ms_y; + } + + srcx = (int64_t)(info->src.box.x + srcx_adj) << (src->ms_x + 32); + srcy = (int64_t)(info->src.box.y + srcy_adj) << (src->ms_y + 32); + + if (src->base.base.nr_samples > dst->base.base.nr_samples) { + /* center src coorinates for proper MS resolve filtering */ + srcx += (int64_t)(src->ms_x + 0) << 32; + srcy += (int64_t)(src->ms_y + 1) << 31; + } + + dstx = info->dst.box.x << dst->ms_x; + dsty = info->dst.box.y << dst->ms_y; + + dstw = info->dst.box.width << dst->ms_x; + dsth = info->dst.box.height << dst->ms_y; + + if (dstx < 0) { + dstw += dstx; + srcx -= du_dx * dstx; + dstx = 0; + } + if (dsty < 0) { + dsth += dsty; + srcy -= dv_dy * dsty; + dsty = 0; + } + + IMMED_NVC0(push, NVC0_2D(BLIT_CONTROL), mode); + BEGIN_NVC0(push, NVC0_2D(BLIT_DST_X), 4); + PUSH_DATA (push, dstx); + PUSH_DATA (push, dsty); + PUSH_DATA (push, dstw); + PUSH_DATA (push, dsth); + BEGIN_NVC0(push, NVC0_2D(BLIT_DU_DX_FRACT), 4); + PUSH_DATA (push, du_dx); + PUSH_DATA (push, du_dx >> 32); + PUSH_DATA (push, dv_dy); + PUSH_DATA (push, dv_dy >> 32); + + BCTX_REFN(nvc0->bufctx, 2D, &dst->base, WR); + BCTX_REFN(nvc0->bufctx, 2D, &src->base, RD); + nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx); + if (nouveau_pushbuf_validate(nvc0->base.pushbuf)) + return; + + for (i = 0; i < info->dst.box.depth; ++i) { + if (i > 0) { + /* no scaling in z-direction possible for eng2d blits */ + if (dst->layout_3d) { + BEGIN_NVC0(push, NVC0_2D(DST_LAYER), 1); + PUSH_DATA (push, info->dst.box.z + i); + } else { + const unsigned z = info->dst.box.z + i; + BEGIN_NVC0(push, NVC0_2D(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, dst->base.address + z * dst->layer_stride); + PUSH_DATA (push, dst->base.address + z * dst->layer_stride); + } + if (src->layout_3d) { + /* not possible because of depth tiling */ + assert(0); + } else 
{ + const unsigned z = info->src.box.z + i; + BEGIN_NVC0(push, NVC0_2D(SRC_ADDRESS_HIGH), 2); + PUSH_DATAh(push, src->base.address + z * src->layer_stride); + PUSH_DATA (push, src->base.address + z * src->layer_stride); + } + BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_Y_INT), 1); /* trigger */ + PUSH_DATA (push, srcy >> 32); + } else { + BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_X_FRACT), 4); + PUSH_DATA (push, srcx); + PUSH_DATA (push, srcx >> 32); + PUSH_DATA (push, srcy); + PUSH_DATA (push, srcy >> 32); + } + } + nvc0_resource_validate(&dst->base, NOUVEAU_BO_WR); + nvc0_resource_validate(&src->base, NOUVEAU_BO_RD); + + nouveau_bufctx_reset(nvc0->bufctx, NVC0_BIND_2D); + + if (info->scissor_enable) + IMMED_NVC0(push, NVC0_2D(CLIP_ENABLE), 0); + if (mask != 0xffffffff) + IMMED_NVC0(push, NVC0_2D(OPERATION), NVC0_2D_OPERATION_SRCCOPY); +} + +static void +nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + boolean eng3d = FALSE; + + if (util_format_is_depth_or_stencil(info->dst.resource->format)) { + if (!(info->mask & PIPE_MASK_ZS)) + return; + if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT || + info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + eng3d = TRUE; + if (info->filter != PIPE_TEX_FILTER_NEAREST) + eng3d = TRUE; + } else { + if (!(info->mask & PIPE_MASK_RGBA)) + return; + if (info->mask != PIPE_MASK_RGBA) + eng3d = TRUE; + } + + if (nv50_miptree(info->src.resource)->layout_3d) { + eng3d = TRUE; + } else + if (info->src.box.depth != info->dst.box.depth) { + eng3d = TRUE; + debug_printf("blit: cannot filter array or cube textures in z direction"); + } + + if (!eng3d && info->dst.format != info->src.format) { + if (!nv50_2d_dst_format_faithful(info->dst.format)) { + eng3d = TRUE; + } else + if (!nv50_2d_src_format_faithful(info->src.format)) { + if (!util_format_is_luminance(info->src.format)) { + if (util_format_is_intensity(info->src.format)) + eng3d = info->src.format != 
PIPE_FORMAT_I8_UNORM; + else + if (!nv50_2d_dst_format_ops_supported(info->dst.format)) + eng3d = TRUE; + else + eng3d = !nv50_2d_format_supported(info->src.format); + } + } else + if (util_format_is_luminance_alpha(info->src.format)) + eng3d = TRUE; + } + + if (info->src.resource->nr_samples == 8 && + info->dst.resource->nr_samples <= 1) + eng3d = TRUE; +#if 0 + /* FIXME: can't make this work with eng2d anymore, at least not on nv50 */ + if (info->src.resource->nr_samples > 1 || + info->dst.resource->nr_samples > 1) + eng3d = TRUE; +#endif + /* FIXME: find correct src coordinates adjustments */ + if ((info->src.box.width != info->dst.box.width && + info->src.box.width != -info->dst.box.width) || + (info->src.box.height != info->dst.box.height && + info->src.box.height != -info->dst.box.height)) + eng3d = TRUE; + + if (!eng3d) + nvc0_blit_eng2d(nvc0, info); + else + nvc0_blit_3d(nvc0, info); + + NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_blit_count, 1); +} + +boolean +nvc0_blitter_create(struct nvc0_screen *screen) +{ + screen->blitter = CALLOC_STRUCT(nvc0_blitter); + if (!screen->blitter) { + NOUVEAU_ERR("failed to allocate blitter struct\n"); + return FALSE; + } + screen->blitter->screen = screen; + + pipe_mutex_init(screen->blitter->mutex); + + nvc0_blitter_make_vp(screen->blitter); + nvc0_blitter_make_sampler(screen->blitter); + + return TRUE; +} + +void +nvc0_blitter_destroy(struct nvc0_screen *screen) +{ + struct nvc0_blitter *blitter = screen->blitter; + unsigned i, m; + + for (i = 0; i < NV50_BLIT_MAX_TEXTURE_TYPES; ++i) { + for (m = 0; m < NV50_BLIT_MODES; ++m) { + struct nvc0_program *prog = blitter->fp[i][m]; + if (prog) { + nvc0_program_destroy(NULL, prog); + FREE((void *)prog->pipe.tokens); + FREE(prog); + } + } + } + + FREE(blitter); +} + +boolean +nvc0_blitctx_create(struct nvc0_context *nvc0) +{ + nvc0->blit = CALLOC_STRUCT(nvc0_blitctx); + if (!nvc0->blit) { + NOUVEAU_ERR("failed to allocate blit context\n"); + return FALSE; + } + + nvc0->blit->nvc0 
= nvc0; + + nvc0->blit->rast.pipe.half_pixel_center = 1; + + return TRUE; +} + +void +nvc0_blitctx_destroy(struct nvc0_context *nvc0) +{ + if (nvc0->blit) + FREE(nvc0->blit); +} + +void +nvc0_init_surface_functions(struct nvc0_context *nvc0) +{ + struct pipe_context *pipe = &nvc0->base.pipe; + + pipe->resource_copy_region = nvc0_resource_copy_region; + pipe->blit = nvc0_blit; + pipe->clear_render_target = nvc0_clear_render_target; + pipe->clear_depth_stencil = nvc0_clear_depth_stencil; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c new file mode 100644 index 00000000000..765cd2d2bab --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -0,0 +1,814 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_resource.h" +#include "nv50/nv50_texture.xml.h" +#include "nv50/nv50_defs.xml.h" + +#include "util/u_format.h" + +#define NVE4_TIC_ENTRY_INVALID 0x000fffff +#define NVE4_TSC_ENTRY_INVALID 0xfff00000 + +#define NV50_TIC_0_SWIZZLE__MASK \ + (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ + NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) + +static INLINE uint32_t +nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int) +{ + switch (swz) { + case PIPE_SWIZZLE_RED: + return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; + case PIPE_SWIZZLE_GREEN: + return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; + case PIPE_SWIZZLE_BLUE: + return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; + case PIPE_SWIZZLE_ALPHA: + return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; + case PIPE_SWIZZLE_ONE: + return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT; + case PIPE_SWIZZLE_ZERO: + default: + return NV50_TIC_MAP_ZERO; + } +} + +struct pipe_sampler_view * +nvc0_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *res, + const struct pipe_sampler_view *templ) +{ + uint32_t flags = 0; + + if (res->target == PIPE_TEXTURE_RECT || res->target == PIPE_BUFFER) + flags |= NV50_TEXVIEW_SCALED_COORDS; + + return nvc0_create_texture_view(pipe, res, templ, flags, res->target); +} + +struct pipe_sampler_view * +nvc0_create_texture_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ, + uint32_t flags, + enum pipe_texture_target target) +{ + const struct util_format_description *desc; + uint64_t address; + uint32_t *tic; + uint32_t swz[4]; + uint32_t width, height; + uint32_t depth; + struct nv50_tic_entry *view; + struct nv50_miptree *mt; + boolean tex_int; + + view = MALLOC_STRUCT(nv50_tic_entry); + if (!view) + return NULL; + mt = nv50_miptree(texture); + + view->pipe = *templ; + view->pipe.reference.count = 1; + 
view->pipe.texture = NULL; + view->pipe.context = pipe; + + view->id = -1; + + pipe_resource_reference(&view->pipe.texture, texture); + + tic = &view->tic[0]; + + desc = util_format_description(view->pipe.format); + + tic[0] = nvc0_format_table[view->pipe.format].tic; + + tex_int = util_format_is_pure_integer(view->pipe.format); + + swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int); + swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int); + swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int); + swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int); + tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | + (swz[0] << NV50_TIC_0_MAPR__SHIFT) | + (swz[1] << NV50_TIC_0_MAPG__SHIFT) | + (swz[2] << NV50_TIC_0_MAPB__SHIFT) | + (swz[3] << NV50_TIC_0_MAPA__SHIFT); + + address = mt->base.address; + + tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER; + + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; + + if (!(flags & NV50_TEXVIEW_SCALED_COORDS)) + tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; + + /* check for linear storage type */ + if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) { + if (texture->target == PIPE_BUFFER) { + assert(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS)); + address += + view->pipe.u.buf.first_element * desc->block.bits / 8; + tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER; + tic[3] = 0; + tic[4] = /* width */ + view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1; + tic[5] = 0; + } else { + /* must be 2D texture without mip maps */ + tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT; + tic[3] = mt->level[0].pitch; + tic[4] = mt->base.base.width0; + tic[5] = (1 << 16) | mt->base.base.height0; + } + tic[6] = + tic[7] = 0; + tic[1] = address; + tic[2] |= address >> 32; + return &view->pipe; + } + + tic[2] |= + ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) | + ((mt->level[0].tile_mode & 0xf00) << (25 - 8)); + + depth = 
MAX2(mt->base.base.array_size, mt->base.base.depth0); + + if (mt->base.base.array_size > 1) { + /* there doesn't seem to be a base layer field in TIC */ + address += view->pipe.u.tex.first_layer * mt->layer_stride; + depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1; + } + tic[1] = address; + tic[2] |= address >> 32; + + switch (target) { + case PIPE_TEXTURE_1D: + tic[2] |= NV50_TIC_2_TARGET_1D; + break; + case PIPE_TEXTURE_2D: + tic[2] |= NV50_TIC_2_TARGET_2D; + break; + case PIPE_TEXTURE_RECT: + tic[2] |= NV50_TIC_2_TARGET_RECT; + break; + case PIPE_TEXTURE_3D: + tic[2] |= NV50_TIC_2_TARGET_3D; + break; + case PIPE_TEXTURE_CUBE: + depth /= 6; + tic[2] |= NV50_TIC_2_TARGET_CUBE; + break; + case PIPE_TEXTURE_1D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; + break; + case PIPE_TEXTURE_2D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; + break; + case PIPE_TEXTURE_CUBE_ARRAY: + depth /= 6; + tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; + break; + default: + NOUVEAU_ERR("unexpected/invalid texture target: %d\n", + mt->base.base.target); + return FALSE; + } + + tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000; + + if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) { + width = mt->base.base.width0 << mt->ms_x; + height = mt->base.base.height0 << mt->ms_y; + } else { + width = mt->base.base.width0; + height = mt->base.base.height0; + } + + tic[4] = (1 << 31) | width; + + tic[5] = height & 0xffff; + tic[5] |= depth << 16; + tic[5] |= mt->base.base.last_level << 28; + + /* sampling points: (?) */ + if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) + tic[6] = (mt->ms_x > 1) ? 
0x88000000 : 0x03000000; + else + tic[6] = 0x03000000; + + tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; + tic[7] |= mt->ms_mode << 12; + + return &view->pipe; +} + +static boolean +nvc0_validate_tic(struct nvc0_context *nvc0, int s) +{ + uint32_t commands[32]; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nouveau_bo *txc = nvc0->screen->txc; + unsigned i; + unsigned n = 0; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_textures[s]; ++i) { + struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); + struct nv04_resource *res; + const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i)); + + if (!tic) { + if (dirty) + commands[n++] = (i << 1) | 0; + continue; + } + res = nv04_resource(tic->pipe.texture); + + if (tic->id < 0) { + tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); + + PUSH_SPACE(push, 17); + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); + PUSH_DATAh(push, txc->offset + (tic->id * 32)); + PUSH_DATA (push, txc->offset + (tic->id * 32)); + BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, 32); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1); + PUSH_DATA (push, 0x100111); + BEGIN_NIC0(push, NVC0_M2MF(DATA), 8); + PUSH_DATAp(push, &tic->tic[0], 8); + + need_flush = TRUE; + } else + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, (tic->id << 4) | 1); + NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1); + } + nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + if (!dirty) + continue; + commands[n++] = (tic->id << 9) | (i << 1) | 1; + + BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD); + } + for (; i < nvc0->state.num_textures[s]; ++i) + commands[n++] = (i << 1) | 0; + + nvc0->state.num_textures[s] = nvc0->num_textures[s]; + + if (n) { + BEGIN_NIC0(push, 
NVC0_3D(BIND_TIC(s)), n); + PUSH_DATAp(push, commands, n); + } + nvc0->textures_dirty[s] = 0; + + return need_flush; +} + +static boolean +nve4_validate_tic(struct nvc0_context *nvc0, unsigned s) +{ + struct nouveau_bo *txc = nvc0->screen->txc; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_textures[s]; ++i) { + struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); + struct nv04_resource *res; + const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i)); + + if (!tic) { + nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; + continue; + } + res = nv04_resource(tic->pipe.texture); + + if (tic->id < 0) { + tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); + + PUSH_SPACE(push, 16); + BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, txc->offset + (tic->id * 32)); + PUSH_DATA (push, txc->offset + (tic->id * 32)); + BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, 32); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9); + PUSH_DATA (push, 0x1001); + PUSH_DATAp(push, &tic->tic[0], 8); + + need_flush = TRUE; + } else + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, (tic->id << 4) | 1); + } + nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID; + nvc0->tex_handles[s][i] |= tic->id; + if (dirty) + BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD); + } + for (; i < nvc0->state.num_textures[s]; ++i) { + nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; + nvc0->textures_dirty[s] |= 1 << i; + } + + nvc0->state.num_textures[s] = nvc0->num_textures[s]; + + return need_flush; +} + +void nvc0_validate_textures(struct nvc0_context *nvc0) +{ + boolean need_flush; + + if (nvc0->screen->base.class_3d >= 
NVE4_3D_CLASS) { + need_flush = nve4_validate_tic(nvc0, 0); + need_flush |= nve4_validate_tic(nvc0, 3); + need_flush |= nve4_validate_tic(nvc0, 4); + } else { + need_flush = nvc0_validate_tic(nvc0, 0); + need_flush |= nvc0_validate_tic(nvc0, 3); + need_flush |= nvc0_validate_tic(nvc0, 4); + } + + if (need_flush) { + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, 0); + } +} + +static boolean +nvc0_validate_tsc(struct nvc0_context *nvc0, int s) +{ + uint32_t commands[16]; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned i; + unsigned n = 0; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_samplers[s]; ++i) { + struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]); + + if (!(nvc0->samplers_dirty[s] & (1 << i))) + continue; + if (!tsc) { + commands[n++] = (i << 4) | 0; + continue; + } + if (tsc->id < 0) { + tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); + + nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc, + 65536 + tsc->id * 32, NOUVEAU_BO_VRAM, + 32, tsc->tsc); + need_flush = TRUE; + } + nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); + + commands[n++] = (tsc->id << 12) | (i << 4) | 1; + } + for (; i < nvc0->state.num_samplers[s]; ++i) + commands[n++] = (i << 4) | 0; + + nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; + + if (n) { + BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n); + PUSH_DATAp(push, commands, n); + } + nvc0->samplers_dirty[s] = 0; + + return need_flush; +} + +boolean +nve4_validate_tsc(struct nvc0_context *nvc0, int s) +{ + struct nouveau_bo *txc = nvc0->screen->txc; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_samplers[s]; ++i) { + struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]); + + if (!tsc) { + nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID; + continue; + } + if (tsc->id < 0) { + tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); + + 
PUSH_SPACE(push, 16); + BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32)); + PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32)); + BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, 32); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9); + PUSH_DATA (push, 0x1001); + PUSH_DATAp(push, &tsc->tsc[0], 8); + + need_flush = TRUE; + } + nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); + + nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID; + nvc0->tex_handles[s][i] |= tsc->id << 20; + } + for (; i < nvc0->state.num_samplers[s]; ++i) { + nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID; + nvc0->samplers_dirty[s] |= 1 << i; + } + + nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; + + return need_flush; +} + +void nvc0_validate_samplers(struct nvc0_context *nvc0) +{ + boolean need_flush; + + if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { + need_flush = nve4_validate_tsc(nvc0, 0); + need_flush |= nve4_validate_tsc(nvc0, 3); + need_flush |= nve4_validate_tsc(nvc0, 4); + } else { + need_flush = nvc0_validate_tsc(nvc0, 0); + need_flush |= nvc0_validate_tsc(nvc0, 3); + need_flush |= nvc0_validate_tsc(nvc0, 4); + } + + if (need_flush) { + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, 0); + } +} + +/* Upload the "diagonal" entries for the possible texture sources ($t == $s). + * At some point we might want to get a list of the combinations used by a + * shader and fill in those entries instead of having it extract the handles. 
+ */ +void +nve4_set_tex_handles(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + uint64_t address; + unsigned s; + + if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) + return; + address = nvc0->screen->uniform_bo->offset + (5 << 16); + + for (s = 0; s < 5; ++s, address += (1 << 9)) { + uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s]; + if (!dirty) + continue; + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 512); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + do { + int i = ffs(dirty) - 1; + dirty &= ~(1 << i); + + BEGIN_NVC0(push, NVC0_3D(CB_POS), 2); + PUSH_DATA (push, (8 + i) * 4); + PUSH_DATA (push, nvc0->tex_handles[s][i]); + } while (dirty); + + nvc0->textures_dirty[s] = 0; + nvc0->samplers_dirty[s] = 0; + } +} + + +static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT]; +static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT]; +static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT]; + +void +nve4_set_surface_info(struct nouveau_pushbuf *push, + struct pipe_surface *psf, + struct nvc0_screen *screen) +{ + struct nv50_surface *sf = nv50_surface(psf); + struct nv04_resource *res; + uint64_t address; + uint32_t *const info = push->cur; + uint8_t log2cpp; + + if (psf && !nve4_su_format_map[psf->format]) + NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n"); + + push->cur += 16; + + if (!psf || !nve4_su_format_map[psf->format]) { + memset(info, 0, 16 * sizeof(*info)); + + info[0] = 0xbadf0000; + info[1] = 0x80004000; + info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] + + screen->lib_code->start; + return; + } + res = nv04_resource(sf->base.texture); + + address = res->address + sf->offset; + + info[8] = sf->width; + info[9] = sf->height; + info[10] = sf->depth; + switch (res->base.target) { + case PIPE_TEXTURE_1D_ARRAY: + info[11] = 1; + break; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + info[11] = 2; + break; + case 
PIPE_TEXTURE_3D: + info[11] = 3; + break; + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + info[11] = 4; + break; + default: + info[11] = 0; + break; + } + log2cpp = (0xf000 & nve4_su_format_aux_map[sf->base.format]) >> 12; + + info[12] = nve4_suldp_lib_offset[sf->base.format] + screen->lib_code->start; + + /* limit in bytes for raw access */ + info[13] = (0x06 << 22) | ((sf->width << log2cpp) - 1); + + info[1] = nve4_su_format_map[sf->base.format]; + +#if 0 + switch (util_format_get_blocksizebits(sf->base.format)) { + case 16: info[1] |= 1 << 16; break; + case 32: info[1] |= 2 << 16; break; + case 64: info[1] |= 3 << 16; break; + case 128: info[1] |= 4 << 16; break; + default: + break; + } +#else + info[1] |= log2cpp << 16; + info[1] |= 0x4000; + info[1] |= (0x0f00 & nve4_su_format_aux_map[sf->base.format]); +#endif + + if (res->base.target == PIPE_BUFFER) { + info[0] = address >> 8; + info[2] = sf->width - 1; + info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22; + info[3] = 0; + info[4] = 0; + info[5] = 0; + info[6] = 0; + info[7] = 0; + info[14] = 0; + info[15] = 0; + } else { + struct nv50_miptree *mt = nv50_miptree(&res->base); + struct nv50_miptree_level *lvl = &mt->level[sf->base.u.tex.level]; + const unsigned z = sf->base.u.tex.first_layer; + + if (z) { + if (mt->layout_3d) { + address += nvc0_mt_zslice_offset(mt, psf->u.tex.level, z); + /* doesn't work if z passes z-tile boundary */ + assert(sf->depth == 1); + } else { + address += mt->layer_stride * z; + } + } + info[0] = address >> 8; + info[2] = sf->width - 1; + /* NOTE: this is really important: */ + info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22; + info[3] = (0x88 << 24) | (lvl->pitch / 64); + info[4] = sf->height - 1; + info[4] |= (lvl->tile_mode & 0x0f0) << 25; + info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22; + info[5] = mt->layer_stride >> 8; + info[6] = sf->depth - 1; + info[6] |= (lvl->tile_mode & 0xf00) << 21; + 
info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22; + info[7] = 0; + info[14] = mt->ms_x; + info[15] = mt->ms_y; + } +} + +static INLINE void +nvc0_update_surface_bindings(struct nvc0_context *nvc0) +{ + /* TODO */ +} + +static INLINE void +nve4_update_surface_bindings(struct nvc0_context *nvc0) +{ + /* TODO */ +} + +void +nvc0_validate_surfaces(struct nvc0_context *nvc0) +{ + if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { + nve4_update_surface_bindings(nvc0); + } else { + nvc0_update_surface_bindings(nvc0); + } +} + + +static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] = +{ + [PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT, + [PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT, + [PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT, + [PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM, + [PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM, + [PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT, + [PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT, + [PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM, + [PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM, + [PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT, + [PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT, + [PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT, + [PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM, +/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */ + [PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT, + [PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT, + [PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT, + [PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT, + [PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM, + [PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM, + [PIPE_FORMAT_R16G16_SINT] = 
NVE4_IMAGE_FORMAT_RG16_SINT, + [PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT, + [PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM, + [PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM, + [PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT, + [PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT, + [PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT, + [PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT, + [PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT, + [PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT, + [PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM, + [PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM, + [PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT, + [PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT, + [PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM, + [PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM, + [PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT, + [PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT, +}; + +/* Auxiliary format description values for surface instructions. 
+ * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22 + */ +static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] = +{ + [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842, + [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842, + [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842, + + [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933, + [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933, + [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933, + [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933, + + [PIPE_FORMAT_R32G32_FLOAT] = 0x3433, + [PIPE_FORMAT_R32G32_SINT] = 0x3433, + [PIPE_FORMAT_R32G32_UINT] = 0x3433, + + [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24, +/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24, */ + [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24, + [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24, + [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24, + [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24, + [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24, + + [PIPE_FORMAT_R16G16_UNORM] = 0x2524, + [PIPE_FORMAT_R16G16_SNORM] = 0x2524, + [PIPE_FORMAT_R16G16_SINT] = 0x2524, + [PIPE_FORMAT_R16G16_UINT] = 0x2524, + [PIPE_FORMAT_R16G16_FLOAT] = 0x2524, + + [PIPE_FORMAT_R32_SINT] = 0x2024, + [PIPE_FORMAT_R32_UINT] = 0x2024, + [PIPE_FORMAT_R32_FLOAT] = 0x2024, + + [PIPE_FORMAT_R8G8_UNORM] = 0x1615, + [PIPE_FORMAT_R8G8_SNORM] = 0x1615, + [PIPE_FORMAT_R8G8_SINT] = 0x1615, + [PIPE_FORMAT_R8G8_UINT] = 0x1615, + + [PIPE_FORMAT_R16_UNORM] = 0x1115, + [PIPE_FORMAT_R16_SNORM] = 0x1115, + [PIPE_FORMAT_R16_SINT] = 0x1115, + [PIPE_FORMAT_R16_UINT] = 0x1115, + [PIPE_FORMAT_R16_FLOAT] = 0x1115, + + [PIPE_FORMAT_R8_UNORM] = 0x0206, + [PIPE_FORMAT_R8_SNORM] = 0x0206, + [PIPE_FORMAT_R8_SINT] = 0x0206, + [PIPE_FORMAT_R8_UINT] = 0x0206 +}; + +/* NOTE: These are hardcoded offsets for the shader library. + * TODO: Automate them. 
+ */ +static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] = +{ + [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218, + [PIPE_FORMAT_R32G32B32A32_SINT] = 0x218, + [PIPE_FORMAT_R32G32B32A32_UINT] = 0x218, + [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248, + [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8, + [PIPE_FORMAT_R16G16B16A16_SINT] = 0x330, + [PIPE_FORMAT_R16G16B16A16_UINT] = 0x388, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8, + [PIPE_FORMAT_R32G32_FLOAT] = 0x428, + [PIPE_FORMAT_R32G32_SINT] = 0x468, + [PIPE_FORMAT_R32G32_UINT] = 0x468, + [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8, +/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530, */ + [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588, + [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8, + [PIPE_FORMAT_R8G8B8A8_SINT] = 0x670, + [PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8, + [PIPE_FORMAT_B5G6R5_UNORM] = 0x718, + [PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0, + [PIPE_FORMAT_R16G16_UNORM] = 0x828, + [PIPE_FORMAT_R16G16_SNORM] = 0x890, + [PIPE_FORMAT_R16G16_SINT] = 0x8f0, + [PIPE_FORMAT_R16G16_UINT] = 0x948, + [PIPE_FORMAT_R16G16_FLOAT] = 0x998, + [PIPE_FORMAT_R32_FLOAT] = 0x9e8, + [PIPE_FORMAT_R32_SINT] = 0xa30, + [PIPE_FORMAT_R32_UINT] = 0xa30, + [PIPE_FORMAT_R8G8_UNORM] = 0xa78, + [PIPE_FORMAT_R8G8_SNORM] = 0xae0, + [PIPE_FORMAT_R8G8_UINT] = 0xb48, + [PIPE_FORMAT_R8G8_SINT] = 0xb98, + [PIPE_FORMAT_R16_UNORM] = 0xbe8, + [PIPE_FORMAT_R16_SNORM] = 0xc48, + [PIPE_FORMAT_R16_SINT] = 0xca0, + [PIPE_FORMAT_R16_UINT] = 0xce8, + [PIPE_FORMAT_R16_FLOAT] = 0xd30, + [PIPE_FORMAT_R8_UNORM] = 0xd88, + [PIPE_FORMAT_R8_SNORM] = 0xde0, + [PIPE_FORMAT_R8_SINT] = 0xe38, + [PIPE_FORMAT_R8_UINT] = 0xe88, + [PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0 +}; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c new file mode 100644 index 00000000000..82f1ffcd329 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c @@ -0,0 +1,558 @@ + +#include "util/u_format.h" + +#include "nvc0/nvc0_context.h" + +#include "nv50/nv50_defs.xml.h" + 
+struct nvc0_transfer { + struct pipe_transfer base; + struct nv50_m2mf_rect rect[2]; + uint32_t nblocksx; + uint16_t nblocksy; + uint16_t nlayers; +}; + +static void +nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nouveau_bufctx *bctx = nvc0->bufctx; + const int cpp = dst->cpp; + uint32_t src_ofst = src->base; + uint32_t dst_ofst = dst->base; + uint32_t height = nblocksy; + uint32_t sy = src->y; + uint32_t dy = dst->y; + uint32_t exec = (1 << 20); + + assert(dst->cpp == src->cpp); + + nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD); + nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + if (nouveau_bo_memtype(src->bo)) { + BEGIN_NVC0(push, NVC0_M2MF(TILING_MODE_IN), 5); + PUSH_DATA (push, src->tile_mode); + PUSH_DATA (push, src->width * cpp); + PUSH_DATA (push, src->height); + PUSH_DATA (push, src->depth); + PUSH_DATA (push, src->z); + } else { + src_ofst += src->y * src->pitch + src->x * cpp; + + BEGIN_NVC0(push, NVC0_M2MF(PITCH_IN), 1); + PUSH_DATA (push, src->width * cpp); + + exec |= NVC0_M2MF_EXEC_LINEAR_IN; + } + + if (nouveau_bo_memtype(dst->bo)) { + BEGIN_NVC0(push, NVC0_M2MF(TILING_MODE_OUT), 5); + PUSH_DATA (push, dst->tile_mode); + PUSH_DATA (push, dst->width * cpp); + PUSH_DATA (push, dst->height); + PUSH_DATA (push, dst->depth); + PUSH_DATA (push, dst->z); + } else { + dst_ofst += dst->y * dst->pitch + dst->x * cpp; + + BEGIN_NVC0(push, NVC0_M2MF(PITCH_OUT), 1); + PUSH_DATA (push, dst->width * cpp); + + exec |= NVC0_M2MF_EXEC_LINEAR_OUT; + } + + while (height) { + int line_count = height > 2047 ? 
2047 : height; + + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_IN_HIGH), 2); + PUSH_DATAh(push, src->bo->offset + src_ofst); + PUSH_DATA (push, src->bo->offset + src_ofst); + + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); + PUSH_DATAh(push, dst->bo->offset + dst_ofst); + PUSH_DATA (push, dst->bo->offset + dst_ofst); + + if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) { + BEGIN_NVC0(push, NVC0_M2MF(TILING_POSITION_IN_X), 2); + PUSH_DATA (push, src->x * cpp); + PUSH_DATA (push, sy); + } else { + src_ofst += line_count * src->pitch; + } + if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) { + BEGIN_NVC0(push, NVC0_M2MF(TILING_POSITION_OUT_X), 2); + PUSH_DATA (push, dst->x * cpp); + PUSH_DATA (push, dy); + } else { + dst_ofst += line_count * dst->pitch; + } + + BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, nblocksx * cpp); + PUSH_DATA (push, line_count); + BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1); + PUSH_DATA (push, exec); + + height -= line_count; + sy += line_count; + dy += line_count; + } + + nouveau_bufctx_reset(bctx, 0); +} + +static void +nve4_m2mf_transfer_rect(struct nvc0_context *nvc0, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nouveau_bufctx *bctx = nvc0->bufctx; + uint32_t exec; + uint32_t src_base = src->base; + uint32_t dst_base = dst->base; + const int cpp = dst->cpp; + + assert(dst->cpp == src->cpp); + + nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR); + nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + exec = 0x200 /* 2D_ENABLE */ | 0x6 /* UNK */; + + if (!nouveau_bo_memtype(dst->bo)) { + assert(!dst->z); + dst_base += dst->y * dst->pitch + dst->x * cpp; + exec |= 0x100; /* DST_MODE_2D_LINEAR */ + } + if (!nouveau_bo_memtype(src->bo)) { + assert(!src->z); + src_base += src->y * src->pitch + src->x * cpp; + exec |= 
0x080; /* SRC_MODE_2D_LINEAR */ + } + + BEGIN_NVC0(push, SUBC_COPY(0x070c), 6); + PUSH_DATA (push, 0x1000 | dst->tile_mode); + PUSH_DATA (push, dst->pitch); + PUSH_DATA (push, dst->height); + PUSH_DATA (push, dst->depth); + PUSH_DATA (push, dst->z); + PUSH_DATA (push, (dst->y << 16) | (dst->x * cpp)); + + BEGIN_NVC0(push, SUBC_COPY(0x0728), 6); + PUSH_DATA (push, 0x1000 | src->tile_mode); + PUSH_DATA (push, src->pitch); + PUSH_DATA (push, src->height); + PUSH_DATA (push, src->depth); + PUSH_DATA (push, src->z); + PUSH_DATA (push, (src->y << 16) | (src->x * cpp)); + + BEGIN_NVC0(push, SUBC_COPY(0x0400), 8); + PUSH_DATAh(push, src->bo->offset + src_base); + PUSH_DATA (push, src->bo->offset + src_base); + PUSH_DATAh(push, dst->bo->offset + dst_base); + PUSH_DATA (push, dst->bo->offset + dst_base); + PUSH_DATA (push, src->pitch); + PUSH_DATA (push, dst->pitch); + PUSH_DATA (push, nblocksx * cpp); + PUSH_DATA (push, nblocksy); + + BEGIN_NVC0(push, SUBC_COPY(0x0300), 1); + PUSH_DATA (push, exec); + + nouveau_bufctx_reset(bctx, 0); +} + +void +nvc0_m2mf_push_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data) +{ + struct nvc0_context *nvc0 = nvc0_context(&nv->pipe); + struct nouveau_pushbuf *push = nv->pushbuf; + uint32_t *src = (uint32_t *)data; + unsigned count = (size + 3) / 4; + + nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nvc0->bufctx); + nouveau_pushbuf_validate(push); + + while (count) { + unsigned nr; + + if (!PUSH_SPACE(push, 16)) + break; + nr = PUSH_AVAIL(push); + assert(nr >= 16); + nr = MIN2(count, nr - 9); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); + PUSH_DATAh(push, dst->offset + offset); + PUSH_DATA (push, dst->offset + offset); + BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, MIN2(size, nr * 4)); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, 
NVC0_M2MF(EXEC), 1); + PUSH_DATA (push, 0x100111); + + /* must not be interrupted (trap on QUERY fence, 0x50 works however) */ + BEGIN_NIC0(push, NVC0_M2MF(DATA), nr); + PUSH_DATAp(push, src, nr); + + count -= nr; + src += nr; + offset += nr * 4; + size -= nr * 4; + } + + nouveau_bufctx_reset(nvc0->bufctx, 0); +} + +void +nve4_p2mf_push_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data) +{ + struct nvc0_context *nvc0 = nvc0_context(&nv->pipe); + struct nouveau_pushbuf *push = nv->pushbuf; + uint32_t *src = (uint32_t *)data; + unsigned count = (size + 3) / 4; + + nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nvc0->bufctx); + nouveau_pushbuf_validate(push); + + while (count) { + unsigned nr; + + if (!PUSH_SPACE(push, 16)) + break; + nr = PUSH_AVAIL(push); + assert(nr >= 16); + nr = MIN2(count, nr - 8); + nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1)); + + BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, dst->offset + offset); + PUSH_DATA (push, dst->offset + offset); + BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, MIN2(size, nr * 4)); + PUSH_DATA (push, 1); + /* must not be interrupted (trap on QUERY fence, 0x50 works however) */ + BEGIN_1IC0(push, NVE4_P2MF(EXEC), nr + 1); + PUSH_DATA (push, 0x1001); + PUSH_DATAp(push, src, nr); + + count -= nr; + src += nr; + offset += nr * 4; + size -= nr * 4; + } + + nouveau_bufctx_reset(nvc0->bufctx, 0); +} + +static void +nvc0_m2mf_copy_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size) +{ + struct nouveau_pushbuf *push = nv->pushbuf; + struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx; + + nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD); + nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR); + 
nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + while (size) { + unsigned bytes = MIN2(size, 1 << 17); + + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); + PUSH_DATAh(push, dst->offset + dstoff); + PUSH_DATA (push, dst->offset + dstoff); + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_IN_HIGH), 2); + PUSH_DATAh(push, src->offset + srcoff); + PUSH_DATA (push, src->offset + srcoff); + BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, bytes); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1); + PUSH_DATA (push, (1 << NVC0_M2MF_EXEC_INC__SHIFT) | + NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT); + + srcoff += bytes; + dstoff += bytes; + size -= bytes; + } + + nouveau_bufctx_reset(bctx, 0); +} + +static void +nve4_m2mf_copy_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size) +{ + struct nouveau_pushbuf *push = nv->pushbuf; + struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx; + + nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD); + nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + BEGIN_NVC0(push, SUBC_COPY(0x0400), 4); + PUSH_DATAh(push, src->offset + srcoff); + PUSH_DATA (push, src->offset + srcoff); + PUSH_DATAh(push, dst->offset + dstoff); + PUSH_DATA (push, dst->offset + dstoff); + BEGIN_NVC0(push, SUBC_COPY(0x0418), 1); + PUSH_DATA (push, size); + BEGIN_NVC0(push, SUBC_COPY(0x0300), 1); + PUSH_DATA (push, 0x186); + + nouveau_bufctx_reset(bctx, 0); +} + + +static INLINE boolean +nvc0_mt_transfer_can_map_directly(struct nv50_miptree *mt) +{ + if (mt->base.domain == NOUVEAU_BO_VRAM) + return FALSE; + if (mt->base.base.usage != PIPE_USAGE_STAGING) + return FALSE; + return !nouveau_bo_memtype(mt->base.bo); +} + +static INLINE boolean +nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt, 
unsigned usage) +{ + if (!mt->base.mm) { + uint32_t access = (usage & PIPE_TRANSFER_WRITE) ? + NOUVEAU_BO_WR : NOUVEAU_BO_RD; + return !nouveau_bo_wait(mt->base.bo, access, nvc0->base.client); + } + if (usage & PIPE_TRANSFER_WRITE) + return !mt->base.fence || nouveau_fence_wait(mt->base.fence); + return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr); +} + +void * +nvc0_miptree_transfer_map(struct pipe_context *pctx, + struct pipe_resource *res, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer) +{ + struct nvc0_context *nvc0 = nvc0_context(pctx); + struct nouveau_device *dev = nvc0->screen->base.device; + struct nv50_miptree *mt = nv50_miptree(res); + struct nvc0_transfer *tx; + uint32_t size; + int ret; + unsigned flags = 0; + + if (nvc0_mt_transfer_can_map_directly(mt)) { + ret = !nvc0_mt_sync(nvc0, mt, usage); + if (!ret) + ret = nouveau_bo_map(mt->base.bo, 0, NULL); + if (ret && + (usage & PIPE_TRANSFER_MAP_DIRECTLY)) + return NULL; + if (!ret) + usage |= PIPE_TRANSFER_MAP_DIRECTLY; + } else + if (usage & PIPE_TRANSFER_MAP_DIRECTLY) + return NULL; + + tx = CALLOC_STRUCT(nvc0_transfer); + if (!tx) + return NULL; + + pipe_resource_reference(&tx->base.resource, res); + + tx->base.level = level; + tx->base.usage = usage; + tx->base.box = *box; + + if (util_format_is_plain(res->format)) { + tx->nblocksx = box->width << mt->ms_x; + tx->nblocksy = box->height << mt->ms_y; + } else { + tx->nblocksx = util_format_get_nblocksx(res->format, box->width); + tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + } + tx->nlayers = box->depth; + + tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); + tx->base.layer_stride = tx->nblocksy * tx->base.stride; + + if (usage & PIPE_TRANSFER_MAP_DIRECTLY) { + tx->base.stride = align(tx->base.stride, 128); + *ptransfer = &tx->base; + return mt->base.bo->map + mt->base.offset; + } + + nv50_m2mf_rect_setup(&tx->rect[0], res, level, box->x, 
box->y, box->z); + + size = tx->base.layer_stride; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, + size * tx->nlayers, NULL, &tx->rect[1].bo); + if (ret) { + pipe_resource_reference(&tx->base.resource, NULL); + FREE(tx); + return NULL; + } + + tx->rect[1].cpp = tx->rect[0].cpp; + tx->rect[1].width = tx->nblocksx; + tx->rect[1].height = tx->nblocksy; + tx->rect[1].depth = 1; + tx->rect[1].pitch = tx->base.stride; + tx->rect[1].domain = NOUVEAU_BO_GART; + + if (usage & PIPE_TRANSFER_READ) { + unsigned base = tx->rect[0].base; + unsigned z = tx->rect[0].z; + unsigned i; + for (i = 0; i < tx->nlayers; ++i) { + nvc0->m2mf_copy_rect(nvc0, &tx->rect[1], &tx->rect[0], + tx->nblocksx, tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += size; + } + tx->rect[0].z = z; + tx->rect[0].base = base; + tx->rect[1].base = 0; + } + + if (tx->rect[1].bo->map) { + *ptransfer = &tx->base; + return tx->rect[1].bo->map; + } + + if (usage & PIPE_TRANSFER_READ) + flags = NOUVEAU_BO_RD; + if (usage & PIPE_TRANSFER_WRITE) + flags |= NOUVEAU_BO_WR; + + ret = nouveau_bo_map(tx->rect[1].bo, flags, nvc0->screen->base.client); + if (ret) { + pipe_resource_reference(&tx->base.resource, NULL); + nouveau_bo_ref(NULL, &tx->rect[1].bo); + FREE(tx); + return NULL; + } + + *ptransfer = &tx->base; + return tx->rect[1].bo->map; +} + +void +nvc0_miptree_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct nvc0_context *nvc0 = nvc0_context(pctx); + struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; + struct nv50_miptree *mt = nv50_miptree(tx->base.resource); + unsigned i; + + if (tx->base.usage & PIPE_TRANSFER_MAP_DIRECTLY) { + pipe_resource_reference(&transfer->resource, NULL); + + FREE(tx); + return; + } + + if (tx->base.usage & PIPE_TRANSFER_WRITE) { + for (i = 0; i < tx->nlayers; ++i) { + nvc0->m2mf_copy_rect(nvc0, &tx->rect[0], &tx->rect[1], + tx->nblocksx, 
tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += tx->nblocksy * tx->base.stride; + } + NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_wr, 1); + } + if (tx->base.usage & PIPE_TRANSFER_READ) + NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_rd, 1); + + nouveau_bo_ref(NULL, &tx->rect[1].bo); + pipe_resource_reference(&transfer->resource, NULL); + + FREE(tx); +} + +/* This happens rather often with DTD9/st. */ +void +nvc0_cb_push(struct nouveau_context *nv, + struct nouveau_bo *bo, unsigned domain, + unsigned base, unsigned size, + unsigned offset, unsigned words, const uint32_t *data) +{ + struct nouveau_pushbuf *push = nv->pushbuf; + + NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_count, 1); + NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_bytes, words * 4); + + assert(!(offset & 3)); + size = align(size, 0x100); + + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, size); + PUSH_DATAh(push, bo->offset + base); + PUSH_DATA (push, bo->offset + base); + + while (words) { + unsigned nr = PUSH_AVAIL(push); + nr = MIN2(nr, words); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1); + + PUSH_SPACE(push, nr + 2); + PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain); + BEGIN_1IC0(push, NVC0_3D(CB_POS), nr + 1); + PUSH_DATA (push, offset); + PUSH_DATAp(push, data, nr); + + words -= nr; + data += nr; + offset += nr * 4; + } +} + +void +nvc0_init_transfer_functions(struct nvc0_context *nvc0) +{ + if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { + nvc0->m2mf_copy_rect = nve4_m2mf_transfer_rect; + nvc0->base.copy_data = nve4_m2mf_copy_linear; + nvc0->base.push_data = nve4_p2mf_push_linear; + } else { + nvc0->m2mf_copy_rect = nvc0_m2mf_transfer_rect; + nvc0->base.copy_data = nvc0_m2mf_copy_linear; + nvc0->base.push_data = nvc0_m2mf_push_linear; + } + nvc0->base.push_cb = nvc0_cb_push; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c new file 
mode 100644 index 00000000000..c4bc7dc693b --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -0,0 +1,891 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_resource.h" + +#include "nvc0/nvc0_3d.xml.h" + +void +nvc0_vertex_state_delete(struct pipe_context *pipe, + void *hwcso) +{ + struct nvc0_vertex_stateobj *so = hwcso; + + if (so->translate) + so->translate->release(so->translate); + FREE(hwcso); +} + +void * +nvc0_vertex_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nvc0_vertex_stateobj *so; + struct translate_key transkey; + unsigned i; + unsigned src_offset_max = 0; + + so = MALLOC(sizeof(*so) + + num_elements * sizeof(struct nvc0_vertex_element)); + if (!so) + return NULL; + so->num_elements = num_elements; + so->instance_elts = 0; + so->instance_bufs = 0; + so->shared_slots = FALSE; + so->need_conversion = FALSE; + + memset(so->vb_access_size, 0, sizeof(so->vb_access_size)); + + for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) + so->min_instance_div[i] = 0xffffffff; + + transkey.nr_elements = 0; + transkey.output_stride = 0; + + for (i = 0; i < num_elements; ++i) { + const struct pipe_vertex_element *ve = &elements[i]; + const unsigned vbi = ve->vertex_buffer_index; + unsigned size; + enum pipe_format fmt = ve->src_format; + + so->element[i].pipe = elements[i]; + so->element[i].state = nvc0_format_table[fmt].vtx; + + if (!so->element[i].state) { + switch (util_format_get_nr_components(fmt)) { + case 1: fmt = PIPE_FORMAT_R32_FLOAT; break; + case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break; + case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break; + case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break; + default: + assert(0); + FREE(so); + return NULL; + } + so->element[i].state = nvc0_format_table[fmt].vtx; + so->need_conversion = TRUE; + } + size = util_format_get_blocksize(fmt); + + src_offset_max = 
MAX2(src_offset_max, ve->src_offset); + + if (so->vb_access_size[vbi] < (ve->src_offset + size)) + so->vb_access_size[vbi] = ve->src_offset + size; + + if (unlikely(ve->instance_divisor)) { + so->instance_elts |= 1 << i; + so->instance_bufs |= 1 << vbi; + if (ve->instance_divisor < so->min_instance_div[vbi]) + so->min_instance_div[vbi] = ve->instance_divisor; + } + + if (1) { + unsigned ca; + unsigned j = transkey.nr_elements++; + + ca = util_format_description(fmt)->channel[0].size / 8; + if (ca != 1 && ca != 2) + ca = 4; + + transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL; + transkey.element[j].input_format = ve->src_format; + transkey.element[j].input_buffer = vbi; + transkey.element[j].input_offset = ve->src_offset; + transkey.element[j].instance_divisor = ve->instance_divisor; + + transkey.output_stride = align(transkey.output_stride, ca); + transkey.element[j].output_format = fmt; + transkey.element[j].output_offset = transkey.output_stride; + transkey.output_stride += size; + + so->element[i].state_alt = so->element[i].state; + so->element[i].state_alt |= transkey.element[j].output_offset << 7; + } + + so->element[i].state |= i << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT; + } + transkey.output_stride = align(transkey.output_stride, 4); + + so->size = transkey.output_stride; + so->translate = translate_create(&transkey); + + if (so->instance_elts || src_offset_max >= (1 << 14)) + return so; + so->shared_slots = TRUE; + + for (i = 0; i < num_elements; ++i) { + const unsigned b = elements[i].vertex_buffer_index; + const unsigned s = elements[i].src_offset; + so->element[i].state &= ~NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK; + so->element[i].state |= b << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT; + so->element[i].state |= s << NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT; + } + return so; +} + +#define NVC0_3D_VERTEX_ATTRIB_INACTIVE \ + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | \ + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 
+ +#define VTX_ATTR(a, c, t, s) \ + ((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \ + (NVC0_3D_VTX_ATTR_DEFINE_SIZE_##s) | \ + ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \ + ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT)) + +static void +nvc0_set_constant_vertex_attrib(struct nvc0_context *nvc0, const unsigned a) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_vertex_element *ve = &nvc0->vertex->element[a].pipe; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; + uint32_t mode; + const struct util_format_description *desc; + void *dst; + const void *src = (const uint8_t *)vb->user_buffer + ve->src_offset; + assert(!vb->buffer); + + desc = util_format_description(ve->src_format); + + PUSH_SPACE(push, 6); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 5); + dst = &push->cur[1]; + if (desc->channel[0].pure_integer) { + if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + mode = VTX_ATTR(a, 4, SINT, 32); + desc->unpack_rgba_sint(dst, 0, src, 0, 1, 1); + } else { + mode = VTX_ATTR(a, 4, UINT, 32); + desc->unpack_rgba_uint(dst, 0, src, 0, 1, 1); + } + } else { + mode = VTX_ATTR(a, 4, FLOAT, 32); + desc->unpack_rgba_float(dst, 0, src, 0, 1, 1); + } + push->cur[0] = mode; + push->cur += 5; +} + +static INLINE void +nvc0_user_vbuf_range(struct nvc0_context *nvc0, int vbi, + uint32_t *base, uint32_t *size) +{ + if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { + const uint32_t div = nvc0->vertex->min_instance_div[vbi]; + *base = nvc0->instance_off * nvc0->vtxbuf[vbi].stride; + *size = (nvc0->instance_max / div) * nvc0->vtxbuf[vbi].stride + + nvc0->vertex->vb_access_size[vbi]; + } else { + /* NOTE: if there are user buffers, we *must* have index bounds */ + assert(nvc0->vb_elt_limit != ~0); + *base = nvc0->vb_elt_first * nvc0->vtxbuf[vbi].stride; + *size = nvc0->vb_elt_limit * nvc0->vtxbuf[vbi].stride + + nvc0->vertex->vb_access_size[vbi]; + } +} + +static INLINE void +nvc0_release_user_vbufs(struct nvc0_context *nvc0) +{ + 
if (nvc0->vbo_user) { + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); + nouveau_scratch_done(&nvc0->base); + } +} + +static void +nvc0_update_user_vbufs(struct nvc0_context *nvc0) +{ + uint64_t address[PIPE_MAX_ATTRIBS]; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + int i; + uint32_t written = 0; + + PUSH_SPACE(push, nvc0->vertex->num_elements * 8); + for (i = 0; i < nvc0->vertex->num_elements; ++i) { + struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe; + const unsigned b = ve->vertex_buffer_index; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b]; + uint32_t base, size; + + if (!(nvc0->vbo_user & (1 << b))) + continue; + if (!vb->stride) { + nvc0_set_constant_vertex_attrib(nvc0, i); + continue; + } + nvc0_user_vbuf_range(nvc0, b, &base, &size); + + if (!(written & (1 << b))) { + struct nouveau_bo *bo; + const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART; + written |= 1 << b; + address[b] = nouveau_scratch_data(&nvc0->base, vb->user_buffer, + base, size, &bo); + if (bo) + BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo); + + NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size); + } + + BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5); + PUSH_DATA (push, i); + PUSH_DATAh(push, address[b] + base + size - 1); + PUSH_DATA (push, address[b] + base + size - 1); + PUSH_DATAh(push, address[b] + ve->src_offset); + PUSH_DATA (push, address[b] + ve->src_offset); + } + nvc0->base.vbo_dirty = TRUE; +} + +static void +nvc0_update_user_vbufs_shared(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + uint32_t mask = nvc0->vbo_user & ~nvc0->constant_vbos; + + PUSH_SPACE(push, nvc0->num_vtxbufs * 8); + while (mask) { + struct nouveau_bo *bo; + const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART; + uint64_t address; + uint32_t base, size; + const int b = ffs(mask) - 1; + mask &= ~(1 << b); + + nvc0_user_vbuf_range(nvc0, b, &base, &size); + + address = 
nouveau_scratch_data(&nvc0->base, nvc0->vtxbuf[b].user_buffer, + base, size, &bo); + if (bo) + BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo); + + BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5); + PUSH_DATA (push, b); + PUSH_DATAh(push, address + base + size - 1); + PUSH_DATA (push, address + base + size - 1); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + + NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size); + } + + mask = nvc0->state.constant_elts; + while (mask) { + int i = ffs(mask) - 1; + mask &= ~(1 << i); + nvc0_set_constant_vertex_attrib(nvc0, i); + } +} + +static void +nvc0_validate_vertex_buffers(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const struct nvc0_vertex_stateobj *vertex = nvc0->vertex; + uint32_t refd = 0; + unsigned i; + + PUSH_SPACE(push, vertex->num_elements * 8); + for (i = 0; i < vertex->num_elements; ++i) { + const struct nvc0_vertex_element *ve; + const struct pipe_vertex_buffer *vb; + struct nv04_resource *res; + unsigned b; + unsigned limit, offset; + + if (nvc0->state.constant_elts & (1 << i)) + continue; + ve = &vertex->element[i]; + b = ve->pipe.vertex_buffer_index; + vb = &nvc0->vtxbuf[b]; + + if (!vb->buffer) { + if (vb->stride) { + if (ve->pipe.instance_divisor) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1); + PUSH_DATA (push, ve->pipe.instance_divisor); + } + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1); + PUSH_DATA (push, (1 << 12) | vb->stride); + } + /* address/value set in nvc0_update_user_vbufs */ + continue; + } + res = nv04_resource(vb->buffer); + offset = ve->pipe.src_offset + vb->buffer_offset; + limit = vb->buffer->width0 - 1; + + if (unlikely(ve->pipe.instance_divisor)) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4); + PUSH_DATA (push, (1 << 12) | vb->stride); + PUSH_DATAh(push, res->address + offset); + PUSH_DATA (push, res->address + offset); + PUSH_DATA (push, ve->pipe.instance_divisor); + } else { + 
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3); + PUSH_DATA (push, (1 << 12) | vb->stride); + PUSH_DATAh(push, res->address + offset); + PUSH_DATA (push, res->address + offset); + } + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); + PUSH_DATAh(push, res->address + limit); + PUSH_DATA (push, res->address + limit); + + if (!(refd & (1 << b))) { + refd |= 1 << b; + BCTX_REFN(nvc0->bufctx_3d, VTX, res, RD); + } + } + if (nvc0->vbo_user) + nvc0_update_user_vbufs(nvc0); +} + +static void +nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned b; + const uint32_t mask = nvc0->vbo_user; + + PUSH_SPACE(push, nvc0->num_vtxbufs * 8); + for (b = 0; b < nvc0->num_vtxbufs; ++b) { + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b]; + struct nv04_resource *buf; + uint32_t offset, limit; + + if (mask & (1 << b)) { + if (vb->stride) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 1); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + } + /* address/value set in nvc0_update_user_vbufs_shared */ + continue; + } + buf = nv04_resource(vb->buffer); + offset = vb->buffer_offset; + limit = buf->base.width0 - 1; + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 3); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2); + PUSH_DATAh(push, buf->address + limit); + PUSH_DATA (push, buf->address + limit); + + BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD); + } + if (nvc0->vbo_user) + nvc0_update_user_vbufs_shared(nvc0); +} + +void +nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_vertex_stateobj *vertex = nvc0->vertex; + struct nvc0_vertex_element *ve; + uint32_t const_vbos; + unsigned i; + uint8_t vbo_mode; + boolean update_vertex; + + 
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); + + if (unlikely(vertex->need_conversion) || + unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) { + vbo_mode = 3; + } else { + vbo_mode = (nvc0->vbo_user && nvc0->vbo_push_hint) ? 1 : 0; + } + const_vbos = vbo_mode ? 0 : nvc0->constant_vbos; + + update_vertex = (nvc0->dirty & NVC0_NEW_VERTEX) || + (const_vbos != nvc0->state.constant_vbos) || + (vbo_mode != nvc0->state.vbo_mode); + + if (update_vertex) { + const unsigned n = MAX2(vertex->num_elements, nvc0->state.num_vtxelts); + + nvc0->state.constant_vbos = const_vbos; + nvc0->state.constant_elts = 0; + nvc0->state.num_vtxelts = vertex->num_elements; + nvc0->state.vbo_mode = vbo_mode; + + if (unlikely(vbo_mode)) { + if (unlikely(nvc0->state.instance_elts & 3)) { + /* translate mode uses only 2 vertex buffers */ + nvc0->state.instance_elts &= ~3; + PUSH_SPACE(push, 3); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(0)), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + } + + PUSH_SPACE(push, n * 2 + 4); + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n); + for (i = 0; i < vertex->num_elements; ++i) + PUSH_DATA(push, vertex->element[i].state_alt); + for (; i < n; ++i) + PUSH_DATA(push, NVC0_3D_VERTEX_ATTRIB_INACTIVE); + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 1); + PUSH_DATA (push, (1 << 12) | vertex->size); + for (i = 1; i < n; ++i) + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0); + } else { + uint32_t *restrict data; + + if (unlikely(vertex->instance_elts != nvc0->state.instance_elts)) { + nvc0->state.instance_elts = vertex->instance_elts; + assert(n); /* if (n == 0), both masks should be 0 */ + PUSH_SPACE(push, 3); + BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2); + PUSH_DATA (push, n); + PUSH_DATA (push, vertex->instance_elts); + } + + PUSH_SPACE(push, n * 2 + 1); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n); + data = push->cur; + push->cur += n; + for (i = 0; i < vertex->num_elements; ++i) { + 
ve = &vertex->element[i]; + data[i] = ve->state; + if (unlikely(const_vbos & (1 << ve->pipe.vertex_buffer_index))) { + nvc0->state.constant_elts |= 1 << i; + data[i] |= NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST; + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0); + } + } + for (; i < n; ++i) { + data[i] = NVC0_3D_VERTEX_ATTRIB_INACTIVE; + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0); + } + } + } + if (nvc0->state.vbo_mode) /* using translate, don't set up arrays here */ + return; + + if (vertex->shared_slots) + nvc0_validate_vertex_buffers_shared(nvc0); + else + nvc0_validate_vertex_buffers(nvc0); +} + +void +nvc0_idxbuf_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv04_resource *buf = nv04_resource(nvc0->idxbuf.buffer); + + assert(buf); + assert(nouveau_resource_mapped_by_gpu(&buf->base)); + + PUSH_SPACE(push, 6); + BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5); + PUSH_DATAh(push, buf->address + nvc0->idxbuf.offset); + PUSH_DATA (push, buf->address + nvc0->idxbuf.offset); + PUSH_DATAh(push, buf->address + buf->base.width0 - 1); + PUSH_DATA (push, buf->address + buf->base.width0 - 1); + PUSH_DATA (push, nvc0->idxbuf.index_size >> 1); + + BCTX_REFN(nvc0->bufctx_3d, IDX, buf, RD); +} + +#define NVC0_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ + switch (prim) { + NVC0_PRIM_GL_CASE(POINTS); + NVC0_PRIM_GL_CASE(LINES); + NVC0_PRIM_GL_CASE(LINE_LOOP); + NVC0_PRIM_GL_CASE(LINE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLES); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLE_FAN); + NVC0_PRIM_GL_CASE(QUADS); + NVC0_PRIM_GL_CASE(QUAD_STRIP); + NVC0_PRIM_GL_CASE(POLYGON); + NVC0_PRIM_GL_CASE(LINES_ADJACENCY); + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NVC0_PRIM_GL_CASE(PATCHES); */ + default: + return 
NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + } +} + +static void +nvc0_draw_vbo_kick_notify(struct nouveau_pushbuf *push) +{ + struct nvc0_screen *screen = push->user_priv; + + nouveau_fence_update(&screen->base, TRUE); + + NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1); +} + +static void +nvc0_draw_arrays(struct nvc0_context *nvc0, + unsigned mode, unsigned start, unsigned count, + unsigned instance_count) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned prim; + + if (nvc0->state.index_bias) { + PUSH_SPACE(push, 1); + IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0); + nvc0->state.index_bias = 0; + } + + prim = nvc0_prim_gl(mode); + + while (instance_count--) { + PUSH_SPACE(push, 6); + BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (push, prim); + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, start); + PUSH_DATA (push, count); + IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0); + + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_array, 1); +} + +static void +nvc0_draw_elements_inline_u08(struct nouveau_pushbuf *push, const uint8_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 3) { + unsigned i; + PUSH_SPACE(push, 4); + BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U32), count & 3); + for (i = 0; i < (count & 3); ++i) + PUSH_DATA(push, *map++); + count &= ~3; + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4; + + PUSH_SPACE(push, nr + 1); + BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U8), nr); + for (i = 0; i < nr; ++i) { + PUSH_DATA(push, + (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]); + map += 4; + } + count -= nr * 4; + } +} + +static void +nvc0_draw_elements_inline_u16(struct nouveau_pushbuf *push, const uint16_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + count &= ~1; + PUSH_SPACE(push, 2); + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, 
*map++); + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; + + PUSH_SPACE(push, nr + 1); + BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U16), nr); + for (i = 0; i < nr; ++i) { + PUSH_DATA(push, (map[1] << 16) | map[0]); + map += 2; + } + count -= nr * 2; + } +} + +static void +nvc0_draw_elements_inline_u32(struct nouveau_pushbuf *push, const uint32_t *map, + unsigned start, unsigned count) +{ + map += start; + + while (count) { + const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); + + PUSH_SPACE(push, nr + 1); + BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U32), nr); + PUSH_DATAp(push, map, nr); + + map += nr; + count -= nr; + } +} + +static void +nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf *push, + const uint32_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + count--; + PUSH_SPACE(push, 1); + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, *map++); + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; + + PUSH_SPACE(push, nr + 1); + BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U16), nr); + for (i = 0; i < nr; ++i) { + PUSH_DATA(push, (map[1] << 16) | map[0]); + map += 2; + } + count -= nr * 2; + } +} + +static void +nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, + unsigned mode, unsigned start, unsigned count, + unsigned instance_count, int32_t index_bias) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned prim; + const unsigned index_size = nvc0->idxbuf.index_size; + + prim = nvc0_prim_gl(mode); + + if (index_bias != nvc0->state.index_bias) { + PUSH_SPACE(push, 2); + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 1); + PUSH_DATA (push, index_bias); + nvc0->state.index_bias = index_bias; + } + + if (nvc0->idxbuf.buffer) { + PUSH_SPACE(push, 1); + IMMED_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), prim); + do { + PUSH_SPACE(push, 7); + BEGIN_NVC0(push, NVC0_3D(INDEX_BATCH_FIRST), 2); + PUSH_DATA (push, start); + PUSH_DATA 
(push, count); + if (--instance_count) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, prim | NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT); + } + } while (instance_count); + IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0); + } else { + const void *data = nvc0->idxbuf.user_buffer; + + while (instance_count--) { + PUSH_SPACE(push, 2); + BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (push, prim); + switch (index_size) { + case 1: + nvc0_draw_elements_inline_u08(push, data, start, count); + break; + case 2: + nvc0_draw_elements_inline_u16(push, data, start, count); + break; + case 4: + if (shorten) + nvc0_draw_elements_inline_u32_short(push, data, start, count); + else + nvc0_draw_elements_inline_u32(push, data, start, count); + break; + default: + assert(0); + return; + } + PUSH_SPACE(push, 1); + IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0); + + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + } + NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_indexed, 1); +} + +static void +nvc0_draw_stream_output(struct nvc0_context *nvc0, + const struct pipe_draw_info *info) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_so_target *so = nvc0_so_target(info->count_from_stream_output); + struct nv04_resource *res = nv04_resource(so->pipe.buffer); + unsigned mode = nvc0_prim_gl(info->mode); + unsigned num_instances = info->instance_count; + + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + PUSH_SPACE(push, 2); + IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); + nvc0_query_fifo_wait(push, so->pq); + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0); + + NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, 1); + } + + while (num_instances--) { + PUSH_SPACE(push, 8); + BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (push, mode); + BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BASE), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_STRIDE), 
1); + PUSH_DATA (push, so->stride); + BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BYTES), 1); + nvc0_query_pushbuf_submit(push, so->pq, 0x4); + IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0); + + mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } +} + +void +nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ + nvc0->vb_elt_first = info->min_index + info->index_bias; + nvc0->vb_elt_limit = info->max_index - info->min_index; + nvc0->instance_off = info->start_instance; + nvc0->instance_max = info->instance_count - 1; + + /* For picking only a few vertices from a large user buffer, push is better, + * if index count is larger and we expect repeated vertices, suggest upload. + */ + nvc0->vbo_push_hint = + info->indexed && (nvc0->vb_elt_limit >= (info->count * 2)); + + /* Check whether we want to switch vertex-submission mode. */ + if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_ARRAYS | NVC0_NEW_VERTEX))) { + if (nvc0->vbo_push_hint != !!nvc0->state.vbo_mode) + if (nvc0->state.vbo_mode != 3) + nvc0->dirty |= NVC0_NEW_ARRAYS; + + if (!(nvc0->dirty & NVC0_NEW_ARRAYS) && nvc0->state.vbo_mode == 0) { + if (nvc0->vertex->shared_slots) + nvc0_update_user_vbufs_shared(nvc0); + else + nvc0_update_user_vbufs(nvc0); + } + } + + /* 8 as minimum to avoid immediate double validation of new buffers */ + nvc0_state_validate(nvc0, ~0, 8); + + push->kick_notify = nvc0_draw_vbo_kick_notify; + + if (nvc0->state.vbo_mode) { + nvc0_push_vbo(nvc0, info); + push->kick_notify = nvc0_default_kick_notify; + return; + } + + /* space for base instance, flush, and prim restart */ + PUSH_SPACE(push, 8); + + if (nvc0->state.instance_base != info->start_instance) { + nvc0->state.instance_base = info->start_instance; + /* NOTE: this does not affect the shader input, should it ? 
*/ + BEGIN_NVC0(push, NVC0_3D(VB_INSTANCE_BASE), 1); + PUSH_DATA (push, info->start_instance); + } + + if (nvc0->base.vbo_dirty) { + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0); + nvc0->base.vbo_dirty = FALSE; + } + + if (info->indexed) { + boolean shorten = info->max_index <= 65535; + + if (info->primitive_restart != nvc0->state.prim_restart) { + if (info->primitive_restart) { + BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 2); + PUSH_DATA (push, 1); + PUSH_DATA (push, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } else { + IMMED_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 0); + } + nvc0->state.prim_restart = info->primitive_restart; + } else + if (info->primitive_restart) { + BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_INDEX), 1); + PUSH_DATA (push, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } + + nvc0_draw_elements(nvc0, shorten, + info->mode, info->start, info->count, + info->instance_count, info->index_bias); + } else + if (unlikely(info->count_from_stream_output)) { + nvc0_draw_stream_output(nvc0, info); + } else { + nvc0_draw_arrays(nvc0, + info->mode, info->start, info->count, + info->instance_count); + } + push->kick_notify = nvc0_default_kick_notify; + + nvc0_release_user_vbufs(nvc0); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c new file mode 100644 index 00000000000..51e751cfa57 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c @@ -0,0 +1,649 @@ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_resource.h" + +#include "nvc0/nvc0_3d.xml.h" + +struct push_context { + struct nouveau_pushbuf *push; + + struct translate *translate; + void *dest; + const void *idxbuf; + + uint32_t vertex_size; + uint32_t restart_index; + uint32_t 
instance_id; + + boolean prim_restart; + boolean need_vertex_id; + + struct { + boolean enabled; + boolean value; + unsigned stride; + const uint8_t *data; + } edgeflag; +}; + +static void nvc0_push_upload_vertex_ids(struct push_context *, + struct nvc0_context *, + const struct pipe_draw_info *); + +static void +nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx) +{ + ctx->push = nvc0->base.pushbuf; + + ctx->translate = nvc0->vertex->translate; + ctx->vertex_size = nvc0->vertex->size; + + ctx->need_vertex_id = + nvc0->vertprog->vp.need_vertex_id && (nvc0->vertex->num_elements < 32); + + ctx->edgeflag.value = TRUE; + ctx->edgeflag.enabled = nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS; + + /* silence warnings */ + ctx->edgeflag.data = NULL; + ctx->edgeflag.stride = 0; +} + +static INLINE void +nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias) +{ + struct translate *translate = nvc0->vertex->translate; + unsigned i; + + for (i = 0; i < nvc0->num_vtxbufs; ++i) { + const uint8_t *map; + const struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; + + if (likely(!vb->buffer)) + map = (const uint8_t *)vb->user_buffer; + else + map = nouveau_resource_map_offset(&nvc0->base, + nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD); + + if (index_bias && !unlikely(nvc0->vertex->instance_bufs & (1 << i))) + map += (intptr_t)index_bias * vb->stride; + + translate->set_buffer(translate, i, map, vb->stride, ~0); + } +} + +static INLINE void +nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0) +{ + if (nvc0->idxbuf.buffer) { + struct nv04_resource *buf = nv04_resource(nvc0->idxbuf.buffer); + ctx->idxbuf = nouveau_resource_map_offset(&nvc0->base, + buf, nvc0->idxbuf.offset, NOUVEAU_BO_RD); + } else { + ctx->idxbuf = nvc0->idxbuf.user_buffer; + } +} + +static INLINE void +nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0, + int32_t index_bias) +{ + unsigned attr = 
nvc0->vertprog->vp.edgeflag; + struct pipe_vertex_element *ve = &nvc0->vertex->element[attr].pipe; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; + struct nv04_resource *buf = nv04_resource(vb->buffer); + unsigned offset = vb->buffer_offset + ve->src_offset; + + ctx->edgeflag.stride = vb->stride; + ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base, + buf, offset, NOUVEAU_BO_RD); + if (index_bias) + ctx->edgeflag.data += (intptr_t)index_bias * vb->stride; +} + +static INLINE unsigned +prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index) +{ + unsigned i; + for (i = 0; i < push && elts[i] != index; ++i); + return i; +} + +static INLINE unsigned +prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index) +{ + unsigned i; + for (i = 0; i < push && elts[i] != index; ++i); + return i; +} + +static INLINE unsigned +prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index) +{ + unsigned i; + for (i = 0; i < push && elts[i] != index; ++i); + return i; +} + +static INLINE boolean +ef_value(const struct push_context *ctx, uint32_t index) +{ + float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride]; + return *pf ? 
TRUE : FALSE; +} + +static INLINE boolean +ef_toggle(struct push_context *ctx) +{ + ctx->edgeflag.value = !ctx->edgeflag.value; + return ctx->edgeflag.value; +} + +static INLINE unsigned +ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n) +{ + unsigned i; + for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i); + return i; +} + +static INLINE unsigned +ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n) +{ + unsigned i; + for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i); + return i; +} + +static INLINE unsigned +ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n) +{ + unsigned i; + for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i); + return i; +} + +static INLINE unsigned +ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n) +{ + unsigned i; + for (i = 0; i < n && ef_value(ctx, start++) == ctx->edgeflag.value; ++i); + return i; +} + +static INLINE void * +nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nouveau_bo *bo; + uint64_t va; + const unsigned size = count * nvc0->vertex->size; + + void *const dest = nouveau_scratch_get(&nvc0->base, size, &va, &bo); + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2); + PUSH_DATAh(push, va); + PUSH_DATA (push, va); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); + PUSH_DATAh(push, va + size - 1); + PUSH_DATA (push, va + size - 1); + + BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, + bo); + nouveau_pushbuf_validate(push); + + return dest; +} + +static void +disp_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) +{ + struct nouveau_pushbuf *push = ctx->push; + struct translate *translate = ctx->translate; + const uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start; + unsigned pos = 0; + + do { 
+ unsigned nR = count; + + if (unlikely(ctx->prim_restart)) + nR = prim_restart_search_i08(elts, nR, ctx->restart_index); + + translate->run_elts8(translate, elts, nR, 0, ctx->instance_id, ctx->dest); + count -= nR; + ctx->dest += nR * ctx->vertex_size; + + while (nR) { + unsigned nE = nR; + + if (unlikely(ctx->edgeflag.enabled)) + nE = ef_toggle_search_i08(ctx, elts, nR); + + PUSH_SPACE(push, 4); + if (likely(nE >= 2)) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, pos); + PUSH_DATA (push, nE); + } else + if (nE) { + if (pos <= 0xff) { + IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos); + } else { + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, pos); + } + } + if (unlikely(nE != nR)) + IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx)); + + pos += nE; + elts += nE; + nR -= nE; + } + if (count) { + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, ctx->restart_index); + ++elts; + ctx->dest += ctx->vertex_size; + ++pos; + --count; + } + } while (count); +} + +static void +disp_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) +{ + struct nouveau_pushbuf *push = ctx->push; + struct translate *translate = ctx->translate; + const uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start; + unsigned pos = 0; + + do { + unsigned nR = count; + + if (unlikely(ctx->prim_restart)) + nR = prim_restart_search_i16(elts, nR, ctx->restart_index); + + translate->run_elts16(translate, elts, nR, 0, ctx->instance_id, ctx->dest); + count -= nR; + ctx->dest += nR * ctx->vertex_size; + + while (nR) { + unsigned nE = nR; + + if (unlikely(ctx->edgeflag.enabled)) + nE = ef_toggle_search_i16(ctx, elts, nR); + + PUSH_SPACE(push, 4); + if (likely(nE >= 2)) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, pos); + PUSH_DATA (push, nE); + } else + if (nE) { + if (pos <= 0xff) { + IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos); + } else { + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + 
PUSH_DATA (push, pos); + } + } + if (unlikely(nE != nR)) + IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx)); + + pos += nE; + elts += nE; + nR -= nE; + } + if (count) { + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, ctx->restart_index); + ++elts; + ctx->dest += ctx->vertex_size; + ++pos; + --count; + } + } while (count); +} + +static void +disp_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) +{ + struct nouveau_pushbuf *push = ctx->push; + struct translate *translate = ctx->translate; + const uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start; + unsigned pos = 0; + + do { + unsigned nR = count; + + if (unlikely(ctx->prim_restart)) + nR = prim_restart_search_i32(elts, nR, ctx->restart_index); + + translate->run_elts(translate, elts, nR, 0, ctx->instance_id, ctx->dest); + count -= nR; + ctx->dest += nR * ctx->vertex_size; + + while (nR) { + unsigned nE = nR; + + if (unlikely(ctx->edgeflag.enabled)) + nE = ef_toggle_search_i32(ctx, elts, nR); + + PUSH_SPACE(push, 4); + if (likely(nE >= 2)) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, pos); + PUSH_DATA (push, nE); + } else + if (nE) { + if (pos <= 0xff) { + IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos); + } else { + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, pos); + } + } + if (unlikely(nE != nR)) + IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx)); + + pos += nE; + elts += nE; + nR -= nE; + } + if (count) { + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, ctx->restart_index); + ++elts; + ctx->dest += ctx->vertex_size; + ++pos; + --count; + } + } while (count); +} + +static void +disp_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) +{ + struct nouveau_pushbuf *push = ctx->push; + struct translate *translate = ctx->translate; + unsigned pos = 0; + + translate->run(translate, start, count, 0, ctx->instance_id, ctx->dest); + do { + unsigned nr = count; + + if 
(unlikely(ctx->edgeflag.enabled)) + nr = ef_toggle_search_seq(ctx, start + pos, nr); + + PUSH_SPACE(push, 4); + if (likely(nr)) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, pos); + PUSH_DATA (push, nr); + } + if (unlikely(nr != count)) + IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx)); + + pos += nr; + count -= nr; + } while (count); +} + + +#define NVC0_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ + switch (prim) { + NVC0_PRIM_GL_CASE(POINTS); + NVC0_PRIM_GL_CASE(LINES); + NVC0_PRIM_GL_CASE(LINE_LOOP); + NVC0_PRIM_GL_CASE(LINE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLES); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLE_FAN); + NVC0_PRIM_GL_CASE(QUADS); + NVC0_PRIM_GL_CASE(QUAD_STRIP); + NVC0_PRIM_GL_CASE(POLYGON); + NVC0_PRIM_GL_CASE(LINES_ADJACENCY); + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NVC0_PRIM_GL_CASE(PATCHES); */ + default: + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + } +} + +void +nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) +{ + struct push_context ctx; + unsigned i, index_size; + unsigned inst_count = info->instance_count; + unsigned vert_count = info->count; + unsigned prim; + + nvc0_push_context_init(nvc0, &ctx); + + nvc0_vertex_configure_translate(nvc0, info->index_bias); + + if (unlikely(ctx.edgeflag.enabled)) + nvc0_push_map_edgeflag(&ctx, nvc0, info->index_bias); + + ctx.prim_restart = info->primitive_restart; + ctx.restart_index = info->restart_index; + + if (info->indexed) { + nvc0_push_map_idxbuf(&ctx, nvc0); + index_size = nvc0->idxbuf.index_size; + + if (info->primitive_restart) { + BEGIN_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 2); + PUSH_DATA (ctx.push, 1); + PUSH_DATA (ctx.push, info->restart_index); + } else + if (nvc0->state.prim_restart) { + 
IMMED_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 0); + } + nvc0->state.prim_restart = info->primitive_restart; + } else { + if (unlikely(info->count_from_stream_output)) { + struct pipe_context *pipe = &nvc0->base.pipe; + struct nvc0_so_target *targ; + targ = nvc0_so_target(info->count_from_stream_output); + pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count); + vert_count /= targ->stride; + } + ctx.idxbuf = NULL; /* shut up warnings */ + index_size = 0; + } + + ctx.instance_id = info->start_instance; + + prim = nvc0_prim_gl(info->mode); + do { + PUSH_SPACE(ctx.push, 9); + + ctx.dest = nvc0_push_setup_vertex_array(nvc0, vert_count); + if (unlikely(!ctx.dest)) + break; + + if (unlikely(ctx.need_vertex_id)) + nvc0_push_upload_vertex_ids(&ctx, nvc0, info); + + IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0); + BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (ctx.push, prim); + switch (index_size) { + case 1: + disp_vertices_i08(&ctx, info->start, vert_count); + break; + case 2: + disp_vertices_i16(&ctx, info->start, vert_count); + break; + case 4: + disp_vertices_i32(&ctx, info->start, vert_count); + break; + default: + assert(index_size == 0); + disp_vertices_seq(&ctx, info->start, vert_count); + break; + } + PUSH_SPACE(ctx.push, 1); + IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0); + + if (--inst_count) { + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + ++ctx.instance_id; + } + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); + nouveau_scratch_done(&nvc0->base); + } while (inst_count); + + + /* reset state and unmap buffers (no-op) */ + + if (unlikely(!ctx.edgeflag.value)) { + PUSH_SPACE(ctx.push, 1); + IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1); + } + + if (unlikely(ctx.need_vertex_id)) { + PUSH_SPACE(ctx.push, 4); + IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0); + BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(1)), 1); + PUSH_DATA (ctx.push, + NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST | + 
NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32); + IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 0); + } + + if (info->indexed) + nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer)); + for (i = 0; i < nvc0->num_vtxbufs; ++i) + nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer)); + + NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1); +} + +static INLINE void +copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n) +{ + unsigned i; + for (i = 0; i < n; ++i) + dst[i] = elts[i] + bias; +} + +static INLINE void +copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n) +{ + unsigned i; + for (i = 0; i < n; ++i) + dst[i] = elts[i] + bias; +} + +static INLINE void +copy_indices_u32(uint32_t *dst, const uint32_t *elts, uint32_t bias, unsigned n) +{ + unsigned i; + for (i = 0; i < n; ++i) + dst[i] = elts[i] + bias; +} + +static void +nvc0_push_upload_vertex_ids(struct push_context *ctx, + struct nvc0_context *nvc0, + const struct pipe_draw_info *info) + +{ + struct nouveau_pushbuf *push = ctx->push; + struct nouveau_bo *bo; + uint64_t va; + uint32_t *data; + uint32_t format; + unsigned index_size = nvc0->idxbuf.index_size; + unsigned i; + unsigned a = nvc0->vertex->num_elements; + + if (!index_size || info->index_bias) + index_size = 4; + data = (uint32_t *)nouveau_scratch_get(&nvc0->base, + info->count * index_size, &va, &bo); + + BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, + bo); + nouveau_pushbuf_validate(push); + + if (info->indexed) { + if (!info->index_bias) { + memcpy(data, ctx->idxbuf, info->count * index_size); + } else { + switch (nvc0->idxbuf.index_size) { + case 1: + copy_indices_u8(data, ctx->idxbuf, info->index_bias, info->count); + break; + case 2: + copy_indices_u16(data, ctx->idxbuf, info->index_bias, info->count); + break; + default: + copy_indices_u32(data, ctx->idxbuf, info->index_bias, info->count); + 
break; + } + } + } else { + for (i = 0; i < info->count; ++i) + data[i] = i + (info->start + info->index_bias); + } + + format = (1 << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) | + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT; + + switch (index_size) { + case 1: + format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8; + break; + case 2: + format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16; + break; + default: + format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32; + break; + } + + PUSH_SPACE(push, 12); + + if (unlikely(nvc0->state.instance_elts & 2)) { + nvc0->state.instance_elts &= ~2; + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(1)), 0); + } + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1); + PUSH_DATA (push, format); + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 3); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size); + PUSH_DATAh(push, va); + PUSH_DATA (push, va); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2); + PUSH_DATAh(push, va + info->count * index_size - 1); + PUSH_DATA (push, va + info->count * index_size - 1); + +#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) \ + (((0x80 + (a) * 0x10) / 4) << NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT) + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_REPLACE), 1); + PUSH_DATA (push, NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) | 1); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c new file mode 100644 index 00000000000..5871f590e0e --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c @@ -0,0 +1,331 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + 
* Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nvc0/nvc0_video.h" + +#include "util/u_sampler.h" +#include "util/u_format.h" + +static void +nvc0_decoder_decode_bitstream(struct pipe_video_codec *decoder, + struct pipe_video_buffer *video_target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void *const *data, + const unsigned *num_bytes) +{ + struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder; + struct nouveau_vp3_video_buffer *target = (struct nouveau_vp3_video_buffer *)video_target; + uint32_t comm_seq = ++dec->fence_seq; + union pipe_desc desc; + + unsigned vp_caps, is_ref, ret; + struct nouveau_vp3_video_buffer *refs[16] = {}; + + desc.base = picture; + + assert(target->base.buffer_format == PIPE_FORMAT_NV12); + + ret = nvc0_decoder_bsp(dec, desc, target, comm_seq, + num_buffers, data, num_bytes, + &vp_caps, &is_ref, refs); + + /* did we decode bitstream correctly? 
*/ + assert(ret == 2); + + nvc0_decoder_vp(dec, desc, target, comm_seq, vp_caps, is_ref, refs); + nvc0_decoder_ppp(dec, desc, target, comm_seq); +} + +struct pipe_video_codec * +nvc0_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ) +{ + struct nouveau_screen *screen = &((struct nvc0_context *)context)->screen->base; + struct nouveau_vp3_decoder *dec; + struct nouveau_pushbuf **push; + union nouveau_bo_config cfg; + bool kepler = screen->device->chipset >= 0xe0; + + cfg.nvc0.tile_mode = 0x10; + cfg.nvc0.memtype = 0xfe; + + int ret, i; + uint32_t codec = 1, ppp_codec = 3; + uint32_t timeout; + u32 tmp_size = 0; + + if (getenv("XVMC_VL")) + return vl_create_decoder(context, templ); + + if (templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + debug_printf("%x\n", templ->entrypoint); + return NULL; + } + + dec = CALLOC_STRUCT(nouveau_vp3_decoder); + if (!dec) + return NULL; + dec->client = screen->client; + dec->base = *templ; + nouveau_vp3_decoder_init_common(&dec->base); + + if (!kepler) { + dec->bsp_idx = 5; + dec->vp_idx = 6; + dec->ppp_idx = 7; + } else { + dec->bsp_idx = 2; + dec->vp_idx = 2; + dec->ppp_idx = 2; + } + + for (i = 0; i < 3; ++i) + if (i && !kepler) { + dec->channel[i] = dec->channel[0]; + dec->pushbuf[i] = dec->pushbuf[0]; + } else { + void *data; + u32 size; + struct nvc0_fifo nvc0_args = {}; + struct nve0_fifo nve0_args = {}; + + if (!kepler) { + size = sizeof(nvc0_args); + data = &nvc0_args; + } else { + unsigned engine[] = { + NVE0_FIFO_ENGINE_BSP, + NVE0_FIFO_ENGINE_VP, + NVE0_FIFO_ENGINE_PPP + }; + + nve0_args.engine = engine[i]; + size = sizeof(nve0_args); + data = &nve0_args; + } + + ret = nouveau_object_new(&screen->device->object, 0, + NOUVEAU_FIFO_CHANNEL_CLASS, + data, size, &dec->channel[i]); + + if (!ret) + ret = nouveau_pushbuf_new(screen->client, dec->channel[i], 4, + 32 * 1024, true, &dec->pushbuf[i]); + if (ret) + break; + } + push = dec->pushbuf; + + if (!kepler) { + if (!ret) + ret = 
nouveau_object_new(dec->channel[0], 0x390b1, 0x90b1, NULL, 0, &dec->bsp); + if (!ret) + ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x90b2, NULL, 0, &dec->vp); + if (!ret) + ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x90b3, NULL, 0, &dec->ppp); + } else { + if (!ret) + ret = nouveau_object_new(dec->channel[0], 0x95b1, 0x95b1, NULL, 0, &dec->bsp); + if (!ret) + ret = nouveau_object_new(dec->channel[1], 0x95b2, 0x95b2, NULL, 0, &dec->vp); + if (!ret) + ret = nouveau_object_new(dec->channel[2], 0x90b3, 0x90b3, NULL, 0, &dec->ppp); + } + if (ret) + goto fail; + + BEGIN_NVC0(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[0], dec->bsp->handle); + + BEGIN_NVC0(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[1], dec->vp->handle); + + BEGIN_NVC0(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[2], dec->ppp->handle); + + dec->base.context = context; + dec->base.decode_bitstream = nvc0_decoder_decode_bitstream; + + for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i) + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0, 1 << 20, &cfg, &dec->bsp_bo[i]); + if (!ret) + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0x100, 4 << 20, &cfg, &dec->inter_bo[0]); + if (!ret) { + if (!kepler) + nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]); + else + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0x100, dec->inter_bo[0]->size, &cfg, + &dec->inter_bo[1]); + } + if (ret) + goto fail; + + switch (u_reduce_video_profile(templ->profile)) { + case PIPE_VIDEO_FORMAT_MPEG12: { + codec = 1; + assert(templ->max_references <= 2); + break; + } + case PIPE_VIDEO_FORMAT_MPEG4: { + codec = 4; + tmp_size = mb(templ->height)*16 * mb(templ->width)*16; + assert(templ->max_references <= 2); + break; + } + case PIPE_VIDEO_FORMAT_VC1: { + ppp_codec = codec = 2; + tmp_size = mb(templ->height)*16 * mb(templ->width)*16; + assert(templ->max_references <= 2); + break; + } + case PIPE_VIDEO_FORMAT_MPEG4_AVC: { + codec = 3; + 
dec->tmp_stride = 16 * mb_half(templ->width) * nouveau_vp3_video_align(templ->height) * 3 / 2; + tmp_size = dec->tmp_stride * (templ->max_references + 1); + assert(templ->max_references <= 16); + break; + } + default: + fprintf(stderr, "invalid codec\n"); + goto fail; + } + + if (screen->device->chipset < 0xd0) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + 0x4000, &cfg, &dec->fw_bo); + if (ret) + goto fail; + + ret = nouveau_vp3_load_firmware(dec, templ->profile, screen->device->chipset); + if (ret) + goto fw_fail; + } + + if (codec != 3) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + 0x400, &cfg, &dec->bitplane_bo); + if (ret) + goto fail; + } + + dec->ref_stride = mb(templ->width)*16 * (mb_half(templ->height)*32 + nouveau_vp3_video_align(templ->height)/2); + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + dec->ref_stride * (templ->max_references+2) + tmp_size, + &cfg, &dec->ref_bo); + if (ret) + goto fail; + + timeout = 0; + + BEGIN_NVC0(push[0], SUBC_BSP(0x200), 2); + PUSH_DATA (push[0], codec); + PUSH_DATA (push[0], timeout); + + BEGIN_NVC0(push[1], SUBC_VP(0x200), 2); + PUSH_DATA (push[1], codec); + PUSH_DATA (push[1], timeout); + + BEGIN_NVC0(push[2], SUBC_PPP(0x200), 2); + PUSH_DATA (push[2], ppp_codec); + PUSH_DATA (push[2], timeout); + + ++dec->fence_seq; + +#if NOUVEAU_VP3_DEBUG_FENCE + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP, + 0, 0x1000, NULL, &dec->fence_bo); + if (ret) + goto fail; + + nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client); + dec->fence_map = dec->fence_bo->map; + dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0; + dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map))); + + /* So lets test if the fence is working? 
*/ + nouveau_pushbuf_space(push[0], 6, 1, 0); + PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); + BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3); + PUSH_DATAh(push[0], dec->fence_bo->offset); + PUSH_DATA (push[0], dec->fence_bo->offset); + PUSH_DATA (push[0], dec->fence_seq); + + BEGIN_NVC0(push[0], SUBC_BSP(0x304), 1); + PUSH_DATA (push[0], 0); + PUSH_KICK (push[0]); + + nouveau_pushbuf_space(push[1], 6, 1, 0); + PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); + BEGIN_NVC0(push[1], SUBC_VP(0x240), 3); + PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push[1], dec->fence_seq); + + BEGIN_NVC0(push[1], SUBC_VP(0x304), 1); + PUSH_DATA (push[1], 0); + PUSH_KICK (push[1]); + + nouveau_pushbuf_space(push[2], 6, 1, 0); + PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); + BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3); + PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push[2], dec->fence_seq); + + BEGIN_NVC0(push[2], SUBC_PPP(0x304), 1); + PUSH_DATA (push[2], 0); + PUSH_KICK (push[2]); + + usleep(100); + while (dec->fence_seq > dec->fence_map[0] || + dec->fence_seq > dec->fence_map[4] || + dec->fence_seq > dec->fence_map[8]) { + debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); + usleep(100); + } + debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); +#endif + + return &dec->base; + +fw_fail: + debug_printf("Cannot create decoder without firmware..\n"); + dec->base.destroy(&dec->base); + return NULL; + +fail: + debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret); + dec->base.destroy(&dec->base); + return NULL; +} + +struct pipe_video_buffer * +nvc0_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *templat) +{ + return 
nouveau_vp3_video_buffer_create( + pipe, templat, NVC0_RESOURCE_FLAG_VIDEO); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.h b/src/gallium/drivers/nouveau/nvc0/nvc0_video.h new file mode 100644 index 00000000000..9ee0280f8ea --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_screen.h" +#include "nouveau_vp3_video.h" + +#include "vl/vl_decoder.h" +#include "vl/vl_types.h" + +#include "util/u_video.h" + +extern unsigned +nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, + unsigned comm_seq, unsigned num_buffers, + const void *const *data, const unsigned *num_bytes, + unsigned *vp_caps, unsigned *is_ref, + struct nouveau_vp3_video_buffer *refs[16]); + +extern void +nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, unsigned comm_seq, + unsigned caps, unsigned is_ref, + struct nouveau_vp3_video_buffer *refs[16]); + +extern void +nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, unsigned comm_seq); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c new file mode 100644 index 00000000000..40696fa779f --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c @@ -0,0 +1,155 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nvc0/nvc0_video.h" + +#if NOUVEAU_VP3_DEBUG_FENCE +static void dump_comm_bsp(struct comm *comm) +{ + unsigned idx = comm->bsp_cur_index & 0xf; + debug_printf("Cur seq: %x, bsp byte ofs: %x\n", comm->bsp_cur_index, comm->byte_ofs); + debug_printf("Status: %08x, pos: %08x\n", comm->status[idx], comm->pos[idx]); +} +#endif + +unsigned +nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, + unsigned comm_seq, unsigned num_buffers, + const void *const *data, const unsigned *num_bytes, + unsigned *vp_caps, unsigned *is_ref, + struct nouveau_vp3_video_buffer *refs[16]) +{ + struct nouveau_pushbuf *push = dec->pushbuf[0]; + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); + uint32_t bsp_addr, comm_addr, inter_addr; + uint32_t slice_size, bucket_size, ring_size; + uint32_t caps; + int ret; + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH]; + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; + unsigned fence_extra = 0; + struct nouveau_pushbuf_refn bo_refs[] = { + { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, + { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, +#if NOUVEAU_VP3_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + + if (!dec->bitplane_bo) + num_refs--; + +#if NOUVEAU_VP3_DEBUG_FENCE + fence_extra = 4; 
+#endif + + ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client); + if (ret) { + debug_printf("map failed: %i %s\n", ret, strerror(-ret)); + return -1; + } + + caps = nouveau_vp3_bsp(dec, desc, target, comm_seq, + num_buffers, data, num_bytes); + + nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs); + + nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 7) + fence_extra + 2, num_refs, 0); + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + bsp_addr = bsp_bo->offset >> 8; + inter_addr = inter_bo->offset >> 8; + +#if NOUVEAU_VP3_DEBUG_FENCE + memset(dec->comm, 0, 0x200); + comm_addr = (dec->fence_bo->offset + COMM_OFFSET) >> 8; +#else + comm_addr = bsp_addr + (COMM_OFFSET>>8); +#endif + + BEGIN_NVC0(push, SUBC_BSP(0x700), 5); + PUSH_DATA (push, caps); // 700 cmd + PUSH_DATA (push, bsp_addr + 1); // 704 strparm_bsp + PUSH_DATA (push, bsp_addr + 7); // 708 str addr + PUSH_DATA (push, comm_addr); // 70c comm + PUSH_DATA (push, comm_seq); // 710 seq + + if (codec != PIPE_VIDEO_FORMAT_MPEG4_AVC) { + u32 bitplane_addr; + + bitplane_addr = dec->bitplane_bo->offset >> 8; + + nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size); + BEGIN_NVC0(push, SUBC_BSP(0x400), 6); + PUSH_DATA (push, bsp_addr); // 400 picparm addr + PUSH_DATA (push, inter_addr); // 404 interparm addr + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 408 interdata addr + PUSH_DATA (push, ring_size << 8); // 40c interdata_size + PUSH_DATA (push, bitplane_addr); // 410 BITPLANE_DATA + PUSH_DATA (push, 0x400); // 414 BITPLANE_DATA_SIZE + } else { + nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); + BEGIN_NVC0(push, SUBC_BSP(0x400), 8); + PUSH_DATA (push, bsp_addr); // 400 picparm addr + PUSH_DATA (push, inter_addr); // 404 interparm addr + PUSH_DATA (push, slice_size << 8); // 408 interparm size? 
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 40c interdata addr + PUSH_DATA (push, ring_size << 8); // 410 interdata size + PUSH_DATA (push, inter_addr + slice_size); // 414 bucket? + PUSH_DATA (push, bucket_size << 8); // 418 bucket size? unshifted.. + PUSH_DATA (push, 0); // 41c targets + // TODO: Double check 414 / 418 with nvidia trace + } + +#if NOUVEAU_VP3_DEBUG_FENCE + BEGIN_NVC0(push, SUBC_BSP(0x240), 3); + PUSH_DATAh(push, dec->fence_bo->offset); + PUSH_DATA (push, dec->fence_bo->offset); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NVC0(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK (push); + + { + unsigned spin = 0; + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) { + debug_printf("b%u: %u\n", dec->fence_seq, dec->fence_map[0]); + dump_comm_bsp(dec->comm); + } + } while (dec->fence_seq > dec->fence_map[0]); + } + + dump_comm_bsp(dec->comm); + return dec->comm->status[comm_seq & 0xf]; +#else + BEGIN_NVC0(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); + return 2; +#endif +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c new file mode 100644 index 00000000000..4ceec4fbffc --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c @@ -0,0 +1,143 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nvc0/nvc0_video.h" + +static void +nvc0_decoder_setup_ppp(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target, uint32_t low700) { + struct nouveau_pushbuf *push = dec->pushbuf[2]; + + uint32_t stride_in = mb(dec->base.width); + uint32_t stride_out = mb(target->resources[0]->width0); + uint32_t dec_h = mb(dec->base.height); + uint32_t dec_w = mb(dec->base.width); + uint64_t in_addr; + uint32_t y2, cbcr, cbcr2, i; + struct nouveau_pushbuf_refn bo_refs[] = { + { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { dec->ref_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, +#if NOUVEAU_VP3_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + }; + unsigned num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + + for (i = 0; i < 2; ++i) { + struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i]; + bo_refs[i].bo = mt->base.bo; + } + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + nouveau_vp3_ycbcr_offsets(dec, &y2, &cbcr, &cbcr2); + + BEGIN_NVC0(push, SUBC_PPP(0x700), 10); + in_addr = nouveau_vp3_video_addr(dec, target) >> 8; + + PUSH_DATA (push, (stride_out << 24) | (stride_out << 16) | low700); // 700 + PUSH_DATA (push, (stride_in << 24) | (stride_in << 16) | (dec_h << 8) | dec_w); // 704 + assert(dec_w == stride_in); + + /* Input: */ + PUSH_DATA (push, in_addr); // 708 + PUSH_DATA (push, in_addr + y2); // 70c + PUSH_DATA (push, in_addr + cbcr); // 710 + PUSH_DATA (push, in_addr + cbcr2); 
// 714 + + for (i = 0; i < 2; ++i) { + struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i]; + + PUSH_DATA (push, mt->base.address >> 8); + PUSH_DATA (push, (mt->base.address + mt->total_size/2) >> 8); + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + } +} + +static uint32_t +nvc0_decoder_vc1_ppp(struct nouveau_vp3_decoder *dec, struct pipe_vc1_picture_desc *desc, struct nouveau_vp3_video_buffer *target) { + struct nouveau_pushbuf *push = dec->pushbuf[2]; + + nvc0_decoder_setup_ppp(dec, target, 0x1412); + assert(!desc->deblockEnable); + assert(!(dec->base.width & 0xf)); + assert(!(dec->base.height & 0xf)); + + BEGIN_NVC0(push, SUBC_PPP(0x400), 1); + PUSH_DATA (push, desc->pquant << 11); + + // 728 = wtf? + return 0x10; +} + +void +nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct nouveau_vp3_video_buffer *target, unsigned comm_seq) { + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); + struct nouveau_pushbuf *push = dec->pushbuf[2]; + unsigned ppp_caps = 0x10; + unsigned fence_extra = 0; + +#if NOUVEAU_VP3_DEBUG_FENCE + fence_extra = 4; +#endif + + nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 
2 : 0) + 3 + fence_extra + 2, 4, 0); + + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG12: { + unsigned mpeg2 = dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1; + nvc0_decoder_setup_ppp(dec, target, 0x1410 | mpeg2); + break; + } + case PIPE_VIDEO_FORMAT_MPEG4: nvc0_decoder_setup_ppp(dec, target, 0x1414); break; + case PIPE_VIDEO_FORMAT_VC1: ppp_caps = nvc0_decoder_vc1_ppp(dec, desc.vc1, target); break; + case PIPE_VIDEO_FORMAT_MPEG4_AVC: nvc0_decoder_setup_ppp(dec, target, 0x1413); break; + default: assert(0); + } + BEGIN_NVC0(push, SUBC_PPP(0x734), 2); + PUSH_DATA (push, comm_seq); + PUSH_DATA (push, ppp_caps); + +#if NOUVEAU_VP3_DEBUG_FENCE + BEGIN_NVC0(push, SUBC_PPP(0x240), 3); + PUSH_DATAh(push, (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push, (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NVC0(push, SUBC_PPP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK (push); + + { + unsigned spin = 0; + + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) + debug_printf("p%u: %u\n", dec->fence_seq, dec->fence_map[8]); + } while (dec->fence_seq > dec->fence_map[8]); + } +#else + BEGIN_NVC0(push, SUBC_PPP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); +#endif +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c new file mode 100644 index 00000000000..0d152b9624f --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c @@ -0,0 +1,202 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nvc0/nvc0_video.h" +#include <sys/mman.h> + +#if NOUVEAU_VP3_DEBUG_FENCE +static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq, + struct nouveau_bo *inter_bo, unsigned slice_size) +{ + unsigned i, idx = comm->pvp_cur_index & 0xf; + debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); +#if 0 + debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); + debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); + + for (i = 0; i != comm->irq_index; ++i) + debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); + for (i = 0; i != comm->parse_endpos_index; ++i) + debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); +#endif + debug_printf("mb_y = %u\n", comm->mb_y[idx]); + if (comm->status_vp[idx] == 1) + return; + + if ((comm->pvp_stage & 0xff) != 0xff) { + unsigned *map; + assert(nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client) >= 0); + map = inter_bo->map; + for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { + debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]); + } + munmap(inter_bo->map, inter_bo->size); + inter_bo->map = NULL; + } + assert((comm->pvp_stage & 0xff) == 0xff); +} +#endif + +static void 
+nvc0_decoder_kick_ref(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target) +{ + dec->refs[target->valid_ref].vidbuf = NULL; + dec->refs[target->valid_ref].last_used = 0; +// debug_printf("Unreffed %p\n", target); +} + +void +nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, unsigned comm_seq, + unsigned caps, unsigned is_ref, + struct nouveau_vp3_video_buffer *refs[16]) +{ + struct nouveau_pushbuf *push = dec->pushbuf[1]; + uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr; + uint32_t slice_size, bucket_size, ring_size, i; + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH]; + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; + u32 fence_extra = 0, codec_extra = 0; + struct nouveau_pushbuf_refn bo_refs[] = { + { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, +#if NOUVEAU_VP3_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo; + +#if NOUVEAU_VP3_DEBUG_FENCE + fence_extra = 4; +#endif + + if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) { + nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); + codec_extra += 2; + } else + nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size); + + if (dec->base.max_references > 2) + codec_extra += 1 + (dec->base.max_references - 2); + + pic_addr[16] = nouveau_vp3_video_addr(dec, target) >> 8; + last_addr = null_addr = nouveau_vp3_video_addr(dec, NULL) >> 8; + + for (i = 0; i < dec->base.max_references; ++i) { + if (!refs[i]) + pic_addr[i] = last_addr; + else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i]) + 
last_addr = pic_addr[i] = nouveau_vp3_video_addr(dec, refs[i]) >> 8; + else + pic_addr[i] = null_addr; + } + if (!is_ref) + nvc0_decoder_kick_ref(dec, target); + + nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) + + 6 + codec_extra + fence_extra + 2, num_refs, 0); + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + bsp_addr = bsp_bo->offset >> 8; +#if NOUVEAU_VP3_DEBUG_FENCE + comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8; +#else + comm_addr = bsp_addr + (COMM_OFFSET>>8); +#endif + inter_addr = inter_bo->offset >> 8; + if (dec->fw_bo) + ucode_addr = dec->fw_bo->offset >> 8; + else + ucode_addr = 0; + + BEGIN_NVC0(push, SUBC_VP(0x700), 7); + PUSH_DATA (push, caps); // 700 + PUSH_DATA (push, comm_seq); // 704 + PUSH_DATA (push, 0); // 708 fuc targets, ignored for nvc0 + PUSH_DATA (push, dec->fw_sizes); // 70c + PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr + PUSH_DATA (push, inter_addr); // 714 inter_parm + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs + + if (bucket_size) { + uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2); + + BEGIN_NVC0(push, SUBC_VP(0x71c), 2); + PUSH_DATA (push, tmpimg_addr >> 8); // 71c + PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs + } + + BEGIN_NVC0(push, SUBC_VP(0x724), 5); + PUSH_DATA (push, comm_addr); // 724 + PUSH_DATA (push, ucode_addr); // 728 + PUSH_DATA (push, pic_addr[16]); // 734 + PUSH_DATA (push, pic_addr[0]); // 72c + PUSH_DATA (push, pic_addr[1]); // 730 + + if (dec->base.max_references > 2) { + int i; + + BEGIN_NVC0(push, SUBC_VP(0x400), dec->base.max_references - 2); + for (i = 2; i < dec->base.max_references; ++i) { + assert(0x400 + (i - 2) * 4 < 0x438); + PUSH_DATA (push, pic_addr[i]); + } + } + + if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) { + BEGIN_NVC0(push, SUBC_VP(0x438), 1); + PUSH_DATA (push, desc.h264->slice_count); + } + + //debug_printf("Decoding %08lx with %08lx and 
%08lx\n", pic_addr[16], pic_addr[0], pic_addr[1]); + +#if NOUVEAU_VP3_DEBUG_FENCE + BEGIN_NVC0(push, SUBC_VP(0x240), 3); + PUSH_DATAh(push, (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push, (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NVC0(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK(push); + + { + unsigned spin = 0; + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) { + debug_printf("v%u: %u\n", dec->fence_seq, dec->fence_map[4]); + dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8); + } + } while (dec->fence_seq > dec->fence_map[4]); + } + dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8); +#else + BEGIN_NVC0(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); +#endif +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h new file mode 100644 index 00000000000..3514d9dc3d0 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h @@ -0,0 +1,144 @@ + +#ifndef __NVC0_WINSYS_H__ +#define __NVC0_WINSYS_H__ + +#include <stdint.h> +#include <unistd.h> + +#include "pipe/p_defines.h" + +#include "nouveau_winsys.h" +#include "nouveau_buffer.h" + +#ifndef NV04_PFIFO_MAX_PACKET_LEN +#define NV04_PFIFO_MAX_PACKET_LEN 2047 +#endif + + +static INLINE void +nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin, + unsigned flags, struct nouveau_bo *bo) +{ + nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL; +} + +static INLINE void +nvc0_add_resident(struct nouveau_bufctx *bufctx, int bin, + struct nv04_resource *res, unsigned flags) +{ + struct nouveau_bufref *ref = + nouveau_bufctx_refn(bufctx, bin, res->bo, flags | res->domain); + ref->priv = res; + ref->priv_data = flags; +} + +#define BCTX_REFN_bo(ctx, bin, fl, bo) \ + nv50_add_bufctx_resident_bo(ctx, NVC0_BIND_##bin, fl, bo); + +#define BCTX_REFN(bctx, bin, res, acc) \ + nvc0_add_resident(bctx, NVC0_BIND_##bin, res, NOUVEAU_BO_##acc) + 
+static INLINE void +PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) +{ + struct nouveau_pushbuf_refn ref = { bo, flags }; + nouveau_pushbuf_refn(push, &ref, 1); +} + + +#define SUBC_3D(m) 0, (m) +#define NVC0_3D(n) SUBC_3D(NVC0_3D_##n) +#define NVE4_3D(n) SUBC_3D(NVE4_3D_##n) + +#define SUBC_COMPUTE(m) 1, (m) +#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n) +#define NVE4_COMPUTE(n) SUBC_COMPUTE(NVE4_COMPUTE_##n) + +#define SUBC_M2MF(m) 2, (m) +#define SUBC_P2MF(m) 2, (m) +#define NVC0_M2MF(n) SUBC_M2MF(NVC0_M2MF_##n) +#define NVE4_P2MF(n) SUBC_P2MF(NVE4_P2MF_##n) + +#define SUBC_2D(m) 3, (m) +#define NVC0_2D(n) SUBC_2D(NVC0_2D_##n) + +#define SUBC_COPY(m) 4, (m) +#define NVE4_COPY(m) SUBC_COPY(NVE4_COPY_##n) + +#define SUBC_SW(m) 7, (m) + +static INLINE uint32_t +NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size) +{ + return 0x20000000 | (size << 16) | (subc << 13) | (mthd >> 2); +} + +static INLINE uint32_t +NVC0_FIFO_PKHDR_NI(int subc, int mthd, unsigned size) +{ + return 0x60000000 | (size << 16) | (subc << 13) | (mthd >> 2); +} + +static INLINE uint32_t +NVC0_FIFO_PKHDR_IL(int subc, int mthd, uint8_t data) +{ + return 0x80000000 | (data << 16) | (subc << 13) | (mthd >> 2); +} + +static INLINE uint32_t +NVC0_FIFO_PKHDR_1I(int subc, int mthd, unsigned size) +{ + return 0xa0000000 | (size << 16) | (subc << 13) | (mthd >> 2); +} + + +static INLINE uint8_t +nouveau_bo_memtype(const struct nouveau_bo *bo) +{ + return bo->config.nvc0.memtype; +} + + +static INLINE void +PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data) +{ + *push->cur++ = (uint32_t)(data >> 32); +} + +static INLINE void +BEGIN_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size) +{ +#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING + PUSH_SPACE(push, size + 1); +#endif + PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(subc, mthd, size)); +} + +static INLINE void +BEGIN_NIC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size) +{ +#ifndef 
NVC0_PUSH_EXPLICIT_SPACE_CHECKING + PUSH_SPACE(push, size + 1); +#endif + PUSH_DATA (push, NVC0_FIFO_PKHDR_NI(subc, mthd, size)); +} + +static INLINE void +BEGIN_1IC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size) +{ +#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING + PUSH_SPACE(push, size + 1); +#endif + PUSH_DATA (push, NVC0_FIFO_PKHDR_1I(subc, mthd, size)); +} + +static INLINE void +IMMED_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, uint8_t data) +{ +#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING + PUSH_SPACE(push, 1); +#endif + PUSH_DATA (push, NVC0_FIFO_PKHDR_IL(subc, mthd, data)); +} + +#endif /* __NVC0_WINSYS_H__ */ diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c new file mode 100644 index 00000000000..06c914fb5e6 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -0,0 +1,652 @@ +/* + * Copyright 2012 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christoph Bumiller + */ + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_compute.h" +#include "nvc0/nve4_compute.h" + +#include "codegen/nv50_ir_driver.h" + +#ifdef DEBUG +static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *); +#endif + + +int +nve4_screen_compute_setup(struct nvc0_screen *screen, + struct nouveau_pushbuf *push) +{ + struct nouveau_device *dev = screen->base.device; + struct nouveau_object *chan = screen->base.channel; + unsigned i; + int ret; + uint32_t obj_class; + + switch (dev->chipset & 0xf0) { + case 0xf0: + obj_class = NVF0_COMPUTE_CLASS; /* GK110 */ + break; + case 0xe0: + obj_class = NVE4_COMPUTE_CLASS; /* GK104 */ + break; + default: + NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); + return -1; + } + + ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0, + &screen->compute); + if (ret) { + NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); + return ret; + } + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, NVE4_CP_PARAM_SIZE, NULL, + &screen->parm); + if (ret) + return ret; + + BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->compute->oclass); + + BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->tls->offset); + PUSH_DATA (push, screen->tls->offset); + /* No idea why there are 2. Divide size by 2 to be safe. + * Actually this might be per-MP TEMP size and looks like I'm only using + * 2 MPs instead of all 8. 
+ */ + BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(0)), 3); + PUSH_DATAh(push, screen->tls->size / screen->mp_count); + PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); + PUSH_DATA (push, 0xff); + BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(1)), 3); + PUSH_DATAh(push, screen->tls->size / screen->mp_count); + PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); + PUSH_DATA (push, 0xff); + + /* Unified address space ? Who needs that ? Certainly not OpenCL. + * + * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be + * accessible. We cannot prevent that at the moment, so expect failure. + */ + BEGIN_NVC0(push, NVE4_COMPUTE(LOCAL_BASE), 1); + PUSH_DATA (push, 1 << 24); + BEGIN_NVC0(push, NVE4_COMPUTE(SHARED_BASE), 1); + PUSH_DATA (push, 2 << 24); + + BEGIN_NVC0(push, NVE4_COMPUTE(CODE_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->text->offset); + PUSH_DATA (push, screen->text->offset); + + BEGIN_NVC0(push, SUBC_COMPUTE(0x0310), 1); + PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 
0x400 : 0x300); + + /* NOTE: these do not affect the state used by the 3D object */ + BEGIN_NVC0(push, NVE4_COMPUTE(TIC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset); + PUSH_DATA (push, screen->txc->offset); + PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); + BEGIN_NVC0(push, NVE4_COMPUTE(TSC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset + 65536); + PUSH_DATA (push, screen->txc->offset + 65536); + PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); + + if (obj_class >= NVF0_COMPUTE_CLASS) { + BEGIN_NVC0(push, SUBC_COMPUTE(0x0248), 1); + PUSH_DATA (push, 0x100); + BEGIN_NIC0(push, SUBC_COMPUTE(0x0248), 63); + for (i = 63; i >= 1; --i) + PUSH_DATA(push, 0x38000 | i); + IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0); + IMMED_NVC0(push, SUBC_COMPUTE(0x518), 0); + } + + BEGIN_NVC0(push, NVE4_COMPUTE(TEX_CB_INDEX), 1); + PUSH_DATA (push, 0); /* does not interefere with 3D */ + + if (obj_class >= NVF0_COMPUTE_CLASS) + IMMED_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + + /* MS sample coordinate offsets: these do not work with _ALT modes ! 
*/ + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS); + PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 64); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + PUSH_DATA (push, 0); /* 0 */ + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); /* 1 */ + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); /* 2 */ + PUSH_DATA (push, 1); + PUSH_DATA (push, 1); /* 3 */ + PUSH_DATA (push, 1); + PUSH_DATA (push, 2); /* 4 */ + PUSH_DATA (push, 0); + PUSH_DATA (push, 3); /* 5 */ + PUSH_DATA (push, 0); + PUSH_DATA (push, 2); /* 6 */ + PUSH_DATA (push, 1); + PUSH_DATA (push, 3); /* 7 */ + PUSH_DATA (push, 1); + +#ifdef DEBUG + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR); + PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 28); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 8); + PUSH_DATA (push, 1); + PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO); + PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO); + PUSH_DATA (push, screen->tls->offset); + PUSH_DATAh(push, screen->tls->offset); + PUSH_DATA (push, screen->tls->size / 2); /* MP TEMP block size */ + PUSH_DATA (push, screen->tls->size / 2 / 64); /* warp TEMP block size */ + PUSH_DATA (push, 0); /* warp cfstack size */ +#endif + + BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); + + return 0; +} + + +static void +nve4_compute_validate_surfaces(struct nvc0_context *nvc0) +{ + struct nvc0_screen *screen = nvc0->screen; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv50_surface *sf; + struct 
nv04_resource *res; + uint32_t mask; + unsigned i; + const unsigned t = 1; + + mask = nvc0->surfaces_dirty[t]; + while (mask) { + i = ffs(mask) - 1; + mask &= ~(1 << i); + + /* + * NVE4's surface load/store instructions receive all the information + * directly instead of via binding points, so we have to supply them. + */ + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i)); + PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i)); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 64); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + + nve4_set_surface_info(push, nvc0->surfaces[t][i], screen); + + sf = nv50_surface(nvc0->surfaces[t][i]); + if (sf) { + res = nv04_resource(sf->base.texture); + + if (sf->base.writable) + BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR); + else + BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD); + } + } + if (nvc0->surfaces_dirty[t]) { + BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); + } + + /* re-reference non-dirty surfaces */ + mask = nvc0->surfaces_valid[t] & ~nvc0->surfaces_dirty[t]; + while (mask) { + i = ffs(mask) - 1; + mask &= ~(1 << i); + + sf = nv50_surface(nvc0->surfaces[t][i]); + res = nv04_resource(sf->base.texture); + + if (sf->base.writable) + BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR); + else + BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD); + } + + nvc0->surfaces_dirty[t] = 0; +} + + +/* Thankfully, textures with samplers follow the normal rules. */ +static void +nve4_compute_validate_samplers(struct nvc0_context *nvc0) +{ + boolean need_flush = nve4_validate_tsc(nvc0, 5); + if (need_flush) { + BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, 0); + } +} +/* (Code duplicated at bottom for various non-convincing reasons. + * E.g. 
we might want to use the COMPUTE subchannel to upload TIC/TSC + * entries to avoid a subchannel switch. + * Same for texture cache flushes. + * Also, the bufctx differs, and more IFs in the 3D version looks ugly.) + */ +static void nve4_compute_validate_textures(struct nvc0_context *); + +static void +nve4_compute_set_tex_handles(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + uint64_t address; + const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE); + unsigned i, n; + uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s]; + + if (!dirty) + return; + i = ffs(dirty) - 1; + n = util_logbase2(dirty) + 1 - i; + assert(n); + + address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i); + + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, n * 4); + PUSH_DATA (push, 0x1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + n); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + PUSH_DATAp(push, &nvc0->tex_handles[s][i], n); + + BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); + + nvc0->textures_dirty[s] = 0; + nvc0->samplers_dirty[s] = 0; +} + + +static boolean +nve4_compute_state_validate(struct nvc0_context *nvc0) +{ + if (!nvc0_compute_validate_program(nvc0)) + return FALSE; + if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES) + nve4_compute_validate_textures(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS) + nve4_compute_validate_samplers(nvc0); + if (nvc0->dirty_cp & (NVC0_NEW_CP_TEXTURES | NVC0_NEW_CP_SAMPLERS)) + nve4_compute_set_tex_handles(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_SURFACES) + nve4_compute_validate_surfaces(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS) + nvc0_validate_global_residents(nvc0, + nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL); + + nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE); + + 
nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp); + if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf))) + return FALSE; + if (unlikely(nvc0->state.flushed)) + nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE); + + return TRUE; +} + + +static void +nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input, + const uint *block_layout, + const uint *grid_layout) +{ + struct nvc0_screen *screen = nvc0->screen; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *cp = nvc0->compprog; + + if (cp->parm_size) { + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->parm->offset); + PUSH_DATA (push, screen->parm->offset); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, cp->parm_size); + PUSH_DATA (push, 0x1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4)); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + PUSH_DATAp(push, input, cp->parm_size / 4); + } + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); + PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 7 * 4); + PUSH_DATA (push, 0x1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + 7); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + PUSH_DATAp(push, block_layout, 3); + PUSH_DATAp(push, grid_layout, 3); + PUSH_DATA (push, 0); + + BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); +} + +static INLINE uint8_t +nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size) +{ + if (shared_size > (32 << 10)) + return NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1; + if (shared_size > (16 << 10)) + return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1; + return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1; +} + +static void 
+nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, + struct nve4_cp_launch_desc *desc, + uint32_t label, + const uint *block_layout, + const uint *grid_layout) +{ + const struct nvc0_screen *screen = nvc0->screen; + const struct nvc0_program *cp = nvc0->compprog; + unsigned i; + + nve4_cp_launch_desc_init_default(desc); + + desc->entry = nvc0_program_symbol_offset(cp, label); + + desc->griddim_x = grid_layout[0]; + desc->griddim_y = grid_layout[1]; + desc->griddim_z = grid_layout[2]; + desc->blockdim_x = block_layout[0]; + desc->blockdim_y = block_layout[1]; + desc->blockdim_z = block_layout[2]; + + desc->shared_size = align(cp->cp.smem_size, 0x100); + desc->local_size_p = align(cp->cp.lmem_size, 0x10); + desc->local_size_n = 0; + desc->cstack_size = 0x800; + desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size); + + desc->gpr_alloc = cp->num_gprs; + desc->bar_alloc = cp->num_barriers; + + for (i = 0; i < 7; ++i) { + const unsigned s = 5; + if (nvc0->constbuf[s][i].u.buf) + nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]); + } + nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE); +} + +static INLINE struct nve4_cp_launch_desc * +nve4_compute_alloc_launch_desc(struct nouveau_context *nv, + struct nouveau_bo **pbo, uint64_t *pgpuaddr) +{ + uint8_t *ptr = nouveau_scratch_get(nv, 512, pgpuaddr, pbo); + if (!ptr) + return NULL; + if (*pgpuaddr & 255) { + unsigned adj = 256 - (*pgpuaddr & 255); + ptr += adj; + *pgpuaddr += adj; + } + return (struct nve4_cp_launch_desc *)ptr; +} + +void +nve4_launch_grid(struct pipe_context *pipe, + const uint *block_layout, const uint *grid_layout, + uint32_t label, + const void *input) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nve4_cp_launch_desc *desc; + uint64_t desc_gpuaddr; + struct nouveau_bo *desc_bo; + int ret; + + desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, 
&desc_gpuaddr); + if (!desc) { + ret = -1; + goto out; + } + BCTX_REFN_bo(nvc0->bufctx_cp, CP_DESC, NOUVEAU_BO_GART | NOUVEAU_BO_RD, + desc_bo); + + ret = !nve4_compute_state_validate(nvc0); + if (ret) + goto out; + + nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout); +#ifdef DEBUG + if (debug_get_num_option("NV50_PROG_DEBUG", 0)) + nve4_compute_dump_launch_desc(desc); +#endif + + nve4_compute_upload_input(nvc0, input, block_layout, grid_layout); + + /* upload descriptor and flush */ +#if 0 + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, desc_gpuaddr); + PUSH_DATA (push, desc_gpuaddr); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 256); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (256 / 4)); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1)); + PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4); + BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE); +#endif + BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH_DESC_ADDRESS), 1); + PUSH_DATA (push, desc_gpuaddr >> 8); + BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH), 1); + PUSH_DATA (push, 0x3); + BEGIN_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + +out: + if (ret) + NOUVEAU_ERR("Failed to launch grid !\n"); + nouveau_scratch_done(&nvc0->base); + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC); +} + + +#define NVE4_TIC_ENTRY_INVALID 0x000fffff + +static void +nve4_compute_validate_textures(struct nvc0_context *nvc0) +{ + struct nouveau_bo *txc = nvc0->screen->txc; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const unsigned s = 5; + unsigned i; + uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX]; + unsigned n[2] = { 0, 0 }; + + for (i = 0; i < nvc0->num_textures[s]; ++i) { + struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); + struct nv04_resource *res; + const boolean dirty = 
!!(nvc0->textures_dirty[s] & (1 << i)); + + if (!tic) { + nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; + continue; + } + res = nv04_resource(tic->pipe.texture); + + if (tic->id < 0) { + tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); + + PUSH_SPACE(push, 16); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, txc->offset + (tic->id * 32)); + PUSH_DATA (push, txc->offset + (tic->id * 32)); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 32); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 9); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + PUSH_DATAp(push, &tic->tic[0], 8); + + commands[0][n[0]++] = (tic->id << 4) | 1; + } else + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + commands[1][n[1]++] = (tic->id << 4) | 1; + } + nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID; + nvc0->tex_handles[s][i] |= tic->id; + if (dirty) + BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD); + } + for (; i < nvc0->state.num_textures[s]; ++i) + nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; + + if (n[0]) { + BEGIN_NIC0(push, NVE4_COMPUTE(TIC_FLUSH), n[0]); + PUSH_DATAp(push, commands[0], n[0]); + } + if (n[1]) { + BEGIN_NIC0(push, NVE4_COMPUTE(TEX_CACHE_CTL), n[1]); + PUSH_DATAp(push, commands[1], n[1]); + } + + nvc0->state.num_textures[s] = nvc0->num_textures[s]; +} + + +#ifdef DEBUG +static const char *nve4_cache_split_name(unsigned value) +{ + switch (value) { + case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1"; + case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1"; + case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1"; + default: + return "(invalid)"; + } +} + +static void +nve4_compute_dump_launch_desc(const struct 
nve4_cp_launch_desc *desc) +{ + const uint32_t *data = (const uint32_t *)desc; + unsigned i; + boolean zero = FALSE; + + debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n"); + + for (i = 0; i < sizeof(*desc); i += 4) { + if (data[i / 4]) { + debug_printf("[%x]: 0x%08x\n", i, data[i / 4]); + zero = FALSE; + } else + if (!zero) { + debug_printf("...\n"); + zero = TRUE; + } + } + + debug_printf("entry = 0x%x\n", desc->entry); + debug_printf("grid dimensions = %ux%ux%u\n", + desc->griddim_x, desc->griddim_y, desc->griddim_z); + debug_printf("block dimensions = %ux%ux%u\n", + desc->blockdim_x, desc->blockdim_y, desc->blockdim_z); + debug_printf("s[] size: 0x%x\n", desc->shared_size); + debug_printf("l[] size: -0x%x / +0x%x\n", + desc->local_size_n, desc->local_size_p); + debug_printf("stack size: 0x%x\n", desc->cstack_size); + debug_printf("barrier count: %u\n", desc->bar_alloc); + debug_printf("$r count: %u\n", desc->gpr_alloc); + debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split)); + + for (i = 0; i < 8; ++i) { + uint64_t address; + uint32_t size = desc->cb[i].size; + boolean valid = !!(desc->cb_mask & (1 << i)); + + address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l; + + if (!valid && !address && !size) + continue; + debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n", + i, address, size, valid ? 
"" : " (invalid)"); + } +} +#endif + +#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER +static void +nve4_compute_trap_info(struct nvc0_context *nvc0) +{ + struct nvc0_screen *screen = nvc0->screen; + struct nouveau_bo *bo = screen->parm; + int ret, i; + volatile struct nve4_mp_trap_info *info; + uint8_t *map; + + ret = nouveau_bo_map(bo, NOUVEAU_BO_RDWR, nvc0->base.client); + if (ret) + return; + map = (uint8_t *)bo->map; + info = (volatile struct nve4_mp_trap_info *)(map + NVE4_CP_PARAM_TRAP_INFO); + + if (info->lock) { + debug_printf("trapstat = %08x\n", info->trapstat); + debug_printf("warperr = %08x\n", info->warperr); + debug_printf("PC = %x\n", info->pc); + debug_printf("tid = %u %u %u\n", + info->tid[0], info->tid[1], info->tid[2]); + debug_printf("ctaid = %u %u %u\n", + info->ctaid[0], info->ctaid[1], info->ctaid[2]); + for (i = 0; i <= 63; ++i) + debug_printf("$r%i = %08x\n", i, info->r[i]); + for (i = 0; i <= 6; ++i) + debug_printf("$p%i = %i\n", i, (info->flags >> i) & 1); + debug_printf("$c = %x\n", info->flags >> 12); + } + info->lock = 0; +} +#endif diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h new file mode 100644 index 00000000000..79862b7dcd8 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h @@ -0,0 +1,131 @@ + +#ifndef NVE4_COMPUTE_H +#define NVE4_COMPUTE_H + +#include "nv50/nv50_defs.xml.h" +#include "nvc0/nve4_compute.xml.h" + +/* Input space is implemented as c0[], to which we bind the screen->parm bo. 
+ */ +#define NVE4_CP_INPUT_USER 0x0000 +#define NVE4_CP_INPUT_USER_LIMIT 0x1000 +#define NVE4_CP_INPUT_GRID_INFO(i) (0x1000 + (i) * 4) +#define NVE4_CP_INPUT_NTID(i) (0x1000 + (i) * 4) +#define NVE4_CP_INPUT_NCTAID(i) (0x100c + (i) * 4) +#define NVE4_CP_INPUT_GRIDID 0x1018 +#define NVE4_CP_INPUT_TEX(i) (0x1040 + (i) * 4) +#define NVE4_CP_INPUT_TEX_STRIDE 4 +#define NVE4_CP_INPUT_TEX_MAX 32 +#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0 +#define NVE4_CP_INPUT_SUF_STRIDE 64 +#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE) +#define NVE4_CP_INPUT_SUF_MAX 32 +#define NVE4_CP_INPUT_TRAP_INFO_PTR 0x1900 +#define NVE4_CP_INPUT_TEMP_PTR 0x1908 +#define NVE4_CP_INPUT_MP_TEMP_SIZE 0x1910 +#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914 +#define NVE4_CP_INPUT_CSTACK_SIZE 0x1918 +#define NVE4_CP_INPUT_SIZE 0x1a00 +#define NVE4_CP_PARAM_TRAP_INFO 0x2000 +#define NVE4_CP_PARAM_TRAP_INFO_SZ (1 << 16) +#define NVE4_CP_PARAM_SIZE (NVE4_CP_PARAM_TRAP_INFO + (1 << 16)) + +struct nve4_cp_launch_desc +{ + u32 unk0[8]; + u32 entry; + u32 unk9[3]; + u32 griddim_x : 31; + u32 unk12 : 1; + u16 griddim_y; + u16 griddim_z; + u32 unk14[3]; + u16 shared_size; /* must be aligned to 0x100 */ + u16 unk15; + u16 unk16; + u16 blockdim_x; + u16 blockdim_y; + u16 blockdim_z; + u32 cb_mask : 8; + u32 unk20_8 : 21; + u32 cache_split : 2; + u32 unk20_31 : 1; + u32 unk21[8]; + struct { + u32 address_l; + u32 address_h : 8; + u32 reserved : 7; + u32 size : 17; + } cb[8]; + u32 local_size_p : 20; + u32 unk45_20 : 7; + u32 bar_alloc : 5; + u32 local_size_n : 20; + u32 unk46_20 : 4; + u32 gpr_alloc : 8; + u32 cstack_size : 20; + u32 unk47_20 : 12; + u32 unk48[16]; +}; + +static INLINE void +nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc) +{ + memset(desc, 0, sizeof(*desc)); + + desc->unk0[7] = 0xbc000000; + desc->unk9[2] = 0x44014000; + desc->unk47_20 = 0x300; +} + +static INLINE void +nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc, + unsigned index, + struct 
nouveau_bo *bo, + uint32_t base, uint16_t size) +{ + uint64_t address = bo->offset + base; + + assert(index < 8); + assert(!(base & 0xff)); + assert(size <= 65536); + + desc->cb[index].address_l = address; + desc->cb[index].address_h = address >> 32; + desc->cb[index].size = size; + + desc->cb_mask |= 1 << index; +} + +static INLINE void +nve4_cp_launch_desc_set_ctx_cb(struct nve4_cp_launch_desc *desc, + unsigned index, + const struct nvc0_constbuf *cb) +{ + assert(index < 8); + + if (!cb->u.buf) { + desc->cb_mask &= ~(1 << index); + } else { + const struct nv04_resource *buf = nv04_resource(cb->u.buf); + assert(!cb->user); + nve4_cp_launch_desc_set_cb(desc, index, + buf->bo, buf->offset + cb->offset, cb->size); + } +} + +struct nve4_mp_trap_info { + u32 lock; + u32 pc; + u32 trapstat; + u32 warperr; + u32 tid[3]; + u32 ctaid[3]; + u32 pad028[2]; + u32 r[64]; + u32 flags; + u32 pad134[3]; + u32 s[0x3000]; +}; + +#endif /* NVE4_COMPUTE_H */ diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h new file mode 100644 index 00000000000..e971fc1ac6b --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h @@ -0,0 +1,429 @@ +#ifndef NVE4_COMPUTE_XML +#define NVE4_COMPUTE_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nve4_compute.xml ( 10168 bytes, from 2013-06-04 13:57:02) +- copyright.xml ( 6452 bytes, from 2012-04-16 22:51:01) +- nvchipsets.xml ( 3954 bytes, from 2013-06-04 13:57:02) +- nv_object.xml ( 14395 bytes, from 2013-06-04 13:57:02) +- nv_defs.xml ( 4437 bytes, from 2012-04-16 22:51:01) +- nv50_defs.xml ( 16877 bytes, from 2013-07-17 09:10:01) +- nve4_p2mf.xml ( 2373 bytes, from 2013-06-04 13:57:02) + +Copyright (C) 2006-2013 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) 
+- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + + +#define NVE4_COMPUTE_UNK0144 0x00000144 + +#define NVE4_COMPUTE_UPLOAD 0x00000000 + +#define NVE4_COMPUTE_UPLOAD_LINE_LENGTH_IN 0x00000180 + +#define NVE4_COMPUTE_UPLOAD_LINE_COUNT 0x00000184 + +#define NVE4_COMPUTE_UPLOAD_DST_ADDRESS_HIGH 0x00000188 + +#define NVE4_COMPUTE_UPLOAD_DST_ADDRESS_LOW 0x0000018c + +#define NVE4_COMPUTE_UPLOAD_DST_PITCH 0x00000190 + +#define NVE4_COMPUTE_UPLOAD_DST_TILE_MODE 0x00000194 + +#define NVE4_COMPUTE_UPLOAD_DST_WIDTH 0x00000198 + +#define NVE4_COMPUTE_UPLOAD_DST_HEIGHT 0x0000019c + +#define NVE4_COMPUTE_UPLOAD_DST_DEPTH 0x000001a0 + +#define NVE4_COMPUTE_UPLOAD_DST_Z 0x000001a4 + +#define NVE4_COMPUTE_UPLOAD_DST_X 0x000001a8 + +#define NVE4_COMPUTE_UPLOAD_DST_Y 0x000001ac + +#define NVE4_COMPUTE_UPLOAD_EXEC 0x000001b0 +#define NVE4_COMPUTE_UPLOAD_EXEC_LINEAR 0x00000001 +#define NVE4_COMPUTE_UPLOAD_EXEC_UNK1__MASK 0x0000007e +#define NVE4_COMPUTE_UPLOAD_EXEC_UNK1__SHIFT 1 +#define NVE4_COMPUTE_UPLOAD_EXEC_BUF_NOTIFY 0x00000300 +#define NVE4_COMPUTE_UPLOAD_EXEC_UNK12__MASK 0x0000f000 +#define NVE4_COMPUTE_UPLOAD_EXEC_UNK12__SHIFT 12 + +#define NVE4_COMPUTE_UPLOAD_DATA 0x000001b4 + +#define NVE4_COMPUTE_UPLOAD_QUERY_ADDRESS_HIGH 0x000001dc + +#define NVE4_COMPUTE_UPLOAD_QUERY_ADDRESS_LOW 0x000001e0 + +#define NVE4_COMPUTE_UPLOAD_QUERY_SEQUENCE 0x000001e4 + +#define NVE4_COMPUTE_UPLOAD_UNK01F0 0x000001f0 + +#define NVE4_COMPUTE_UPLOAD_UNK01F4 0x000001f4 + +#define NVE4_COMPUTE_UPLOAD_UNK01F8 0x000001f8 + +#define NVE4_COMPUTE_UPLOAD_UNK01FC 0x000001fc + +#define NVE4_COMPUTE_SHARED_BASE 0x00000214 + +#define NVE4_COMPUTE_MEM_BARRIER 0x0000021c +#define NVE4_COMPUTE_MEM_BARRIER_UNK0__MASK 0x00000007 +#define NVE4_COMPUTE_MEM_BARRIER_UNK0__SHIFT 0 +#define NVE4_COMPUTE_MEM_BARRIER_UNK4 0x00000010 +#define NVE4_COMPUTE_MEM_BARRIER_UNK12 0x00001000 + +#define NVE4_COMPUTE_UNK0240 0x00000240 + +#define NVE4_COMPUTE_UNK244_TIC_FLUSH 0x00000244 + +#define NVE4_COMPUTE_UNK0248 0x00000248 +#define NVE4_COMPUTE_UNK0248_UNK0__MASK 
0x0000003f +#define NVE4_COMPUTE_UNK0248_UNK0__SHIFT 0 +#define NVE4_COMPUTE_UNK0248_UNK8__MASK 0x00ffff00 +#define NVE4_COMPUTE_UNK0248_UNK8__SHIFT 8 + +#define NVE4_COMPUTE_UNK0274 0x00000274 + +#define NVE4_COMPUTE_UNK0278 0x00000278 + +#define NVE4_COMPUTE_UNK027C 0x0000027c + +#define NVE4_COMPUTE_UNK0280 0x00000280 + +#define NVE4_COMPUTE_UNK0284 0x00000284 + +#define NVE4_COMPUTE_UNK0288 0x00000288 + +#define NVE4_COMPUTE_UNK0290 0x00000290 + +#define NVE4_COMPUTE_UNK02B0 0x000002b0 + +#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS 0x000002b4 +#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS__SHR 8 + +#define NVE4_COMPUTE_UNK02B8 0x000002b8 + +#define NVE4_COMPUTE_LAUNCH 0x000002bc + +#define NVE4_COMPUTE_MP_TEMP_SIZE(i0) (0x000002e4 + 0xc*(i0)) +#define NVE4_COMPUTE_MP_TEMP_SIZE__ESIZE 0x0000000c +#define NVE4_COMPUTE_MP_TEMP_SIZE__LEN 0x00000002 + +#define NVE4_COMPUTE_MP_TEMP_SIZE_HIGH(i0) (0x000002e4 + 0xc*(i0)) + +#define NVE4_COMPUTE_MP_TEMP_SIZE_LOW(i0) (0x000002e8 + 0xc*(i0)) + +#define NVE4_COMPUTE_MP_TEMP_SIZE_MASK(i0) (0x000002ec + 0xc*(i0)) + +#define NVE4_COMPUTE_UNK0310 0x00000310 + +#define NVE4_COMPUTE_FIRMWARE(i0) (0x00000500 + 0x4*(i0)) +#define NVE4_COMPUTE_FIRMWARE__ESIZE 0x00000004 +#define NVE4_COMPUTE_FIRMWARE__LEN 0x00000020 + +#define NVE4_COMPUTE_LOCAL_BASE 0x0000077c + +#define NVE4_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790 + +#define NVE4_COMPUTE_TEMP_ADDRESS_LOW 0x00000794 + +#define NVE4_COMPUTE_UNK0D94 0x00000d94 + +#define NVE4_COMPUTE_WATCHDOG_TIMER 0x00000de4 + +#define NVE4_COMPUTE_UNK0F44(i0) (0x00000f44 + 0x4*(i0)) +#define NVE4_COMPUTE_UNK0F44__ESIZE 0x00000004 +#define NVE4_COMPUTE_UNK0F44__LEN 0x00000004 + +#define NVE4_COMPUTE_UNK1040(i0) (0x00001040 + 0x4*(i0)) +#define NVE4_COMPUTE_UNK1040__ESIZE 0x00000004 +#define NVE4_COMPUTE_UNK1040__LEN 0x0000000c + +#define NVE4_COMPUTE_UNK1288_TIC_FLUSH 0x00001288 + +#define NVE4_COMPUTE_TSC_FLUSH 0x00001330 +#define NVE4_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001 +#define 
NVE4_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVE4_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NVE4_COMPUTE_TIC_FLUSH 0x00001334 +#define NVE4_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001 +#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NVE4_COMPUTE_TEX_CACHE_CTL 0x00001338 +#define NVE4_COMPUTE_TEX_CACHE_CTL_UNK0 0x00000001 +#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0 +#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4 + +#define NVE4_COMPUTE_UNK1424_TSC_FLUSH 0x00001424 + +#define NVE4_COMPUTE_COND_ADDRESS_HIGH 0x00001550 + +#define NVE4_COMPUTE_COND_ADDRESS_LOW 0x00001554 + +#define NVE4_COMPUTE_COND_MODE 0x00001558 +#define NVE4_COMPUTE_COND_MODE_NEVER 0x00000000 +#define NVE4_COMPUTE_COND_MODE_ALWAYS 0x00000001 +#define NVE4_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002 +#define NVE4_COMPUTE_COND_MODE_EQUAL 0x00000003 +#define NVE4_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVE4_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c + +#define NVE4_COMPUTE_TSC_ADDRESS_LOW 0x00001560 + +#define NVE4_COMPUTE_TSC_LIMIT 0x00001564 + +#define NVE4_COMPUTE_TIC_ADDRESS_HIGH 0x00001574 + +#define NVE4_COMPUTE_TIC_ADDRESS_LOW 0x00001578 + +#define NVE4_COMPUTE_TIC_LIMIT 0x0000157c + +#define NVE4_COMPUTE_CODE_ADDRESS_HIGH 0x00001608 + +#define NVE4_COMPUTE_CODE_ADDRESS_LOW 0x0000160c + +#define NVE4_COMPUTE_UNK1690 0x00001690 + +#define NVE4_COMPUTE_FLUSH 0x00001698 +#define NVE4_COMPUTE_FLUSH_CODE 0x00000001 +#define NVE4_COMPUTE_FLUSH_GLOBAL 0x00000010 +#define NVE4_COMPUTE_FLUSH_CB 0x00001000 + +#define NVE4_COMPUTE_UNK1944 0x00001944 + +#define NVE4_COMPUTE_DELAY 0x00001a24 + +#define NVE4_COMPUTE_UNK1A2C(i0) (0x00001a2c + 0x4*(i0)) +#define NVE4_COMPUTE_UNK1A2C__ESIZE 0x00000004 +#define NVE4_COMPUTE_UNK1A2C__LEN 0x00000005 + +#define NVE4_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00 + +#define NVE4_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04 + +#define NVE4_COMPUTE_QUERY_SEQUENCE 0x00001b08 + +#define 
NVE4_COMPUTE_QUERY_GET 0x00001b0c +#define NVE4_COMPUTE_QUERY_GET_MODE__MASK 0x00000003 +#define NVE4_COMPUTE_QUERY_GET_MODE__SHIFT 0 +#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000 +#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003 +#define NVE4_COMPUTE_QUERY_GET_INTR 0x00100000 +#define NVE4_COMPUTE_QUERY_GET_SHORT 0x10000000 + +#define NVE4_COMPUTE_TEX_CB_INDEX 0x00002608 + +#define NVE4_COMPUTE_UNK260C 0x0000260c + +#define NVE4_COMPUTE_MP_PM_SET(i0) (0x0000335c + 0x4*(i0)) +#define NVE4_COMPUTE_MP_PM_SET__ESIZE 0x00000004 +#define NVE4_COMPUTE_MP_PM_SET__LEN 0x00000008 + +#define NVE4_COMPUTE_MP_PM_A_SIGSEL(i0) (0x0000337c + 0x4*(i0)) +#define NVE4_COMPUTE_MP_PM_A_SIGSEL__ESIZE 0x00000004 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL__LEN 0x00000004 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_NONE 0x00000000 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_USER 0x00000001 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_LAUNCH 0x00000003 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_EXEC 0x00000004 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_ISSUE 0x00000005 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_LDST 0x0000001b +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_BRANCH 0x0000001c + +#define NVE4_COMPUTE_MP_PM_B_SIGSEL(i0) (0x0000338c + 0x4*(i0)) +#define NVE4_COMPUTE_MP_PM_B_SIGSEL__ESIZE 0x00000004 +#define NVE4_COMPUTE_MP_PM_B_SIGSEL__LEN 0x00000004 +#define NVE4_COMPUTE_MP_PM_B_SIGSEL_NONE 0x00000000 +#define NVE4_COMPUTE_MP_PM_B_SIGSEL_WARP 0x00000002 +#define NVE4_COMPUTE_MP_PM_B_SIGSEL_REPLAY 0x00000008 +#define NVE4_COMPUTE_MP_PM_B_SIGSEL_TRANSACTION 0x0000000e +#define NVE4_COMPUTE_MP_PM_B_SIGSEL_L1 0x00000010 +#define NVE4_COMPUTE_MP_PM_B_SIGSEL_MEM 0x00000011 + +#define NVE4_COMPUTE_MP_PM_SRCSEL(i0) (0x0000339c + 0x4*(i0)) +#define NVE4_COMPUTE_MP_PM_SRCSEL__ESIZE 0x00000004 +#define NVE4_COMPUTE_MP_PM_SRCSEL__LEN 0x00000008 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP0__MASK 0x00000003 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP0__SHIFT 0 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG0__MASK 0x0000001c +#define 
NVE4_COMPUTE_MP_PM_SRCSEL_SIG0__SHIFT 2 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP1__MASK 0x00000060 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP1__SHIFT 5 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG1__MASK 0x00000380 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG1__SHIFT 7 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP2__MASK 0x00000c00 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP2__SHIFT 10 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG2__MASK 0x00007000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG2__SHIFT 12 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP3__MASK 0x00018000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP3__SHIFT 15 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG3__MASK 0x000e0000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG3__SHIFT 17 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP4__MASK 0x00300000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP4__SHIFT 20 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG4__MASK 0x01c00000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG4__SHIFT 22 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP5__MASK 0x06000000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP5__SHIFT 25 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG5__MASK 0x38000000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG5__SHIFT 27 + +#define NVE4_COMPUTE_MP_PM_FUNC(i0) (0x000033bc + 0x4*(i0)) +#define NVE4_COMPUTE_MP_PM_FUNC__ESIZE 0x00000004 +#define NVE4_COMPUTE_MP_PM_FUNC__LEN 0x00000008 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE__MASK 0x0000000f +#define NVE4_COMPUTE_MP_PM_FUNC_MODE__SHIFT 0 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP 0x00000000 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP_PULSE 0x00000001 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_B6 0x00000002 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK3 0x00000003 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP_B6 0x00000004 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP_B6_PULSE 0x00000005 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK6 0x00000006 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK7 0x00000007 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK8 0x00000008 +#define NVE4_COMPUTE_MP_PM_FUNC_FUNC__MASK 0x000ffff0 +#define NVE4_COMPUTE_MP_PM_FUNC_FUNC__SHIFT 4 + +#define 
NVE4_COMPUTE_MP_PM_UNK33DC 0x000033dc + +#define NVE4_COMPUTE_LAUNCH_DESC__SIZE 0x00000100 +#define NVE4_COMPUTE_LAUNCH_DESC_6 0x00000018 +#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__MASK 0x00000c00 +#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__SHIFT 10 + +#define NVE4_COMPUTE_LAUNCH_DESC_PROG_START 0x00000020 + +#define NVE4_COMPUTE_LAUNCH_DESC_12 0x00000030 +#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__MASK 0x7fffffff +#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__SHIFT 0 + +#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ 0x00000034 +#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__MASK 0x0000ffff +#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__SHIFT 0 +#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__MASK 0xffff0000 +#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__SHIFT 16 + +#define NVE4_COMPUTE_LAUNCH_DESC_17 0x00000044 +#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__MASK 0x0000ffff +#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__SHIFT 0 + +#define NVE4_COMPUTE_LAUNCH_DESC_18 0x00000048 +#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__MASK 0xffff0000 +#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__SHIFT 16 + +#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ 0x0000004c +#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__MASK 0x0000ffff +#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__SHIFT 0 +#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__MASK 0xffff0000 +#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__SHIFT 16 + +#define NVE4_COMPUTE_LAUNCH_DESC_20 0x00000050 +#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__MASK 0x000000ff +#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__SHIFT 0 +#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__MASK 0x60000000 +#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__SHIFT 29 +#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_16K_SHARED_48K_L1 0x20000000 +#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_32K_SHARED_32K_L1 0x40000000 +#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_48K_SHARED_16K_L1 0x60000000 + +#define 
NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0(i0) (0x00000074 + 0x8*(i0)) +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__ESIZE 0x00000008 +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__LEN 0x00000008 +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__MASK 0xffffffff +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__SHIFT 0 + +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1(i0) (0x00000078 + 0x8*(i0)) +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__ESIZE 0x00000008 +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__LEN 0x00000008 +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__MASK 0x000000ff +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__SHIFT 0 +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__MASK 0xffff8000 +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__SHIFT 15 + +#define NVE4_COMPUTE_LAUNCH_DESC_45 0x000000b4 +#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__MASK 0x000fffff +#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__SHIFT 0 +#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__MASK 0xf8000000 +#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__SHIFT 27 + +#define NVE4_COMPUTE_LAUNCH_DESC_46 0x000000b8 +#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__MASK 0x000fffff +#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__SHIFT 0 +#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__MASK 0x3f000000 +#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__SHIFT 24 + +#define NVE4_COMPUTE_LAUNCH_DESC_47 0x000000bc +#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__MASK 0x000fffff +#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__SHIFT 0 + + +#endif /* NVE4_COMPUTE_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h b/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h new file mode 100644 index 00000000000..68a742fadfe --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h @@ -0,0 +1,107 @@ +#ifndef RNNDB_NVE4_P2MF_XML +#define RNNDB_NVE4_P2MF_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- rnndb/nve4_p2mf.xml ( 1400 bytes, from 2012-04-14 21:29:11) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv_object.xml ( 12736 bytes, from 2012-04-14 21:30:24) +- ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59) +- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12) + +Copyright (C) 2006-2012 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane 
Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NVE4_P2MF_LINE_LENGTH_IN 0x00000180 + +#define NVE4_P2MF_LINE_COUNT 0x00000184 + +#define NVE4_P2MF_DST_ADDRESS_HIGH 0x00000188 + +#define NVE4_P2MF_DST_ADDRESS_LOW 0x0000018c + +#define NVE4_P2MF_DST_TILE_MODE 0x00000194 + +#define NVE4_P2MF_DST_PITCH 0x00000198 + +#define NVE4_P2MF_DST_HEIGHT 0x0000019c + +#define NVE4_P2MF_DST_DEPTH 0x000001a0 + +#define NVE4_P2MF_DST_Z 0x000001a4 + +#define NVE4_P2MF_DST_X 0x000001a8 + +#define NVE4_P2MF_DST_Y 0x000001ac + +#define NVE4_P2MF_EXEC 0x000001b0 +#define NVE4_P2MF_EXEC_LINEAR 0x00000001 +#define NVE4_P2MF_EXEC_UNK12 0x00001000 + +#define NVE4_P2MF_DATA 0x000001b4 + + +#endif /* RNNDB_NVE4_P2MF_XML */ |