diff options
author | Christoph Bumiller <[email protected]> | 2013-02-22 18:45:16 +0100 |
---|---|---|
committer | Christoph Bumiller <[email protected]> | 2013-03-12 12:55:34 +0100 |
commit | 22b762f9b495b14400f30bd6537f7c5a6d262325 (patch) | |
tree | 25ce4f42408d1af788dbcb1fcdec76e766c0a16f | |
parent | c82714c593ac38ea87e061b92d10b34853784723 (diff) |
nv50/ir: add various new OPs that will be needed for compute
9 files changed, 179 insertions, 48 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp index ec35c51c5c4..3121c5ff270 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp @@ -567,11 +567,11 @@ void Instruction::init() terminator = 0; ftz = 0; dnz = 0; - atomic = 0; perPatch = 0; fixed = 0; encSize = 0; ipa = 0; + mask = 0; lanes = 0xf; @@ -733,7 +733,7 @@ Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const i->saturate = saturate; i->join = join; i->exit = exit; - i->atomic = atomic; + i->mask = mask; i->ftz = ftz; i->dnz = dnz; i->ipa = ipa; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h index b951b9d8da7..ae577cc4209 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h @@ -66,7 +66,7 @@ enum operation OP_SHR, OP_MAX, OP_MIN, - OP_SAT, // CLAMP(f32, 0.0, 1.0) + OP_SAT, // CLAMP(f32, 0.0, 1.0) OP_CEIL, OP_FLOOR, OP_TRUNC, @@ -102,7 +102,7 @@ enum operation OP_JOIN, // converge OP_DISCARD, OP_EXIT, - OP_MEMBAR, + OP_MEMBAR, // memory barrier (mfence, lfence, sfence) OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1] OP_EXPORT, @@ -117,21 +117,42 @@ enum operation OP_TXQ, // texture size query OP_TXD, // texture derivatives OP_TXG, // texture gather - OP_TEXCSAA, - OP_SULD, // surface load - OP_SUST, // surface store + OP_TEXCSAA, // texture op for coverage sampling + OP_TEXPREP, // turn cube map array into 2d array coordinates + OP_SULDB, // surface load (raw) + OP_SULDP, // surface load (formatted) + OP_SUSTB, // surface store (raw) + OP_SUSTP, // surface store (formatted) + OP_SUREDB, + OP_SUREDP, // surface reduction (atomic op) + OP_SULEA, // surface load effective address + OP_SUBFM, // surface bitfield manipulation + OP_SUCLAMP, // clamp surface coordinates + OP_SUEAU, // surface effective address + OP_MADSP, // special integer multiply-add + OP_TEXBAR, // texture dependency barrier OP_DFDX, OP_DFDY, OP_RDSV, // read system value OP_WRSV, // write system value - OP_TEXPREP, // turn cube map array into 2d array coordinates, TODO: move OP_QUADOP, OP_QUADON, OP_QUADPOP, OP_POPCNT, // bitcount(src0 & src1) OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7] - OP_EXTBF, - OP_TEXBAR, + OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK + OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order) + OP_ATOM, + OP_BAR, // execution barrier, sources = { id, thread count, predicate } + OP_VADD, // byte/word vector operations + OP_VAVG, + OP_VMIN, + OP_VMAX, + OP_VSAD, + OP_VSET, + OP_VSHR, + OP_VSHL, + OP_VSEL, OP_LAST }; @@ -146,6 +167,59 @@ enum operation #define NV50_IR_SUBOP_EMU_PRERET 1 #define NV50_IR_SUBOP_TEXBAR(n) n #define NV50_IR_SUBOP_MOV_FINAL 1 +#define NV50_IR_SUBOP_EXTBF_REV 1 +#define NV50_IR_SUBOP_PERMT_F4E 1 +#define NV50_IR_SUBOP_PERMT_B4E 2 +#define NV50_IR_SUBOP_PERMT_RC8 3 +#define NV50_IR_SUBOP_PERMT_ECL 4 +#define NV50_IR_SUBOP_PERMT_ECR 5 +#define NV50_IR_SUBOP_PERMT_RC16 6 +#define NV50_IR_SUBOP_BAR_SYNC 0 +#define NV50_IR_SUBOP_BAR_ARRIVE 1 +#define NV50_IR_SUBOP_BAR_RED_AND 2 +#define NV50_IR_SUBOP_BAR_RED_OR 3 +#define NV50_IR_SUBOP_BAR_RED_POPC 4 +#define NV50_IR_SUBOP_MEMBAR_L 1 +#define NV50_IR_SUBOP_MEMBAR_S 2 +#define NV50_IR_SUBOP_MEMBAR_M 3 +#define NV50_IR_SUBOP_MEMBAR_CTA (0 << 2) +#define NV50_IR_SUBOP_MEMBAR_GL (1 << 2) +#define NV50_IR_SUBOP_MEMBAR_SYS (2 << 2) +#define NV50_IR_SUBOP_MEMBAR_DIR(m) ((m) & 0x3) +#define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3) +#define NV50_IR_SUBOP_MEMBAR(d,s) \ + (NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s) +#define NV50_IR_SUBOP_ATOM_ADD 0 +#define NV50_IR_SUBOP_ATOM_MIN 1 +#define NV50_IR_SUBOP_ATOM_MAX 2 +#define NV50_IR_SUBOP_ATOM_INC 3 +#define NV50_IR_SUBOP_ATOM_DEC 4 +#define NV50_IR_SUBOP_ATOM_AND 5 +#define NV50_IR_SUBOP_ATOM_OR 6 +#define NV50_IR_SUBOP_ATOM_XOR 7 +#define NV50_IR_SUBOP_ATOM_CAS 8 +#define NV50_IR_SUBOP_ATOM_EXCH 9 +#define NV50_IR_SUBOP_SUST_IGN 0 +#define NV50_IR_SUBOP_SUST_TRAP 1 +#define NV50_IR_SUBOP_SUST_SDCL 3 +#define NV50_IR_SUBOP_SULD_ZERO 0 +#define NV50_IR_SUBOP_SULD_TRAP 1 +#define NV50_IR_SUBOP_SULD_SDCL 3 +#define NV50_IR_SUBOP_SUBFM_3D 1 +#define NV50_IR_SUBOP_SUCLAMP_2D 0x10 +#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | ((d == 2) ? 0x10 : 0)) +#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | ((d == 2) ? 0x10 : 0)) +#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | ((d == 2) ? 0x10 : 0)) +#define NV50_IR_SUBOP_MADSP_SD 0xffff +// Yes, we could represent those with DataType. +// Or put the type into operation and have a couple 1000 values in that enum. +// This will have to do for now. +// The bitfields are supposed to correspond to nve4 ISA. +#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a)) +#define NV50_IR_SUBOP_V1(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x0000) +#define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000) +#define NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000) +#define NV50_IR_SUBOP_Vn(n) ((n) >> 14) enum DataType { @@ -680,22 +754,20 @@ public: RoundMode rnd; CacheMode cache; - uint8_t subOp; // quadop, 1 for mul-high, etc. - - uint8_t sched; // scheduling data (NOTE: maybe move to separate storage) + uint16_t subOp; // quadop, 1 for mul-high, etc. unsigned encSize : 4; // encoding size in bytes unsigned saturate : 1; // to [0.0f, 1.0f] unsigned join : 1; // converge control flow (use OP_JOIN until end) unsigned fixed : 1; // prevent dead code elimination unsigned terminator : 1; // end of basic block - unsigned atomic : 1; unsigned ftz : 1; // flush denormal to zero unsigned dnz : 1; // denormals, NaN are zero unsigned ipa : 4; // interpolation mode unsigned lanes : 4; unsigned perPatch : 1; unsigned exit : 1; // terminate program after insn + unsigned mask : 4; // for vector ops int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor @@ -703,6 +775,8 @@ public: int8_t flagsDef; int8_t flagsSrc; + uint8_t sched; // scheduling data (NOTE: maybe move to separate storage) + BasicBlock *bb; protected: diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h index 40241b18d1c..7ec22b55e66 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h @@ -43,7 +43,7 @@ static inline bool isMemoryFile(DataFile f) // contrary to asTex(), this will never include SULD/SUST static inline bool isTextureOp(operation op) { - return (op >= OP_TEX && op <= OP_TEXCSAA) || (op == OP_TEXPREP); + return (op >= OP_TEX && op <= OP_TEXPREP); } static inline unsigned int typeSizeof(DataType ty) @@ -304,14 +304,14 @@ const FlowInstruction *Instruction::asFlow() const TexInstruction *Instruction::asTex() { - if ((op >= OP_TEX && op <= OP_TEXCSAA) || (op == OP_TEXPREP)) + if (op >= OP_TEX && op <= OP_SULEA) return static_cast<TexInstruction *>(this); return NULL; } const TexInstruction *Instruction::asTex() const { - if (op >= OP_TEX && op <= OP_TEXCSAA) + if (op >= OP_TEX && op <= OP_SULEA) return static_cast<const TexInstruction *>(this); return NULL; } diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp index 05997b6b568..19d1c369a3f 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp @@ -2060,8 +2060,7 @@ Instruction::isActionEqual(const Instruction *that) const if (this->asFlow()) { return false; } else { - if (this->atomic != that->atomic || - this->ipa != that->ipa || + if (this->ipa != that->ipa || this->lanes != that->lanes || this->perPatch != that->perPatch) return false; @@ -2074,7 +2073,8 @@ Instruction::isActionEqual(const Instruction *that) const this->rnd != that->rnd || this->ftz != that->ftz || this->dnz != that->dnz || - this->cache != that->cache) + this->cache != that->cache || + this->mask != that->mask) return false; return true; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp index b20aed53fb6..48ade15842d 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp @@ -132,7 +132,7 @@ const char *operationStr[OP_LAST + 1] = "join", "discard", "exit", - "barrier", + "membar", "vfetch", "pfetch", "export", @@ -148,23 +148,49 @@ const char *operationStr[OP_LAST + 1] = "texgrad", "texgather", "texcsaa", - "suld", - "sust", + "texprep", + "suldb", + "suldp", + "sustb", + "sustp", + "suredb", + "suredp", + "sulea", + "subfm", + "suclamp", + "sueau", + "madsp", + "texbar", "dfdx", "dfdy", "rdsv", "wrsv", - "texprep", "quadop", "quadon", "quadpop", "popcnt", "insbf", "extbf", - "texbar", + "permt", + "atom", + "bar", + "vadd", + "vavg", + "vmin", + "vmax", + "vsad", + "vset", + "vshr", + "vshl", + "vsel", "(invalid)" }; +static const char *atomSubOpStr[] = +{ + "add", "min", "max", "inc", "dec", "and", "or", "xor", "cas", "exch" +}; + static const char *DataTypeStr[] = { "-", @@ -488,8 +514,17 @@ void Instruction::print() const PRINT("%s ", operationStr[op]); if (op == OP_LINTERP || op == OP_PINTERP) PRINT("%s ", interpStr[ipa]); - if (subOp) - PRINT("(SUBOP:%u) ", subOp); + switch (op) { + case OP_SUREDP: + case OP_ATOM: + if (subOp < Elements(atomSubOpStr)) + PRINT("%s ", atomSubOpStr[subOp]); + break; + default: + if (subOp) + PRINT("(SUBOP:%u) ", subOp); + break; + } if (perPatch) PRINT("patch "); if (asTex()) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp index be6a276ac9b..7642c5d16d6 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp @@ -44,11 +44,16 @@ const uint8_t Target::operationSrcNr[OP_LAST + 1] = 1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP 1, 1, // EMIT, RESTART 1, 1, 1, // TEX, TXB, TXL, - 1, 1, 1, 1, 1, // TXF, TXQ, TXD, TXG, TEXCSAA - 1, 2, // SULD, SUST + 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TEXCSAA, TEXPREP + 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA + 3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP + 0, // TEXBAR 1, 1, // DFDX, DFDY - 1, 2, 2, 2, 0, 0, // RDSV, WRSV, TEXPREP, QUADOP, QUADON, QUADPOP - 2, 3, 2, 0, // POPCNT, INSBF, EXTBF, TEXBAR + 1, 2, 2, 0, 0, // RDSV, WRSV, QUADOP, QUADON, QUADPOP + 2, 3, 2, 3, // POPCNT, INSBF, EXTBF, PERMT + 2, 2, // ATOM, BAR + 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, + 2, 2, 2, // VSHR, VSHL, VSEL 0 }; @@ -89,25 +94,37 @@ const OpClass Target::operationClass[OP_LAST + 1] = // DISCARD, EXIT OPCLASS_FLOW, OPCLASS_FLOW, // MEMBAR - OPCLASS_OTHER, + OPCLASS_CONTROL, // VFETCH, PFETCH, EXPORT OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE, // LINTERP, PINTERP OPCLASS_SFU, OPCLASS_SFU, // EMIT, RESTART - OPCLASS_OTHER, OPCLASS_OTHER, - // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA + OPCLASS_CONTROL, OPCLASS_CONTROL, + // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA; TEXPREP OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, - // SULD, SUST - OPCLASS_SURFACE, OPCLASS_SURFACE, - // DFDX, DFDY, RDSV, WRSV; TEXPREP, QUADOP, QUADON, QUADPOP - OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, - OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, - // POPCNT, INSBF, EXTBF - OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, + OPCLASS_TEXTURE, + // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA + OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE, + OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE, + // SUBFM, SUCLAMP, SUEAU, MADSP + OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH, // TEXBAR OPCLASS_OTHER, + // DFDX, DFDY, RDSV, WRSV; QUADOP, QUADON, QUADPOP + OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, + OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, + // POPCNT, INSBF, EXTBF, PERMT + OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, + // ATOM, BAR + OPCLASS_ATOMIC, OPCLASS_CONTROL, + // VADD, VAVG, VMIN, VMAX + OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, + // VSAD, VSET, VSHR, VSHL + OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, + // VSEL + OPCLASS_VECTOR, OPCLASS_PSEUDO // LAST }; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h index 304dda4f865..d312685a708 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h @@ -111,7 +111,10 @@ enum OpClass OPCLASS_SURFACE = 11, OPCLASS_FLOW = 12, OPCLASS_PSEUDO = 14, - OPCLASS_OTHER = 15 + OPCLASS_VECTOR = 15, + OPCLASS_BITFIELD = 16, + OPCLASS_CONTROL = 17, + OPCLASS_OTHER = 18 }; class Target diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp index 6b4175edfcf..db09cb3dbae 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp @@ -113,19 +113,20 @@ void TargetNV50::initOpInfo() static const uint32_t commutative[(OP_LAST + 31) / 32] = { // ADD,MAD,MUL,AND,OR,XOR,MAX,MIN - 0x0670ca00, 0x0000003f, 0x00000000 + 0x0670ca00, 0x0000003f, 0x00000000, 0x00000000 }; static const uint32_t shortForm[(OP_LAST + 31) / 32] = { // MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF - 0x00010e40, 0x00000040, 0x00000498 + 0x00010e40, 0x00000040, 0x00000498, 0x00000000 }; static const operation noDestList[] = { OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT, OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET, OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART, - OP_QUADON, OP_QUADPOP + OP_QUADON, OP_QUADPOP, OP_TEXBAR, OP_SUSTB, OP_SUSTP, OP_SUREDP, + OP_SUREDB, OP_BAR }; static const operation noPredList[] = { diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp index cdc7c0aba57..36fddd785cd 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp @@ -260,13 +260,13 @@ void TargetNVC0::initOpInfo() static const uint32_t commutative[(OP_LAST + 31) / 32] = { // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN - 0x0670ca00, 0x0000003f, 0x00000000 + 0x0670ca00, 0x0000003f, 0x00000000, 0x00000000 }; static const uint32_t shortForm[(OP_LAST + 31) / 32] = { // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV - 0x0670ca00, 0x00000000, 0x00000000 + 0x0670ca00, 0x00000000, 0x00000000, 0x00000000 }; static const operation noDest[] = @@ -274,7 +274,8 @@ void TargetNVC0::initOpInfo() OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT, OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET, OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART, - OP_QUADON, OP_QUADPOP, OP_TEXBAR + OP_QUADON, OP_QUADPOP, OP_TEXBAR, OP_SUSTB, OP_SUSTP, OP_SUREDP, + OP_SUREDB, OP_BAR }; for (i = 0; i < DATA_FILE_COUNT; ++i) |