diff options
author | Christoph Bumiller <[email protected]> | 2012-04-29 17:59:06 +0200 |
---|---|---|
committer | Christoph Bumiller <[email protected]> | 2012-04-29 17:59:06 +0200 |
commit | afcd7b5d1614a8a758ccb4353a9c31a601c9b9b4 (patch) | |
tree | ec2a84699154b9e712d1ee47917c118113aca7e6 /src/gallium/drivers/nv50 | |
parent | 00fe442253744c4c4e7e68da44d6983da053968b (diff) |
nvc0/ir: initial implementation of nve4 scheduling hints
Diffstat (limited to 'src/gallium/drivers/nv50')
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp | 20 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp | 81 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_target.h | 38 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp | 4 |
5 files changed, 141 insertions, 4 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index da9042066ad..e544d071b52 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -681,6 +681,8 @@ public:
    uint8_t subOp; // quadop, 1 for mul-high, etc.
+   uint8_t sched; // scheduling data (NOTE: maybe move to separate storage)
+
    unsigned encSize : 4; // encoding size in bytes
    unsigned saturate : 1; // to [0.0f, 1.0f]
    unsigned join : 1; // converge control flow (use OP_JOIN until end)
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
index 4652bb95f69..9d92b7bc24b 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
@@ -53,6 +53,26 @@ static const char *colour[8] =
 #endif
 };
+static const char *OpClassStr[OPCLASS_OTHER + 1] =
+{
+   "MOVE",
+   "LOAD",
+   "STORE",
+   "ARITH",
+   "SHIFT",
+   "SFU",
+   "LOGIC",
+   "COMPARE",
+   "CONVERT",
+   "ATOMIC",
+   "TEXTURE",
+   "SURFACE",
+   "FLOW",
+   "(INVALID)",
+   "PSEUDO",
+   "OTHER"
+};
+
 const char *operationStr[OP_LAST + 1] =
 {
    "nop",
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
index e3eae69554c..f718912fb39 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
@@ -52,6 +52,65 @@ const uint8_t Target::operationSrcNr[OP_LAST + 1] =
    0
 };
+const OpClass Target::operationClass[OP_LAST + 1] =
+{
+   // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
+   OPCLASS_OTHER,
+   OPCLASS_PSEUDO,
+   OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
+   // MOV; LOAD; STORE
+   OPCLASS_MOVE,
+   OPCLASS_LOAD,
+   OPCLASS_STORE,
+   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
+   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
+   OPCLASS_ARITH, OPCLASS_ARITH,
+   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
+   // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
+   OPCLASS_CONVERT, OPCLASS_CONVERT,
+   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
+   OPCLASS_SHIFT, OPCLASS_SHIFT,
+   // MAX, MIN
+   OPCLASS_COMPARE, OPCLASS_COMPARE,
+   // SAT, CEIL, FLOOR, TRUNC; CVT
+   OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
+   OPCLASS_CONVERT,
+   // SET(AND,OR,XOR); SELP, SLCT
+   OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
+   OPCLASS_COMPARE, OPCLASS_COMPARE,
+   // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
+   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
+   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
+   OPCLASS_SFU, OPCLASS_SFU,
+   // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
+   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
+   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
+   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
+   // DISCARD, EXIT
+   OPCLASS_FLOW, OPCLASS_FLOW,
+   // MEMBAR
+   OPCLASS_OTHER,
+   // VFETCH, PFETCH, EXPORT
+   OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
+   // LINTERP, PINTERP
+   OPCLASS_SFU, OPCLASS_SFU,
+   // EMIT, RESTART
+   OPCLASS_OTHER, OPCLASS_OTHER,
+   // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA
+   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
+   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
+   // SULD, SUST
+   OPCLASS_SURFACE, OPCLASS_SURFACE,
+   // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
+   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
+   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
+   // POPCNT, INSBF, EXTBF
+   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
+   // TEXBAR
+   OPCLASS_OTHER,
+   OPCLASS_PSEUDO // LAST
+};
+
 extern Target *getTargetNVC0(unsigned int chipset);
 extern Target *getTargetNV50(unsigned int chipset);
@@ -104,6 +163,11 @@ CodeEmitter::printBinary() const
    INFO("\n");
 }
+static inline uint32_t sizeToBundlesNVE4(uint32_t size)
+{
+   return (size + 55) / 56;
+}
+
 void
 CodeEmitter::prepareEmission(Program *prog)
 {
@@ -112,6 +176,23 @@ CodeEmitter::prepareEmission(Program *prog)
       Function *func = reinterpret_cast<Function *>(fi.get());
       func->binPos = prog->binSize;
       prepareEmission(func);
+
+      // adjust sizes & positions for schedulding info:
+      if (prog->getTarget()->hasSWSched) {
+         BasicBlock *bb = NULL;
+         for (int i = 0; i < func->bbCount; ++i) {
+            bb = func->bbArray[i];
+            const uint32_t oldPos = bb->binPos;
+            const uint32_t oldEnd = bb->binPos + bb->binSize;
+            uint32_t adjPos = oldPos + sizeToBundlesNVE4(oldPos) * 8;
+            uint32_t adjEnd = oldEnd + sizeToBundlesNVE4(oldEnd) * 8;
+            bb->binPos = adjPos;
+            bb->binSize = adjEnd - adjPos;
+         }
+         if (bb)
+            func->binSize = bb->binPos + bb->binSize;
+      }
+
       prog->binSize += func->binSize;
    }
 }
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
index 88996ebbde3..c60ee0216f7 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
@@ -93,9 +93,31 @@ protected:
    RelocInfo *relocInfo;
 };
+
+enum OpClass
+{
+   OPCLASS_MOVE = 0,
+   OPCLASS_LOAD = 1,
+   OPCLASS_STORE = 2,
+   OPCLASS_ARITH = 3,
+   OPCLASS_SHIFT = 4,
+   OPCLASS_SFU = 5,
+   OPCLASS_LOGIC = 6,
+   OPCLASS_COMPARE = 7,
+   OPCLASS_CONVERT = 8,
+   OPCLASS_ATOMIC = 9,
+   OPCLASS_TEXTURE = 10,
+   OPCLASS_SURFACE = 11,
+   OPCLASS_FLOW = 12,
+   OPCLASS_PSEUDO = 14,
+   OPCLASS_OTHER = 15
+};
+
 class Target
 {
 public:
+   Target(bool j, bool s) : joinAnterior(j), hasSWSched(s) { }
+
    static Target *create(uint32_t chipset);
    static void destroy(Target *);
@@ -153,6 +175,9 @@ public:
    virtual bool mayPredicate(const Instruction *, const Value *) const = 0;
+   // whether @insn can be issued together with @next (order matters)
+   virtual bool canDualIssue(const Instruction *insn,
+                             const Instruction *next) const { return false; }
    virtual int getLatency(const Instruction *) const { return 1; }
    virtual int getThroughput(const Instruction *) const { return 1; }
@@ -162,9 +187,20 @@ public:
    virtual uint32_t getSVAddress(DataFile, const Symbol *) const = 0;
 public:
-   bool joinAnterior; // true if join is executed before the op
+   const bool joinAnterior; // true if join is executed before the op
+   const bool hasSWSched; // true if code should provide scheduling data
    static const uint8_t operationSrcNr[OP_LAST + 1];
+   static const OpClass operationClass[OP_LAST + 1];
+
+   static inline uint8_t getOpSrcNr(operation op)
+   {
+      return operationSrcNr[op];
+   }
+   static inline OpClass getOpClass(operation op)
+   {
+      return operationClass[op];
+   }
 protected:
    uint32_t chipset;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
index a64f7f72255..5e541e514cb 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
@@ -29,7 +29,7 @@ Target *getTargetNV50(unsigned int chipset)
    return new TargetNV50(chipset);
 }
-TargetNV50::TargetNV50(unsigned int card)
+TargetNV50::TargetNV50(unsigned int card) : Target(true, false)
 {
    chipset = card;
@@ -132,8 +132,6 @@ void TargetNV50::initOpInfo()
       OP_CALL, OP_PREBREAK, OP_PRERET, OP_QUADON, OP_QUADPOP, OP_JOINAT
    };
-   joinAnterior = true;
-
    for (i = 0; i < DATA_FILE_COUNT; ++i)
       nativeFileMap[i] = (DataFile)i;
    nativeFileMap[FILE_PREDICATE] = FILE_FLAGS;