summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/nv50
diff options
context:
space:
mode:
author: Christoph Bumiller <[email protected]> 2013-02-22 18:45:16 +0100
committer: Christoph Bumiller <[email protected]> 2013-03-12 12:55:34 +0100
commit: 22b762f9b495b14400f30bd6537f7c5a6d262325 (patch)
tree: 25ce4f42408d1af788dbcb1fcdec76e766c0a16f /src/gallium/drivers/nv50
parent: c82714c593ac38ea87e061b92d10b34853784723 (diff)
nv50/ir: add various new OPs that will be needed for compute
Diffstat (limited to 'src/gallium/drivers/nv50')
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir.cpp 4
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir.h 98
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h 6
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp 6
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp 49
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp 45
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_target.h 5
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp 7
8 files changed, 175 insertions, 45 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
index ec35c51c5c4..3121c5ff270 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
@@ -567,11 +567,11 @@ void Instruction::init()
terminator = 0;
ftz = 0;
dnz = 0;
- atomic = 0;
perPatch = 0;
fixed = 0;
encSize = 0;
ipa = 0;
+ mask = 0;
lanes = 0xf;
@@ -733,7 +733,7 @@ Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
i->saturate = saturate;
i->join = join;
i->exit = exit;
- i->atomic = atomic;
+ i->mask = mask;
i->ftz = ftz;
i->dnz = dnz;
i->ipa = ipa;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index b951b9d8da7..ae577cc4209 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -66,7 +66,7 @@ enum operation
OP_SHR,
OP_MAX,
OP_MIN,
- OP_SAT, // CLAMP(f32, 0.0, 1.0)
+ OP_SAT, // CLAMP(f32, 0.0, 1.0)
OP_CEIL,
OP_FLOOR,
OP_TRUNC,
@@ -102,7 +102,7 @@ enum operation
OP_JOIN, // converge
OP_DISCARD,
OP_EXIT,
- OP_MEMBAR,
+ OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
OP_EXPORT,
@@ -117,21 +117,42 @@ enum operation
OP_TXQ, // texture size query
OP_TXD, // texture derivatives
OP_TXG, // texture gather
- OP_TEXCSAA,
- OP_SULD, // surface load
- OP_SUST, // surface store
+ OP_TEXCSAA, // texture op for coverage sampling
+ OP_TEXPREP, // turn cube map array into 2d array coordinates
+ OP_SULDB, // surface load (raw)
+ OP_SULDP, // surface load (formatted)
+ OP_SUSTB, // surface store (raw)
+ OP_SUSTP, // surface store (formatted)
+ OP_SUREDB,
+ OP_SUREDP, // surface reduction (atomic op)
+ OP_SULEA, // surface load effective address
+ OP_SUBFM, // surface bitfield manipulation
+ OP_SUCLAMP, // clamp surface coordinates
+ OP_SUEAU, // surface effective address
+ OP_MADSP, // special integer multiply-add
+ OP_TEXBAR, // texture dependency barrier
OP_DFDX,
OP_DFDY,
OP_RDSV, // read system value
OP_WRSV, // write system value
- OP_TEXPREP, // turn cube map array into 2d array coordinates, TODO: move
OP_QUADOP,
OP_QUADON,
OP_QUADPOP,
OP_POPCNT, // bitcount(src0 & src1)
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
- OP_EXTBF,
- OP_TEXBAR,
+ OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
+ OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
+ OP_ATOM,
+ OP_BAR, // execution barrier, sources = { id, thread count, predicate }
+ OP_VADD, // byte/word vector operations
+ OP_VAVG,
+ OP_VMIN,
+ OP_VMAX,
+ OP_VSAD,
+ OP_VSET,
+ OP_VSHR,
+ OP_VSHL,
+ OP_VSEL,
OP_LAST
};
@@ -146,6 +167,59 @@ enum operation
#define NV50_IR_SUBOP_EMU_PRERET 1
#define NV50_IR_SUBOP_TEXBAR(n) n
#define NV50_IR_SUBOP_MOV_FINAL 1
+#define NV50_IR_SUBOP_EXTBF_REV 1
+#define NV50_IR_SUBOP_PERMT_F4E 1
+#define NV50_IR_SUBOP_PERMT_B4E 2
+#define NV50_IR_SUBOP_PERMT_RC8 3
+#define NV50_IR_SUBOP_PERMT_ECL 4
+#define NV50_IR_SUBOP_PERMT_ECR 5
+#define NV50_IR_SUBOP_PERMT_RC16 6
+#define NV50_IR_SUBOP_BAR_SYNC 0
+#define NV50_IR_SUBOP_BAR_ARRIVE 1
+#define NV50_IR_SUBOP_BAR_RED_AND 2
+#define NV50_IR_SUBOP_BAR_RED_OR 3
+#define NV50_IR_SUBOP_BAR_RED_POPC 4
+#define NV50_IR_SUBOP_MEMBAR_L 1
+#define NV50_IR_SUBOP_MEMBAR_S 2
+#define NV50_IR_SUBOP_MEMBAR_M 3
+#define NV50_IR_SUBOP_MEMBAR_CTA (0 << 2)
+#define NV50_IR_SUBOP_MEMBAR_GL (1 << 2)
+#define NV50_IR_SUBOP_MEMBAR_SYS (2 << 2)
+#define NV50_IR_SUBOP_MEMBAR_DIR(m) ((m) & 0x3)
+#define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3)
+#define NV50_IR_SUBOP_MEMBAR(d,s) \
+ (NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s)
+#define NV50_IR_SUBOP_ATOM_ADD 0
+#define NV50_IR_SUBOP_ATOM_MIN 1
+#define NV50_IR_SUBOP_ATOM_MAX 2
+#define NV50_IR_SUBOP_ATOM_INC 3
+#define NV50_IR_SUBOP_ATOM_DEC 4
+#define NV50_IR_SUBOP_ATOM_AND 5
+#define NV50_IR_SUBOP_ATOM_OR 6
+#define NV50_IR_SUBOP_ATOM_XOR 7
+#define NV50_IR_SUBOP_ATOM_CAS 8
+#define NV50_IR_SUBOP_ATOM_EXCH 9
+#define NV50_IR_SUBOP_SUST_IGN 0
+#define NV50_IR_SUBOP_SUST_TRAP 1
+#define NV50_IR_SUBOP_SUST_SDCL 3
+#define NV50_IR_SUBOP_SULD_ZERO 0
+#define NV50_IR_SUBOP_SULD_TRAP 1
+#define NV50_IR_SUBOP_SULD_SDCL 3
+#define NV50_IR_SUBOP_SUBFM_3D 1
+#define NV50_IR_SUBOP_SUCLAMP_2D 0x10
+#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | ((d == 2) ? 0x10 : 0))
+#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | ((d == 2) ? 0x10 : 0))
+#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | ((d == 2) ? 0x10 : 0))
+#define NV50_IR_SUBOP_MADSP_SD 0xffff
+// Yes, we could represent those with DataType.
+// Or put the type into operation and have a couple 1000 values in that enum.
+// This will have to do for now.
+// The bitfields are supposed to correspond to nve4 ISA.
+#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a))
+#define NV50_IR_SUBOP_V1(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x0000)
+#define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000)
+#define NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000)
+#define NV50_IR_SUBOP_Vn(n) ((n) >> 14)
enum DataType
{
@@ -680,22 +754,20 @@ public:
RoundMode rnd;
CacheMode cache;
- uint8_t subOp; // quadop, 1 for mul-high, etc.
-
- uint8_t sched; // scheduling data (NOTE: maybe move to separate storage)
+ uint16_t subOp; // quadop, 1 for mul-high, etc.
unsigned encSize : 4; // encoding size in bytes
unsigned saturate : 1; // to [0.0f, 1.0f]
unsigned join : 1; // converge control flow (use OP_JOIN until end)
unsigned fixed : 1; // prevent dead code elimination
unsigned terminator : 1; // end of basic block
- unsigned atomic : 1;
unsigned ftz : 1; // flush denormal to zero
unsigned dnz : 1; // denormals, NaN are zero
unsigned ipa : 4; // interpolation mode
unsigned lanes : 4;
unsigned perPatch : 1;
unsigned exit : 1; // terminate program after insn
+ unsigned mask : 4; // for vector ops
int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor
@@ -703,6 +775,8 @@ public:
int8_t flagsDef;
int8_t flagsSrc;
+ uint8_t sched; // scheduling data (NOTE: maybe move to separate storage)
+
BasicBlock *bb;
protected:
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
index 40241b18d1c..7ec22b55e66 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
@@ -43,7 +43,7 @@ static inline bool isMemoryFile(DataFile f)
// contrary to asTex(), this will never include SULD/SUST
static inline bool isTextureOp(operation op)
{
- return (op >= OP_TEX && op <= OP_TEXCSAA) || (op == OP_TEXPREP);
+ return (op >= OP_TEX && op <= OP_TEXPREP);
}
static inline unsigned int typeSizeof(DataType ty)
@@ -304,14 +304,14 @@ const FlowInstruction *Instruction::asFlow() const
TexInstruction *Instruction::asTex()
{
- if ((op >= OP_TEX && op <= OP_TEXCSAA) || (op == OP_TEXPREP))
+ if (op >= OP_TEX && op <= OP_SULEA)
return static_cast<TexInstruction *>(this);
return NULL;
}
const TexInstruction *Instruction::asTex() const
{
- if (op >= OP_TEX && op <= OP_TEXCSAA)
+ if (op >= OP_TEX && op <= OP_SULEA)
return static_cast<const TexInstruction *>(this);
return NULL;
}
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
index 05997b6b568..19d1c369a3f 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
@@ -2060,8 +2060,7 @@ Instruction::isActionEqual(const Instruction *that) const
if (this->asFlow()) {
return false;
} else {
- if (this->atomic != that->atomic ||
- this->ipa != that->ipa ||
+ if (this->ipa != that->ipa ||
this->lanes != that->lanes ||
this->perPatch != that->perPatch)
return false;
@@ -2074,7 +2073,8 @@ Instruction::isActionEqual(const Instruction *that) const
this->rnd != that->rnd ||
this->ftz != that->ftz ||
this->dnz != that->dnz ||
- this->cache != that->cache)
+ this->cache != that->cache ||
+ this->mask != that->mask)
return false;
return true;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
index b20aed53fb6..48ade15842d 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
@@ -132,7 +132,7 @@ const char *operationStr[OP_LAST + 1] =
"join",
"discard",
"exit",
- "barrier",
+ "membar",
"vfetch",
"pfetch",
"export",
@@ -148,23 +148,49 @@ const char *operationStr[OP_LAST + 1] =
"texgrad",
"texgather",
"texcsaa",
- "suld",
- "sust",
+ "texprep",
+ "suldb",
+ "suldp",
+ "sustb",
+ "sustp",
+ "suredb",
+ "suredp",
+ "sulea",
+ "subfm",
+ "suclamp",
+ "sueau",
+ "madsp",
+ "texbar",
"dfdx",
"dfdy",
"rdsv",
"wrsv",
- "texprep",
"quadop",
"quadon",
"quadpop",
"popcnt",
"insbf",
"extbf",
- "texbar",
+ "permt",
+ "atom",
+ "bar",
+ "vadd",
+ "vavg",
+ "vmin",
+ "vmax",
+ "vsad",
+ "vset",
+ "vshr",
+ "vshl",
+ "vsel",
"(invalid)"
};
+static const char *atomSubOpStr[] =
+{
+ "add", "min", "max", "inc", "dec", "and", "or", "xor", "cas", "exch"
+};
+
static const char *DataTypeStr[] =
{
"-",
@@ -488,8 +514,17 @@ void Instruction::print() const
PRINT("%s ", operationStr[op]);
if (op == OP_LINTERP || op == OP_PINTERP)
PRINT("%s ", interpStr[ipa]);
- if (subOp)
- PRINT("(SUBOP:%u) ", subOp);
+ switch (op) {
+ case OP_SUREDP:
+ case OP_ATOM:
+ if (subOp < Elements(atomSubOpStr))
+ PRINT("%s ", atomSubOpStr[subOp]);
+ break;
+ default:
+ if (subOp)
+ PRINT("(SUBOP:%u) ", subOp);
+ break;
+ }
if (perPatch)
PRINT("patch ");
if (asTex())
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
index be6a276ac9b..7642c5d16d6 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
@@ -44,11 +44,16 @@ const uint8_t Target::operationSrcNr[OP_LAST + 1] =
1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
1, 1, // EMIT, RESTART
1, 1, 1, // TEX, TXB, TXL,
- 1, 1, 1, 1, 1, // TXF, TXQ, TXD, TXG, TEXCSAA
- 1, 2, // SULD, SUST
+ 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TEXCSAA, TEXPREP
+ 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
+ 3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP
+ 0, // TEXBAR
1, 1, // DFDX, DFDY
- 1, 2, 2, 2, 0, 0, // RDSV, WRSV, TEXPREP, QUADOP, QUADON, QUADPOP
- 2, 3, 2, 0, // POPCNT, INSBF, EXTBF, TEXBAR
+ 1, 2, 2, 0, 0, // RDSV, WRSV, QUADOP, QUADON, QUADPOP
+ 2, 3, 2, 3, // POPCNT, INSBF, EXTBF, PERMT
+ 2, 2, // ATOM, BAR
+ 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
+ 2, 2, 2, // VSHR, VSHL, VSEL
0
};
@@ -89,25 +94,37 @@ const OpClass Target::operationClass[OP_LAST + 1] =
// DISCARD, EXIT
OPCLASS_FLOW, OPCLASS_FLOW,
// MEMBAR
- OPCLASS_OTHER,
+ OPCLASS_CONTROL,
// VFETCH, PFETCH, EXPORT
OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
// LINTERP, PINTERP
OPCLASS_SFU, OPCLASS_SFU,
// EMIT, RESTART
- OPCLASS_OTHER, OPCLASS_OTHER,
- // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA
+ OPCLASS_CONTROL, OPCLASS_CONTROL,
+ // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA; TEXPREP
OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
- // SULD, SUST
- OPCLASS_SURFACE, OPCLASS_SURFACE,
- // DFDX, DFDY, RDSV, WRSV; TEXPREP, QUADOP, QUADON, QUADPOP
- OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
- OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
- // POPCNT, INSBF, EXTBF
- OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
+ OPCLASS_TEXTURE,
+ // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
+ OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
+ OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
+ // SUBFM, SUCLAMP, SUEAU, MADSP
+ OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
// TEXBAR
OPCLASS_OTHER,
+ // DFDX, DFDY, RDSV, WRSV; QUADOP, QUADON, QUADPOP
+ OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
+ OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
+ // POPCNT, INSBF, EXTBF, PERMT
+ OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
+ // ATOM, BAR
+ OPCLASS_ATOMIC, OPCLASS_CONTROL,
+ // VADD, VAVG, VMIN, VMAX
+ OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
+ // VSAD, VSET, VSHR, VSHL
+ OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
+ // VSEL
+ OPCLASS_VECTOR,
OPCLASS_PSEUDO // LAST
};
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
index 304dda4f865..d312685a708 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.h
@@ -111,7 +111,10 @@ enum OpClass
OPCLASS_SURFACE = 11,
OPCLASS_FLOW = 12,
OPCLASS_PSEUDO = 14,
- OPCLASS_OTHER = 15
+ OPCLASS_VECTOR = 15,
+ OPCLASS_BITFIELD = 16,
+ OPCLASS_CONTROL = 17,
+ OPCLASS_OTHER = 18
};
class Target
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
index 6b4175edfcf..db09cb3dbae 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
@@ -113,19 +113,20 @@ void TargetNV50::initOpInfo()
static const uint32_t commutative[(OP_LAST + 31) / 32] =
{
// ADD,MAD,MUL,AND,OR,XOR,MAX,MIN
- 0x0670ca00, 0x0000003f, 0x00000000
+ 0x0670ca00, 0x0000003f, 0x00000000, 0x00000000
};
static const uint32_t shortForm[(OP_LAST + 31) / 32] =
{
// MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF
- 0x00010e40, 0x00000040, 0x00000498
+ 0x00010e40, 0x00000040, 0x00000498, 0x00000000
};
static const operation noDestList[] =
{
OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
- OP_QUADON, OP_QUADPOP
+ OP_QUADON, OP_QUADPOP, OP_TEXBAR, OP_SUSTB, OP_SUSTP, OP_SUREDP,
+ OP_SUREDB, OP_BAR
};
static const operation noPredList[] =
{