summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Bumiller <[email protected]>2013-02-23 00:39:23 +0100
committerChristoph Bumiller <[email protected]>2013-03-12 12:55:35 +0100
commitc2dfcd7f0eaf4eda375eb320e0e87793a80ef92d (patch)
treefc72ee7b904fad16fb44b7c991306bfd604166aa
parentd105b3df14283a4dd80cecc1e6cab58432368ef6 (diff)
nv50/ir/tgsi: handle TGSI_OPCODE_LOAD,STORE
Squashed and (heavily) modified original patches by Francisco Jerez: nv50/ir/tgsi: Implement resource LOAD/STORE (wip); nv50/ir/tgsi: Emit SUST/SULD for surface access, and add CB LOAD/STORE support; nv50/ir/tgsi: Fix/clean up the LOAD/STORE handling code. Left out for now: nv50/ir/tgsi: Resource indirect indexing. Treating raw, read-only surfaces as constant buffers (CBs) was removed because CBs are limited to a size of 64 KiB which isn't desirable, and because this decision should probably be made by the state tracker. If we used a number of CB slots for surfaces, it might find that we cannot accommodate the advertised limit.
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir.h4
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp12
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h6
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_driver.h5
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp298
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp4
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp4
7 files changed, 303 insertions, 30 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index bdea48bbdf3..dd7ff90de96 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -871,9 +871,9 @@ public:
struct {
Target target;
- uint8_t r;
+ uint16_t r;
+ uint16_t s;
int8_t rIndirectSrc;
- uint8_t s;
int8_t sIndirectSrc;
uint8_t mask;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
index 4448e8299d2..0af2c61b3e5 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
@@ -240,15 +240,17 @@ BuildUtil::mkCmp(operation op, CondCode cc, DataType ty, Value *dst,
return insn;
}
-Instruction *
-BuildUtil::mkTex(operation op, TexTarget targ, uint8_t tic, uint8_t tsc,
- Value **def, Value **src)
+TexInstruction *
+BuildUtil::mkTex(operation op, TexTarget targ,
+ uint16_t tic, uint16_t tsc,
+ const std::vector<Value *> &def,
+ const std::vector<Value *> &src)
{
TexInstruction *tex = new_TexInstruction(func, op);
- for (int d = 0; d < 4 && def[d]; ++d)
+ for (size_t d = 0; d < def.size() && def[d]; ++d)
tex->setDef(d, def[d]);
- for (int s = 0; s < 4 && src[s]; ++s)
+ for (size_t s = 0; s < src.size() && src[s]; ++s)
tex->setSrc(s, src[s]);
tex->setTexture(targ, tic, tsc);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
index 963c3505083..f48dbc21168 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
@@ -75,8 +75,10 @@ public:
CmpInstruction *mkCmp(operation, CondCode, DataType,
Value *,
Value *, Value *, Value * = NULL);
- Instruction *mkTex(operation, TexTarget, uint8_t tic, uint8_t tsc,
- Value **def, Value **src);
+ TexInstruction *mkTex(operation, TexTarget,
+ uint16_t tic, uint16_t tsc,
+ const std::vector<Value *> &def,
+ const std::vector<Value *> &src);
Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *);
FlowInstruction *mkFlow(operation, void *target, CondCode, Value *pred);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
index deee60cd6a7..933a5e106ac 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
@@ -161,6 +161,10 @@ struct nv50_ir_prog_info
boolean separateFragData;
boolean usesDiscard;
} fp;
+ struct {
+ uint32_t inputOffset; /* base address for user args */
+ uint32_t sharedOffset; /* reserved space in s[] */
+ } cp;
} prop;
struct {
@@ -179,6 +183,7 @@ struct nv50_ir_prog_info
uint8_t sampleMask; /* output index of SampleMask */
uint8_t backFaceColor[2]; /* input/output indices of back face colour */
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
+ boolean nv50styleSurfaces; /* generate gX[] access for raw buffers */
uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */
uint16_t texBindBase; /* base address for tex handles (nve4) */
uint16_t suInfoBase; /* base address for surface info (nve4) */
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
index 69c05c1464c..afbabfde23d 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
@@ -559,7 +559,6 @@ static nv50_ir::operation translateOpcode(uint opcode)
NV50_IR_OPCODE_CASE(USLT, SET);
NV50_IR_OPCODE_CASE(USNE, SET);
- NV50_IR_OPCODE_CASE(LOAD, TXF);
NV50_IR_OPCODE_CASE(SAMPLE, TEX);
NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
@@ -620,8 +619,17 @@ public:
int clipVertexOutput;
- uint8_t *samplerViewTargets; // TGSI_TEXTURE_*
- unsigned samplerViewCount;
+ struct TextureView {
+ uint8_t target; // TGSI_TEXTURE_*
+ };
+ std::vector<TextureView> textureViews;
+
+ struct Resource {
+ uint8_t target; // TGSI_TEXTURE_*
+ bool raw;
+ uint8_t slot; // $surface index
+ };
+ std::vector<Resource> resources;
private:
int inferSysValDirection(unsigned sn) const;
@@ -640,8 +648,6 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
tgsi_dump(tokens, 0);
- samplerViewTargets = NULL;
-
mainTempsInLMem = FALSE;
}
@@ -654,9 +660,6 @@ Source::~Source()
FREE(info->immd.data);
if (info->immd.type)
FREE(info->immd.type);
-
- if (samplerViewTargets)
- delete[] samplerViewTargets;
}
bool Source::scanSource()
@@ -673,8 +676,8 @@ bool Source::scanSource()
clipVertexOutput = -1;
- samplerViewCount = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
- samplerViewTargets = new uint8_t[samplerViewCount];
+ textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
+ resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
info->immd.bufSize = 0;
tempArrayCount = 0;
@@ -899,9 +902,16 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
info->sv[i].input = inferSysValDirection(sn);
}
break;
+ case TGSI_FILE_RESOURCE:
+ for (i = first; i <= last; ++i) {
+ resources[i].target = decl->Resource.Resource;
+ resources[i].raw = decl->Resource.Raw;
+ resources[i].slot = i;
+ }
+ break;
case TGSI_FILE_SAMPLER_VIEW:
for (i = first; i <= last; ++i)
- samplerViewTargets[i] = decl->SamplerView.Resource;
+ textureViews[i].target = decl->SamplerView.Resource;
break;
case TGSI_FILE_IMMEDIATE_ARRAY:
{
@@ -997,9 +1007,15 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
for (unsigned s = 0; s < insn.srcCount(); ++s) {
Instruction::SrcRegister src = insn.getSrc(s);
- if (src.getFile() == TGSI_FILE_TEMPORARY)
+ if (src.getFile() == TGSI_FILE_TEMPORARY) {
if (src.isIndirect(0))
mainTempsInLMem = TRUE;
+ } else
+ if (src.getFile() == TGSI_FILE_RESOURCE) {
+ if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
+ info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+ 0x1 : 0x2;
+ }
if (src.getFile() != TGSI_FILE_INPUT)
continue;
unsigned mask = insn.srcMask(s);
@@ -1025,13 +1041,16 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
nv50_ir::TexInstruction::Target
Instruction::getTexture(const tgsi::Source *code, int s) const
{
+ // XXX: indirect access
+ unsigned int r;
+
switch (getSrc(s).getFile()) {
- case TGSI_FILE_SAMPLER_VIEW: {
- // XXX: indirect access
- unsigned int r = getSrc(s).getIndex(0);
- assert(r < code->samplerViewCount);
- return translateTexture(code->samplerViewTargets[r]);
- }
+ case TGSI_FILE_RESOURCE:
+ r = getSrc(s).getIndex(0);
+ return translateTexture(code->resources.at(r).target);
+ case TGSI_FILE_SAMPLER_VIEW:
+ r = getSrc(s).getIndex(0);
+ return translateTexture(code->textureViews.at(r).target);
default:
return translateTexture(insn->Texture.Texture);
}
@@ -1091,6 +1110,12 @@ private:
void handleLIT(Value *dst0[4]);
void handleUserClipPlanes();
+ Symbol *getResourceBase(int r);
+ void getResourceCoords(std::vector<Value *>&, int r, int s);
+
+ void handleLOAD(Value *dst0[4]);
+ void handleSTORE();
+
Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
@@ -1710,6 +1735,236 @@ Converter::handleLIT(Value *dst0[4])
}
}
+static inline bool
+isResourceSpecial(const int r)
+{
+ return (r == TGSI_RESOURCE_GLOBAL ||
+ r == TGSI_RESOURCE_LOCAL ||
+ r == TGSI_RESOURCE_PRIVATE ||
+ r == TGSI_RESOURCE_INPUT);
+}
+
+static inline bool
+isResourceRaw(const struct tgsi::Source *code, const int r)
+{
+ return isResourceSpecial(r) || code->resources[r].raw;
+}
+
+static inline nv50_ir::TexTarget
+getResourceTarget(const struct tgsi::Source *code, int r)
+{
+ if (isResourceSpecial(r))
+ return nv50_ir::TEX_TARGET_BUFFER;
+ return tgsi::translateTexture(code->resources.at(r).target);
+}
+
+Symbol *
+Converter::getResourceBase(const int r)
+{
+ Symbol *sym = NULL;
+
+ switch (r) {
+ case TGSI_RESOURCE_GLOBAL:
+ sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);
+ break;
+ case TGSI_RESOURCE_LOCAL:
+ assert(prog->getType() == Program::TYPE_COMPUTE);
+ sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
+ info->prop.cp.sharedOffset);
+ break;
+ case TGSI_RESOURCE_PRIVATE:
+ sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
+ info->bin.tlsSpace);
+ break;
+ case TGSI_RESOURCE_INPUT:
+ assert(prog->getType() == Program::TYPE_COMPUTE);
+ sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
+ info->prop.cp.inputOffset);
+ break;
+ default:
+ sym = new_Symbol(prog,
+ nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
+ break;
+ }
+ return sym;
+}
+
+void
+Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)
+{
+ const int arg =
+ TexInstruction::Target(getResourceTarget(code, r)).getArgCount();
+
+ for (int c = 0; c < arg; ++c)
+ coords.push_back(fetchSrc(s, c));
+
+ // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
+ if (r == TGSI_RESOURCE_LOCAL ||
+ r == TGSI_RESOURCE_PRIVATE ||
+ r == TGSI_RESOURCE_INPUT)
+ coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
+ coords[0]);
+}
+
+static inline int
+partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
+{
+ int n = 0;
+
+ while (mask) {
+ if (mask & 1) {
+ size[n]++;
+ } else {
+ if (size[n])
+ comp[n = 1] = size[0] + 1;
+ else
+ comp[n]++;
+ }
+ mask >>= 1;
+ }
+ if (size[0] == 3) {
+ n = 1;
+ size[0] = (comp[0] == 1) ? 1 : 2;
+ size[1] = 3 - size[0];
+ comp[1] = comp[0] + size[0];
+ }
+ return n + 1;
+}
+
+// For raw loads, granularity is 4 byte.
+// Usage of the texture read mask on OP_SULDP is not allowed.
+void
+Converter::handleLOAD(Value *dst0[4])
+{
+ const int r = tgsi.getSrc(0).getIndex(0);
+ int c;
+ std::vector<Value *> off, src, ldv, def;
+
+ getResourceCoords(off, r, 1);
+
+ if (isResourceRaw(code, r)) {
+ uint8_t mask = 0;
+ uint8_t comp[2] = { 0, 0 };
+ uint8_t size[2] = { 0, 0 };
+
+ Symbol *base = getResourceBase(r);
+
+ // determine the base and size of the at most 2 load ops
+ for (c = 0; c < 4; ++c)
+ if (!tgsi.getDst(0).isMasked(c))
+ mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);
+
+ int n = partitionLoadStore(comp, size, mask);
+
+ src = off;
+
+ def.resize(4); // index by component, the ones we need will be non-NULL
+ for (c = 0; c < 4; ++c) {
+ if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
+ def[c] = dst0[c];
+ else
+ if (mask & (1 << c))
+ def[c] = getScratch();
+ }
+
+ const bool useLd = isResourceSpecial(r) ||
+ (info->io.nv50styleSurfaces &&
+ code->resources[r].target == TGSI_TEXTURE_BUFFER);
+
+ for (int i = 0; i < n; ++i) {
+ ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);
+
+ if (comp[i]) // adjust x component of source address if necessary
+ src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
+ off[0], mkImm(comp[i] * 4));
+ else
+ src[0] = off[0];
+
+ if (useLd) {
+ Instruction *ld =
+ mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
+ for (size_t c = 1; c < ldv.size(); ++c)
+ ld->setDef(c, ldv[c]);
+ } else {
+ mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
+ 0, ldv, src)->dType = typeOfSize(size[i] * 4);
+ }
+ }
+ } else {
+ def.resize(4);
+ for (c = 0; c < 4; ++c) {
+ if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
+ def[c] = getScratch();
+ else
+ def[c] = dst0[c];
+ }
+
+ mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
+ def, off);
+ }
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ if (dst0[c] != def[c])
+ mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
+}
+
+// For formatted stores, the write mask on OP_SUSTP can be used.
+// Raw stores have to be split.
+void
+Converter::handleSTORE()
+{
+ const int r = tgsi.getDst(0).getIndex(0);
+ int c;
+ std::vector<Value *> off, src, dummy;
+
+ getResourceCoords(off, r, 0);
+ src = off;
+ const int s = src.size();
+
+ if (isResourceRaw(code, r)) {
+ uint8_t comp[2] = { 0, 0 };
+ uint8_t size[2] = { 0, 0 };
+
+ int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());
+
+ Symbol *base = getResourceBase(r);
+
+ const bool useSt = isResourceSpecial(r) ||
+ (info->io.nv50styleSurfaces &&
+ code->resources[r].target == TGSI_TEXTURE_BUFFER);
+
+ for (int i = 0; i < n; ++i) {
+ if (comp[i]) // adjust x component of source address if necessary
+ src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
+ off[0], mkImm(comp[i] * 4));
+ else
+ src[0] = off[0];
+
+ const DataType stTy = typeOfSize(size[i] * 4);
+
+ if (useSt) {
+ Instruction *st =
+ mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
+ for (c = 1; c < size[i]; ++c)
+ st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
+ st->setIndirect(0, 0, src[0]);
+ } else {
+ // attach values to be stored
+ src.resize(s + size[i]);
+ for (c = 0; c < size[i]; ++c)
+ src[s + c] = fetchSrc(1, comp[i] + c);
+ mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
+ 0, dummy, src)->setType(stTy);
+ }
+ }
+ } else {
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
+ src.push_back(fetchSrc(1, c));
+
+ mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
+ dummy, src)->tex.mask = tgsi.getDst(0).getMask();
+ }
+}
+
Converter::Subroutine *
Converter::getSubroutine(unsigned ip)
{
@@ -2072,7 +2327,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
break;
case TGSI_OPCODE_TXF:
- case TGSI_OPCODE_LOAD:
handleTXF(dst0, 1);
break;
case TGSI_OPCODE_TXQ:
@@ -2257,6 +2511,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
ERROR("switch/case opcode encountered, should have been lowered\n");
abort();
break;
+ case TGSI_OPCODE_LOAD:
+ handleLOAD(dst0);
+ break;
+ case TGSI_OPCODE_STORE:
+ handleSTORE();
+ break;
default:
ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
assert(0);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
index db1306151ea..83f7201fc35 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
@@ -594,11 +594,13 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
i->setSrc(arg - 1, src);
if (i->tex.target.isCube()) {
- Value *acube[4], *a2d[4];
+ std::vector<Value *> acube, a2d;
int c;
+ acube.resize(4);
for (c = 0; c < 4; ++c)
acube[c] = i->getSrc(c);
+ a2d.resize(4);
for (c = 0; c < 3; ++c)
a2d[c] = new_LValue(func, FILE_GPR);
a2d[3] = NULL;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
index 00a80544c17..a2b61104f6f 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
@@ -532,7 +532,9 @@ void Instruction::print() const
if (perPatch)
PRINT("patch ");
if (asTex())
- PRINT("%s ", asTex()->tex.target.getName());
+ PRINT("%s %s$r%u $s%u %s", asTex()->tex.target.getName(),
+ colour[TXT_MEM], asTex()->tex.r, asTex()->tex.s,
+ colour[TXT_INSN]);
if (postFactor)
PRINT("x2^%i ", postFactor);
PRINT("%s%s", dnz ? "dnz " : (ftz ? "ftz " : ""), DataTypeStr[dType]);