summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nv50/codegen
diff options
context:
space:
mode:
authorChristoph Bumiller <[email protected]>2012-04-14 23:56:56 +0200
committerChristoph Bumiller <[email protected]>2012-04-15 00:08:51 +0200
commite44089b2f79aa2dcaacf348911433d1e21235c0c (patch)
tree955d621392f0068ef8e3c98dc46195ff3916525e /src/gallium/drivers/nv50/codegen
parent69a921892d2303f1400576aa73980c28880f8654 (diff)
nvc0: add initial support for nve4+ (Kepler) chipsets
Most things that work on Fermi should work on Kepler too. There are a few performance optimizations left to do, like better placement of texture barriers and adding scheduling data to the shader instructions (without them, a thread group will be masked for 32 cycles after each single instruction issue).
Diffstat (limited to 'src/gallium/drivers/nv50/codegen')
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir.h2
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_driver.h2
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp6
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h1
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp1
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp3
6 files changed, 11 insertions, 4 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index 6ec4fc95441..c299cab3f52 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -131,6 +131,7 @@ enum operation
OP_POPCNT, // bitcount(src0 & src1)
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
OP_EXTBF,
+ OP_TEXBAR,
OP_LAST
};
@@ -141,6 +142,7 @@ enum operation
#define NV50_IR_SUBOP_LDC_ISL 3
#define NV50_IR_SUBOP_SHIFT_WRAP 1
#define NV50_IR_SUBOP_EMU_PRERET 1
+#define NV50_IR_SUBOP_TEXBAR(n) n
enum DataType
{
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
index e734c5b03bd..9632986fe40 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
@@ -163,6 +163,8 @@ struct nv50_ir_prog_info
uint8_t clipDistanceMask; /* mask of clip distances defined */
uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */
int8_t genUserClip; /* request user clip planes for ClipVertex */
+ uint16_t ucpBase; /* base address for UCPs */
+ uint8_t ucpBinding; /* constant buffer index of UCP data */
uint8_t pointSize; /* output index for PointSize */
uint8_t instanceId; /* system value index of InstanceID */
uint8_t vertexId; /* system value index of VertexID */
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
index 4530dc23715..8bd784fa47d 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
@@ -2250,9 +2250,9 @@ Converter::handleUserClipPlanes()
for (c = 0; c < 4; ++c) {
for (i = 0; i < info->io.genUserClip; ++i) {
- Value *ucp;
- ucp = mkLoad(TYPE_F32, mkSymbol(FILE_MEMORY_CONST, 15, TYPE_F32,
- i * 16 + c * 4), NULL);
+ Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpBinding,
+ TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
+ Value *ucp = mkLoad(TYPE_F32, sym, NULL);
if (c == 0)
res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
else
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
index 4ce9deb131f..93e502ea609 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
@@ -40,6 +40,7 @@ static inline bool isMemoryFile(DataFile f)
return (f >= FILE_MEMORY_CONST && f <= FILE_MEMORY_LOCAL);
}
+// contrary to asTex(), this will never include SULD/SUST
static inline bool isTextureOp(operation op)
{
return (op >= OP_TEX && op <= OP_TEXCSAA);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
index 45e61c5e58a..4652bb95f69 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
@@ -147,6 +147,7 @@ const char *operationStr[OP_LAST + 1] =
"popcnt",
"insbf",
"extbf",
+ "texbar",
"(invalid)"
};
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
index 27b9610ed52..e3eae69554c 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
@@ -48,7 +48,7 @@ const uint8_t Target::operationSrcNr[OP_LAST + 1] =
1, 2, // SULD, SUST
1, 1, // DFDX, DFDY
1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
- 2, 3, 2, // POPCNT, INSBF, EXTBF
+ 2, 3, 2, 0, // POPCNT, INSBF, EXTBF, TEXBAR
0
};
@@ -61,6 +61,7 @@ Target *Target::create(unsigned int chipset)
switch (chipset & 0xf0) {
case 0xc0:
case 0xd0:
+ case 0xe0:
return getTargetNVC0(chipset);
case 0x50:
case 0x80: