summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIlia Mirkin <[email protected]>2016-05-28 14:28:07 -0400
committerIlia Mirkin <[email protected]>2016-05-30 18:15:10 -0400
commit1f895caba0accc0af3e637d6193ac0b673ce98bc (patch)
treed5e5a40333339d1f84080ee5c6f14bb3a49eb16d
parent27a51ff9b420909334898785cf194b5998776e88 (diff)
nvc0/ir: limit max number of regs based on availability in SM
This effectively limits registers to 32 and 64 for fermi and kepler when 1024 threads are used, but allows the full amount to be used with smaller thread sizes. Signed-off-by: Ilia Mirkin <[email protected]> Reviewed-by: Samuel Pitoiset <[email protected]>
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp4
2 files changed, 4 insertions, 2 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index da2fa4bdf18..689fecfea44 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -455,7 +455,7 @@ NVC0LegalizePostRA::visit(Function *fn)
pOne = new_LValue(fn, FILE_PREDICATE);
carry = new_LValue(fn, FILE_FLAGS);
- rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR);
+ rZero->reg.data.id = (prog->getTarget()->getChipset() >= NVISA_GK20A_CHIPSET) ? 255 : 63;
carry->reg.data.id = 0;
pOne->reg.data.id = 7;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index fd0f8942caa..932ec397454 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -238,9 +238,11 @@ void TargetNVC0::initOpInfo()
unsigned int
TargetNVC0::getFileSize(DataFile file) const
{
+ const unsigned int gprs = (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
+ const unsigned int smregs = (chipset >= NVISA_GK104_CHIPSET) ? 65536 : 32768;
switch (file) {
case FILE_NULL: return 0;
- case FILE_GPR: return (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
+ case FILE_GPR: return MIN2(gprs, smregs / threads);
case FILE_PREDICATE: return 7;
case FILE_FLAGS: return 1;
case FILE_ADDRESS: return 0;