From 1f895caba0accc0af3e637d6193ac0b673ce98bc Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 28 May 2016 14:28:07 -0400 Subject: nvc0/ir: limit max number of regs based on availability in SM This effectively limits registers to 32 and 64 for fermi and kepler when 1024 threads are used, but allows the full amount to be used with smaller thread sizes. Signed-off-by: Ilia Mirkin Reviewed-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 +- src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nouveau') diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index da2fa4bdf18..689fecfea44 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -455,7 +455,7 @@ NVC0LegalizePostRA::visit(Function *fn) pOne = new_LValue(fn, FILE_PREDICATE); carry = new_LValue(fn, FILE_FLAGS); - rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR); + rZero->reg.data.id = (prog->getTarget()->getChipset() >= NVISA_GK20A_CHIPSET) ? 255 : 63; carry->reg.data.id = 0; pOne->reg.data.id = 7; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index fd0f8942caa..932ec397454 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -238,9 +238,11 @@ void TargetNVC0::initOpInfo() unsigned int TargetNVC0::getFileSize(DataFile file) const { + const unsigned int gprs = (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63; + const unsigned int smregs = (chipset >= NVISA_GK104_CHIPSET) ? 65536 : 32768; switch (file) { case FILE_NULL: return 0; - case FILE_GPR: return (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63; + case FILE_GPR: return MIN2(gprs, smregs / threads); case FILE_PREDICATE: return 7; case FILE_FLAGS: return 1; case FILE_ADDRESS: return 0; -- cgit v1.2.3