summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSamuel Pitoiset <[email protected]>2016-09-07 00:12:51 +0200
committerSamuel Pitoiset <[email protected]>2016-10-07 00:18:57 +0200
commit11e75fffeb4afc5be0021477f11e5a18a6ff6abf (patch)
treeb48f069cb7e75320e72238436d89caa3985926ee
parent590734fa0d2c97035eae33278cce6065c98b880f (diff)
nv50/ir: set number of threads/block for variable local size
When a variable local size is defined, as specified by ARB_compute_variable_group_size, the fixed local size is set to 0 and a SIGFPE occurs when we compute the maximum number of regs. Falling back to a non-zero number of threads per block avoids the crash and allows using 64 GPRs/thread.

v4: - use 512 threads on Fermi, 1024 on Kepler+

Signed-off-by: Samuel Pitoiset <[email protected]>
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target.h2
1 files changed, 2 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
index 4a701f7cb9d..eaf50cc19be 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
@@ -175,6 +175,8 @@ public:
virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) {
threads = info->prop.cp.numThreads;
+ if (threads == 0)
+ threads = info->target >= NVISA_GK104_CHIPSET ? 1024 : 512;
}
virtual bool runLegalizePass(Program *, CGStage stage) const = 0;