summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ilia Mirkin <[email protected]> 2017-01-25 22:16:56 -0500
committer: Ilia Mirkin <[email protected]> 2017-02-09 15:15:36 -0500
commit: e4a698cb97224ef22469b0d8fd703cf164d380f1 (patch)
tree: 3bd121d85c5453d858e6bb5fad10b39c73a98c50
parent: 1acdd62847cf0da8a8e9c7915d698208d73a5be8 (diff)
nv50/ir: always return 0 when trying to read thread id along unit dim
Many compute shaders define only a 1- or 2-dimensional block, but then continue to use system values that take the full 3D block into account (like gl_LocalInvocationIndex, etc.). In the special case where a dimension is exactly 1, we know that the thread id along that axis will always be 0, so return it as such and allow constant folding to fix things up.

Signed-off-by: Ilia Mirkin <[email protected]>
Reviewed-by: Pierre Moreau <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 6
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h | 2
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 10
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_target.h | 4
4 files changed, 17 insertions, 5 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index 186c9fdbedf..b67a1ddbd59 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1179,7 +1179,11 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
info->prop.gp.instanceCount = 1;
info->prop.gp.maxVertices = 1;
}
- info->prop.cp.numThreads = 1;
+ if (info->type == PIPE_SHADER_COMPUTE) {
+ info->prop.cp.numThreads[0] =
+ info->prop.cp.numThreads[1] =
+ info->prop.cp.numThreads[2] = 1;
+ }
info->io.pointSize = 0xff;
info->io.instanceId = 0xff;
info->io.vertexId = 0xff;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 65d0904d0f1..e7d840df00a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -152,7 +152,7 @@ struct nv50_ir_prog_info
uint32_t inputOffset; /* base address for user args */
uint32_t sharedOffset; /* reserved space in s[] */
uint32_t gridInfoBase; /* base address for NTID,NCTAID */
- uint32_t numThreads; /* max number of threads */
+ uint16_t numThreads[3]; /* max number of threads */
} cp;
} prop;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 80cc7fa01ac..a376b1dcc7d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1140,7 +1140,6 @@ bool Source::scanSource()
}
info->io.viewportId = -1;
- info->prop.cp.numThreads = 1;
info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
@@ -1243,9 +1242,13 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
break;
case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
+ info->prop.cp.numThreads[0] = prop->u[0].Data;
+ break;
case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
+ info->prop.cp.numThreads[1] = prop->u[0].Data;
+ break;
case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
- info->prop.cp.numThreads *= prop->u[0].Data;
+ info->prop.cp.numThreads[2] = prop->u[0].Data;
break;
case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
info->io.clipDistances = prop->u[0].Data;
@@ -2034,6 +2037,9 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
return ld->getDef(0);
case TGSI_FILE_SYSTEM_VALUE:
assert(!ptr);
+ if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&
+ info->prop.cp.numThreads[swz] == 1)
+ return zero;
ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
ld->perPatch = info->sv[idx].patch;
return ld->getDef(0);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
index eaf50cc19be..e9d10574835 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
@@ -174,7 +174,9 @@ public:
virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const = 0;
virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) {
- threads = info->prop.cp.numThreads;
+ threads = info->prop.cp.numThreads[0] *
+ info->prop.cp.numThreads[1] *
+ info->prop.cp.numThreads[2];
if (threads == 0)
threads = info->target >= NVISA_GK104_CHIPSET ? 1024 : 512;
}