summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Pierre Moreau <[email protected]> 2017-10-02 20:57:10 +0200
committer Ilia Mirkin <[email protected]> 2017-11-04 14:12:07 -0400
commit efe532b73934299bb6eeeecf0aaab7145317fd51 (patch)
tree 09c98957c67bfc06dfea885072c894b5bead5af5
parent 49752e99f8f109a1b1adc6740c4e3e2a50d0e09b (diff)
nv50,nvc0: Copy shared memory per block to the program info structure and back
In OpenCL/CUDA kernels, shared memory usage can be defined within the kernel code. That usage is only picked up while parsing the SPIR-V, during the translation phase of the program. Signed-off-by: Pierre Moreau <[email protected]>
-rw-r--r-- src/gallium/drivers/nouveau/nv50/nv50_program.c 2
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_program.c 2
2 files changed, 4 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 92e73f8c12c..6b472d7fdd0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -336,6 +336,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
info->bin.sourceRep = PIPE_SHADER_IR_TGSI;
info->bin.source = (void *)prog->pipe.tokens;
+ info->bin.smemSize = prog->cp.smem_size;
info->io.auxCBSlot = 15;
info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
info->io.genUserClip = prog->vp.clpd_nr;
@@ -382,6 +383,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
prog->interps = info->bin.fixupData;
prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
prog->tls_space = info->bin.tlsSpace;
+ prog->cp.smem_size = info->bin.smemSize;
prog->mul_zero_wins = info->io.mul_zero_wins;
prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index e43a8de9f59..a6112f401ef 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -579,6 +579,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->optLevel = 3;
#endif
+ info->bin.smemSize = prog->cp.smem_size;
info->io.genUserClip = prog->vp.num_ucps;
info->io.auxCBSlot = 15;
info->io.msInfoCBSlot = 15;
@@ -618,6 +619,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
prog->relocs = info->bin.relocData;
prog->fixups = info->bin.fixupData;
prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
+ prog->cp.smem_size = info->bin.smemSize;
prog->num_barriers = info->numBarriers;
prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;