about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Christoph Bumiller <[email protected]> 2013-03-09 12:11:38 +0100
committer Christoph Bumiller <[email protected]> 2013-03-12 12:55:37 +0100
commit ae59a7d35d2b6f23634617dc91e1baf85c9d6d81 (patch)
tree 371866130026989f2596c33a8708a74c10c93170
parent e066f2f62f6043d43385bcdce4e7fa07ffa3ecbe (diff)
nvc0: they removed the NTID,NCTAID,GRIDID registers on nve4
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_driver.h | 1
-rw-r--r-- src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp | 25
-rw-r--r-- src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp | 4
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_program.c | 1
-rw-r--r-- src/gallium/drivers/nvc0/nve4_compute.c | 32
-rw-r--r-- src/gallium/drivers/nvc0/nve4_compute.h | 26
6 files changed, 66 insertions, 23 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
index 7bdd4b9bb65..c5a5b23c320 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
@@ -164,6 +164,7 @@ struct nv50_ir_prog_info
struct {
uint32_t inputOffset; /* base address for user args */
uint32_t sharedOffset; /* reserved space in s[] */
+ uint32_t gridInfoBase; /* base address for NTID,NCTAID */
} cp;
} prop;
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
index a82465af17f..5f0f2e72231 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1304,14 +1304,22 @@ bool
NVC0LoweringPass::handleRDSV(Instruction *i)
{
Symbol *sym = i->getSrc(0)->asSym();
+ const SVSemantic sv = sym->reg.data.sv.sv;
Value *vtx = NULL;
Instruction *ld;
uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym);
- if (addr >= 0x400) // mov $sreg
+ if (addr >= 0x400) {
+ // mov $sreg
+ if (sym->reg.data.sv.index == 3) {
+ // TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID
+ i->op = OP_MOV;
+ i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0));
+ }
return true;
+ }
- switch (i->getSrc(0)->reg.data.sv.sv) {
+ switch (sv) {
case SV_POSITION:
assert(prog->getType() == Program::TYPE_FRAGMENT);
bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL);
@@ -1330,6 +1338,19 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL);
readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index);
break;
+ case SV_NTID:
+ case SV_NCTAID:
+ case SV_GRIDID:
+ assert(targ->getChipset() >= NVISA_GK104_CHIPSET); // mov $sreg otherwise
+ if (sym->reg.data.sv.index == 3) {
+ i->op = OP_MOV;
+ i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1));
+ return true;
+ }
+ addr += prog->driver->prop.cp.gridInfoBase;
+ bld.mkLoad(TYPE_U32, i->getDef(0),
+ bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL);
+ break;
default:
if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
index 3aa29e222a1..7f1ac5d539d 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
@@ -260,6 +260,7 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
const SVSemantic sv = sym->reg.data.sv.sv;
const bool isInput = shaderFile == FILE_SHADER_INPUT;
+ const bool kepler = getChipset() >= NVISA_GK104_CHIPSET;
switch (sv) {
case SV_POSITION: return 0x070 + idx * 4;
@@ -274,6 +275,9 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
case SV_FACE: return 0x3fc;
case SV_TESS_FACTOR: return 0x000 + idx * 4;
case SV_TESS_COORD: return 0x2f0 + idx * 4;
+ case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0;
+ case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0;
+ case SV_GRIDID: return kepler ? 0x18 : ~0;
default:
return 0xffffffff;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
index 592d338f446..2f4eae8f052 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -573,6 +573,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
info->io.resInfoCBSlot = 0;
info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
+ info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
}
info->io.msInfoCBSlot = 0;
info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
diff --git a/src/gallium/drivers/nvc0/nve4_compute.c b/src/gallium/drivers/nvc0/nve4_compute.c
index e823d210952..b5344e4eed6 100644
--- a/src/gallium/drivers/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nvc0/nve4_compute.c
@@ -335,24 +335,36 @@ nve4_compute_state_validate(struct nvc0_context *nvc0)
static void
-nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input)
+nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
+ const uint *block_layout,
+ const uint *grid_layout)
{
struct nvc0_screen *screen = nvc0->screen;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *cp = nvc0->compprog;
- if (!cp->parm_size)
- return;
-
+ if (cp->parm_size) {
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset);
+ PUSH_DATA (push, screen->parm->offset);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
+ PUSH_DATA (push, cp->parm_size);
+ PUSH_DATA (push, 0x1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
+ PUSH_DATAp(push, input, cp->parm_size / 4);
+ }
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->parm->offset);
- PUSH_DATA (push, screen->parm->offset);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
- PUSH_DATA (push, cp->parm_size);
+ PUSH_DATA (push, 7 * 4);
PUSH_DATA (push, 0x1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + 7);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
- PUSH_DATAp(push, input, cp->parm_size / 4);
+ PUSH_DATAp(push, block_layout, 3);
+ PUSH_DATAp(push, grid_layout, 3);
+ PUSH_DATA (push, 0);
BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
@@ -448,7 +460,7 @@ nve4_launch_grid(struct pipe_context *pipe,
nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout);
nve4_compute_dump_launch_desc(desc);
- nve4_compute_upload_input(nvc0, input);
+ nve4_compute_upload_input(nvc0, input, block_layout, grid_layout);
/* upload descriptor and flush */
#if 0
diff --git a/src/gallium/drivers/nvc0/nve4_compute.h b/src/gallium/drivers/nvc0/nve4_compute.h
index 82a77480c35..11d89283cc0 100644
--- a/src/gallium/drivers/nvc0/nve4_compute.h
+++ b/src/gallium/drivers/nvc0/nve4_compute.h
@@ -7,17 +7,21 @@
/* Input space is implemented as c0[], to which we bind the screen->parm bo.
*/
-#define NVE4_CP_INPUT_USER 0x0000
-#define NVE4_CP_INPUT_USER_LIMIT 0x1000
-#define NVE4_CP_INPUT_TEX(i) (0x1020 + (i) * 4)
-#define NVE4_CP_INPUT_TEX_STRIDE 4
-#define NVE4_CP_INPUT_TEX_MAX 32
-#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0
-#define NVE4_CP_INPUT_SUF_STRIDE 64
-#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
-#define NVE4_CP_INPUT_SUF_MAX 32
-#define NVE4_CP_INPUT_SIZE 0x1900
-#define NVE4_CP_PARAM_SIZE 0x2000
+#define NVE4_CP_INPUT_USER 0x0000
+#define NVE4_CP_INPUT_USER_LIMIT 0x1000
+#define NVE4_CP_INPUT_GRID_INFO(i) (0x1000 + (i) * 4)
+#define NVE4_CP_INPUT_NTID(i) (0x1000 + (i) * 4)
+#define NVE4_CP_INPUT_NCTAID(i) (0x100c + (i) * 4)
+#define NVE4_CP_INPUT_GRIDID 0x1018
+#define NVE4_CP_INPUT_TEX(i) (0x1040 + (i) * 4)
+#define NVE4_CP_INPUT_TEX_STRIDE 4
+#define NVE4_CP_INPUT_TEX_MAX 32
+#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0
+#define NVE4_CP_INPUT_SUF_STRIDE 64
+#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
+#define NVE4_CP_INPUT_SUF_MAX 32
+#define NVE4_CP_INPUT_SIZE 0x1900
+#define NVE4_CP_PARAM_SIZE 0x2000
struct nve4_cp_launch_desc
{