summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/nv40/nv40_state.h 1
-rw-r--r-- src/gallium/drivers/nv40/nv40_vertprog.c 106
2 files changed, 104 insertions, 3 deletions
diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h
index ab2866eb7a5..e018464c9f8 100644
--- a/src/gallium/drivers/nv40/nv40_state.h
+++ b/src/gallium/drivers/nv40/nv40_state.h
@@ -42,6 +42,7 @@ struct nv40_vertex_program {
uint32_t ir;
uint32_t or;
+ uint32_t clip_ctrl;
struct nouveau_stateobj *so;
};
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index 40ef7174a41..5906280e5e1 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -37,6 +37,8 @@
#define neg(s) nv40_sr_neg((s))
#define abs(s) nv40_sr_abs((s))
+#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) /* placeholder dest index for clip distance n (0..5); emit_dst() rewrites it to FOGC (n = 0..2) or PSZ (n = 3..5) */
+
struct nv40_vpc {
struct nv40_vertex_program *vp;
@@ -200,6 +202,36 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ case NV40_VP_INST_DEST_CLIP(0):
+ vp->or |= (1 << 6);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(1):
+ vp->or |= (1 << 7);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(2):
+ vp->or |= (1 << 8);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(3):
+ vp->or |= (1 << 9);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ case NV40_VP_INST_DEST_CLIP(4):
+ vp->or |= (1 << 10);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ case NV40_VP_INST_DEST_CLIP(5):
+ vp->or |= (1 << 11);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
default:
break;
}
@@ -391,6 +423,11 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
int ai = -1, ci = -1, ii = -1;
int i;
+ struct {
+ struct nv40_sreg dst;
+ unsigned c, m;
+ } clip;
+
if (finst->Instruction.Opcode == TGSI_OPCODE_END)
return TRUE;
@@ -464,6 +501,51 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+ /* When writing to a clip distance register, the instruction must be
+ * modified to change which component is written. On NV40 the clip
+ * regs are the otherwise unused components (yzw) of FOGC/PSZ.
+ */
+ clip.dst = none;
+ if (dst.type == NV40SR_OUTPUT &&
+ dst.index >= NV40_VP_INST_DEST_CLIP(0) &&
+ dst.index <= NV40_VP_INST_DEST_CLIP(5)) {
+ unsigned n = dst.index - NV40_VP_INST_DEST_CLIP(0);
+ unsigned c[] = { SWZ_Y, SWZ_Z, SWZ_W, SWZ_Y, SWZ_Z, SWZ_W };
+ unsigned m[] =
+ { MASK_Y, MASK_Z, MASK_W, MASK_Y, MASK_Z, MASK_W };
+
+ /* Some instructions we can get away with swizzling and/or
+ * changing the writemask. Others, we'll use a temp reg.
+ */
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_DST:
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LIT:
+ case TGSI_OPCODE_LOG:
+ case TGSI_OPCODE_XPD:
+ clip.dst = dst;
+ clip.c = c[n];
+ clip.m = m[n];
+ dst = temp(vpc);
+ break;
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ mask = m[n];
+ break;
+ default:
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ src[i] = nv40_sr_swz(src[i],
+ c[n], c[n], c[n], c[n]);
+ }
+ mask = m[n];
+ break;
+ }
+ }
+
switch (finst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
@@ -561,6 +643,12 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
return FALSE;
}
+ if (clip.dst.type != NV40SR_NONE) {
+ arith(vpc, 0, OP_MOV, clip.dst, clip.m,
+ nv40_sr_swz(dst, clip.c, clip.c, clip.c, clip.c),
+ none, none);
+ }
+
release_temps(vpc);
return TRUE;
}
@@ -612,6 +700,15 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
return FALSE;
}
break;
+#if 0
+ case TGSI_SEMANTIC_CLIP:
+ if (fdec->Semantic.SemanticIndex >= 6) {
+ NOUVEAU_ERR("bad clip distance index\n");
+ return FALSE;
+ }
+ hw = NV40_VP_INST_DEST_CLIP(fdec->Semantic.SemanticIndex);
+ break;
+#endif
default:
NOUVEAU_ERR("bad output semantic\n");
return FALSE;
@@ -782,6 +879,7 @@ nv40_vertprog_validate(struct nv40_context *nv40)
{
struct nouveau_winsys *nvws = nv40->nvws;
struct pipe_winsys *ws = nv40->pipe.winsys;
+ struct nouveau_grobj *curie = nv40->screen->curie;
struct nv40_vertex_program *vp;
struct pipe_buffer *constbuf;
boolean upload_code = FALSE, upload_data = FALSE;
@@ -825,12 +923,14 @@ check_gpu_resources:
assert(0);
}
- so = so_new(5, 0);
- so_method(so, nv40->screen->curie, NV40TCL_VP_START_FROM_ID, 1);
+ so = so_new(7, 0);
+ so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1);
so_data (so, vp->exec->start);
- so_method(so, nv40->screen->curie, NV40TCL_VP_ATTRIB_EN, 2);
+ so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2);
so_data (so, vp->ir);
so_data (so, vp->or);
+ so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1);
+ so_data (so, vp->clip_ctrl);
so_ref(so, &vp->so);
upload_code = TRUE;