From d9e396ce4a124529fa92ad967f2b3ff72534079b Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sun, 21 Feb 2010 02:26:25 +0100
Subject: nv30, nv40: non-trivially partially unify nv[34]0_shader.h

The two nv[34]0_shader.h headers are similar, except for the following differences:
1. The instruction sets are not exactly the same, but mostly similar
2. Vertex program fields are in different bit positions

This patch unifies all parts of nv[34]0_shader.h except the vertex
program fields.

Vertex opcodes are also changed so that each constant name includes
SCA if it is a scalar opcode and VEC if it is a vector opcode.
---
 src/gallium/drivers/nv40/nv40_draw.c     |   2 +-
 src/gallium/drivers/nv40/nv40_fragprog.c | 160 ++++++-------
 src/gallium/drivers/nv40/nv40_shader.h   | 380 +------------------------------
 src/gallium/drivers/nv40/nv40_vertprog.c | 166 +++++++-------
 4 files changed, 165 insertions(+), 543 deletions(-)

(limited to 'src/gallium/drivers/nv40')

diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c
index 87d2689d54b..4ed87779fd6 100644
--- a/src/gallium/drivers/nv40/nv40_draw.c
+++ b/src/gallium/drivers/nv40/nv40_draw.c
@@ -8,7 +8,7 @@
 #include "draw/draw_pipe.h"
 
 #include "nv40_context.h"
-#define NV40_SHADER_NO_FUCKEDNESS
+#define NVFX_SHADER_NO_FUCKEDNESS
 #include "nv40_shader.h"
 
 /* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 2a0ab0cf310..e044f367a0b 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -18,14 +18,14 @@
 #define MASK_Z 4
 #define MASK_W 8
 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
-#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X
-#define DEF_CTEST NV40_FP_OP_COND_TR
-#include "nv40_shader.h"
+#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X
+#define DEF_CTEST NVFX_FP_OP_COND_TR
+#include "nvfx_shader.h"
 
-#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
-#define neg(s) nv40_sr_neg((s))
-#define abs(s) nv40_sr_abs((s))
-#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v)
+#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nvfx_sr_neg((s))
+#define abs(s) nvfx_sr_abs((s))
+#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v)
 
 #define MAX_CONSTS 128
 #define MAX_IMM 32
@@ -36,8 +36,8 @@ struct nv40_fpc {
 
 	unsigned r_temps;
 	unsigned r_temps_discard;
-	struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
-	struct nv40_sreg *r_temp;
+	struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+	struct nvfx_sreg *r_temp;
 
 	int num_regs;
 
@@ -50,11 +50,11 @@ struct nv40_fpc {
 	} consts[MAX_CONSTS];
 	int nr_consts;
 
-	struct nv40_sreg imm[MAX_IMM];
+	struct nvfx_sreg imm[MAX_IMM];
 	unsigned nr_imm;
 };
 
-static INLINE struct nv40_sreg
+static INLINE struct nvfx_sreg
 temp(struct nv40_fpc *fpc)
 {
 	int idx = ffs(~fpc->r_temps) - 1;
@@ -62,12 +62,12 @@ temp(struct nv40_fpc *fpc)
 	if (idx < 0) {
 		NOUVEAU_ERR("out of temps!!\n");
 		assert(0);
-		return nv40_sr(NV40SR_TEMP, 0);
+		return nvfx_sr(NVFXSR_TEMP, 0);
 	}
 
 	fpc->r_temps |= (1 << idx);
 	fpc->r_temps_discard |= (1 << idx);
-	return nv40_sr(NV40SR_TEMP, idx);
+	return nvfx_sr(NVFXSR_TEMP, idx);
 }
 
 static INLINE void
@@ -77,7 +77,7 @@ release_temps(struct nv40_fpc *fpc)
 	fpc->r_temps_discard = 0;
 }
 
-static INLINE struct nv40_sreg
+static INLINE struct nvfx_sreg
 constant(struct nv40_fpc *fpc, int pipe, float vals[4])
 {
 	int idx;
@@ -89,14 +89,14 @@ constant(struct nv40_fpc *fpc, int pipe, float vals[4])
 	fpc->consts[idx].pipe = pipe;
 	if (pipe == -1)
 		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
-	return nv40_sr(NV40SR_CONST, idx);
+	return nvfx_sr(NVFXSR_CONST, idx);
 }
 
 #define arith(cc,s,o,d,m,s0,s1,s2) \
-	nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \
+	nv40_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \
 			(d), (m), (s0), (s1), (s2))
 #define tex(cc,s,o,u,d,m,s0,s1,s2) \
-	nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \
+	nv40_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \
 		    (d), (m), (s0), none, none)
 
 static void
@@ -109,25 +109,25 @@ grow_insns(struct nv40_fpc *fpc, int size)
 }
 
 static void
-emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
+emit_src(struct nv40_fpc *fpc, int pos, struct nvfx_sreg src)
 {
 	struct nvfx_fragment_program *fp = fpc->fp;
 	uint32_t *hw = &fp->insn[fpc->inst_offset];
 	uint32_t sr = 0;
 
 	switch (src.type) {
-	case NV40SR_INPUT:
-		sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
-		hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT);
+	case NVFXSR_INPUT:
+		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
+		hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
 		break;
-	case NV40SR_OUTPUT:
-		sr |= NV40_FP_REG_SRC_HALF;
+	case NVFXSR_OUTPUT:
+		sr |= NVFX_FP_REG_SRC_HALF;
 		/* fall-through */
-	case NV40SR_TEMP:
-		sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT);
-		sr |= (src.index << NV40_FP_REG_SRC_SHIFT);
+	case NVFXSR_TEMP:
+		sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
+		sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);
 		break;
-	case NV40SR_CONST:
+	case NVFXSR_CONST:
 		if (!fpc->have_const) {
 			grow_insns(fpc, 4);
 			fpc->have_const = 1;
@@ -149,61 +149,61 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
 				sizeof(uint32_t) * 4);
 		}
 
-		sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
+		sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
 		break;
-	case NV40SR_NONE:
-		sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
+	case NVFXSR_NONE:
+		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
 		break;
 	default:
 		assert(0);
 	}
 
 	if (src.negate)
-		sr |= NV40_FP_REG_NEGATE;
+		sr |= NVFX_FP_REG_NEGATE;
 
 	if (src.abs)
 		hw[1] |= (1 << (29 + pos));
 
-	sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) |
-	       (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) |
-	       (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) |
-	       (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT));
+	sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
+	       (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
+	       (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
+	       (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));
 
 	hw[pos + 1] |= sr;
 }
 
 static void
-emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst)
+emit_dst(struct nv40_fpc *fpc, struct nvfx_sreg dst)
 {
 	struct nvfx_fragment_program *fp = fpc->fp;
 	uint32_t *hw = &fp->insn[fpc->inst_offset];
 
 	switch (dst.type) {
-	case NV40SR_TEMP:
+	case NVFXSR_TEMP:
 		if (fpc->num_regs < (dst.index + 1))
 			fpc->num_regs = dst.index + 1;
 		break;
-	case NV40SR_OUTPUT:
+	case NVFXSR_OUTPUT:
 		if (dst.index == 1) {
 			fp->fp_control |= 0xe;
 		} else {
-			hw[0] |= NV40_FP_OP_OUT_REG_HALF;
+			hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
 		}
 		break;
-	case NV40SR_NONE:
+	case NVFXSR_NONE:
 		hw[0] |= (1 << 30);
 		break;
 	default:
 		assert(0);
 	}
 
-	hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT);
+	hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
 }
 
 static void
 nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
-	      struct nv40_sreg dst, int mask,
-	      struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
+	      struct nvfx_sreg dst, int mask,
+	      struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
 {
 	struct nvfx_fragment_program *fp = fpc->fp;
 	uint32_t *hw;
@@ -214,22 +214,22 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
 	hw = &fp->insn[fpc->inst_offset];
 	memset(hw, 0, sizeof(uint32_t) * 4);
 
-	if (op == NV40_FP_OP_OPCODE_KIL)
+	if (op == NVFX_FP_OP_OPCODE_KIL)
 		fp->fp_control |= NV40TCL_FP_CONTROL_KIL;
-	hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT);
-	hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT);
-	hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT);
+	hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT);
+	hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT);
+	hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT);
 
 	if (sat)
-		hw[0] |= NV40_FP_OP_OUT_SAT;
+		hw[0] |= NVFX_FP_OP_OUT_SAT;
 
 	if (dst.cc_update)
-		hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE;
-	hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT);
-	hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) |
-		  (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) |
-		  (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) |
-		  (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT));
+		hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
+	hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT);
+	hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
+		  (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
+		  (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
+		  (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));
 
 	emit_dst(fpc, dst);
 	emit_src(fpc, 0, s0);
@@ -239,25 +239,25 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
 
 static void
 nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit,
-	    struct nv40_sreg dst, int mask,
-	    struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
+	    struct nvfx_sreg dst, int mask,
+	    struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
 {
 	struct nvfx_fragment_program *fp = fpc->fp;
 
 	nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
 
-	fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT);
+	fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
 	fp->samplers |= (1 << unit);
 }
 
-static INLINE struct nv40_sreg
+static INLINE struct nvfx_sreg
 tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 {
-	struct nv40_sreg src;
+	struct nvfx_sreg src;
 
 	switch (fsrc->Register.File) {
 	case TGSI_FILE_INPUT:
-		src = nv40_sr(NV40SR_INPUT,
+		src = nvfx_sr(NVFXSR_INPUT,
 			      fpc->attrib_map[fsrc->Register.Index]);
 		break;
 	case TGSI_FILE_CONSTANT:
@@ -288,7 +288,7 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 	return src;
 }
 
-static INLINE struct nv40_sreg
+static INLINE struct nvfx_sreg
 tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
 	switch (fdst->Register.File) {
 	case TGSI_FILE_OUTPUT:
@@ -296,10 +296,10 @@ tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
 	case TGSI_FILE_TEMPORARY:
 		return fpc->r_temp[fdst->Register.Index];
 	case TGSI_FILE_NULL:
-		return nv40_sr(NV40SR_NONE, 0);
+		return nvfx_sr(NVFXSR_NONE, 0);
 	default:
 		NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
-		return nv40_sr(NV40SR_NONE, 0);
+		return nvfx_sr(NVFXSR_NONE, 0);
 	}
 }
 
@@ -317,10 +317,10 @@ tgsi_mask(uint tgsi)
 
 static boolean
 src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
-	       struct nv40_sreg *src)
+	       struct nvfx_sreg *src)
 {
-	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
-	struct nv40_sreg tgsi = tgsi_src(fpc, fsrc);
+	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+	struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc);
 	uint mask = 0;
 	uint c;
 
@@ -352,8 +352,8 @@ static boolean
 nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 				const struct tgsi_full_instruction *finst)
 {
-	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
-	struct nv40_sreg src[3], dst, tmp;
+	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+	struct nvfx_sreg src[3], dst, tmp;
 	int mask, sat, unit;
 	int ai = -1, ci = -1, ii = -1;
 	int i;
@@ -445,12 +445,12 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_CMP:
-		tmp = nv40_sr(NV40SR_NONE, 0);
+		tmp = nvfx_sr(NVFXSR_NONE, 0);
 		tmp.cc_update = 1;
 		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
-		dst.cc_test = NV40_VP_INST_COND_GE;
+		dst.cc_test = NVFX_VP_INST_COND_GE;
 		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
-		dst.cc_test = NV40_VP_INST_COND_LT;
+		dst.cc_test = NVFX_VP_INST_COND_LT;
 		arith(fpc, sat, MOV, dst, mask, src[1], none, none);
 		break;
 	case TGSI_OPCODE_COS:
@@ -512,10 +512,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 		arith(fpc, 0, KIL, none, 0, none, none, none);
 		break;
 	case TGSI_OPCODE_KIL:
-		dst = nv40_sr(NV40SR_NONE, 0);
+		dst = nvfx_sr(NVFXSR_NONE, 0);
 		dst.cc_update = 1;
 		arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
-		dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT;
+		dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT;
 		arith(fpc, 0, KIL, dst, 0, none, none, none);
 		break;
 	case TGSI_OPCODE_LG2:
@@ -662,25 +662,25 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
 
 	switch (fdec->Semantic.Name) {
 	case TGSI_SEMANTIC_POSITION:
-		hw = NV40_FP_OP_INPUT_SRC_POSITION;
+		hw = NVFX_FP_OP_INPUT_SRC_POSITION;
 		break;
 	case TGSI_SEMANTIC_COLOR:
 		if (fdec->Semantic.Index == 0) {
-			hw = NV40_FP_OP_INPUT_SRC_COL0;
+			hw = NVFX_FP_OP_INPUT_SRC_COL0;
 		} else
 		if (fdec->Semantic.Index == 1) {
-			hw = NV40_FP_OP_INPUT_SRC_COL1;
+			hw = NVFX_FP_OP_INPUT_SRC_COL1;
 		} else {
 			NOUVEAU_ERR("bad colour semantic index\n");
 			return FALSE;
 		}
 		break;
 	case TGSI_SEMANTIC_FOG:
-		hw = NV40_FP_OP_INPUT_SRC_FOGC;
+		hw = NVFX_FP_OP_INPUT_SRC_FOGC;
 		break;
 	case TGSI_SEMANTIC_GENERIC:
 		if (fdec->Semantic.Index <= 7) {
-			hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic.
+			hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.
 						     Index);
 		} else {
 			NOUVEAU_ERR("bad generic semantic index\n");
@@ -723,7 +723,7 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
 		return FALSE;
 	}
 
-	fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw);
+	fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);
 	fpc->r_temps |= (1 << hw);
 	return TRUE;
 }
@@ -787,7 +787,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)
 	tgsi_parse_free(&p);
 
 	if (++high_temp) {
-		fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+		fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
 		for (i = 0; i < high_temp; i++)
 			fpc->r_temp[i] = temp(fpc);
 		fpc->r_temps_discard = 0;
diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h
index 854dccf5486..8d28137e9de 100644
--- a/src/gallium/drivers/nv40/nv40_shader.h
+++ b/src/gallium/drivers/nv40/nv40_shader.h
@@ -48,14 +48,6 @@
 #define NV40_VP_INST_COND_TEST_ENABLE                                  (1 << 13)
 #define NV40_VP_INST_COND_SHIFT                                               10
 #define NV40_VP_INST_COND_MASK                                       (0x7 << 10)
-#    define NV40_VP_INST_COND_FL                                               0
-#    define NV40_VP_INST_COND_LT                                               1
-#    define NV40_VP_INST_COND_EQ                                               2
-#    define NV40_VP_INST_COND_LE                                               3
-#    define NV40_VP_INST_COND_GT                                               4
-#    define NV40_VP_INST_COND_NE                                               5
-#    define NV40_VP_INST_COND_GE                                               6
-#    define NV40_VP_INST_COND_TR                                               7
 #define NV40_VP_INST_COND_SWZ_X_SHIFT                                          8
 #define NV40_VP_INST_COND_SWZ_X_MASK                                    (3 << 8)
 #define NV40_VP_INST_COND_SWZ_Y_SHIFT                                          6
@@ -84,63 +76,12 @@
 /* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
 #define NV40_VP_INST_VEC_OPCODE_SHIFT                                         22
 #define NV40_VP_INST_VEC_OPCODE_MASK                                (0x1F << 22)
-#    define NV40_VP_INST_OP_NOP                                             0x00
-#    define NV40_VP_INST_OP_MOV                                             0x01
-#    define NV40_VP_INST_OP_MUL                                             0x02
-#    define NV40_VP_INST_OP_ADD                                             0x03
-#    define NV40_VP_INST_OP_MAD                                             0x04
-#    define NV40_VP_INST_OP_DP3                                             0x05
-#    define NV40_VP_INST_OP_DPH                                             0x06
-#    define NV40_VP_INST_OP_DP4                                             0x07
-#    define NV40_VP_INST_OP_DST                                             0x08
-#    define NV40_VP_INST_OP_MIN                                             0x09
-#    define NV40_VP_INST_OP_MAX                                             0x0A
-#    define NV40_VP_INST_OP_SLT                                             0x0B
-#    define NV40_VP_INST_OP_SGE                                             0x0C
-#    define NV40_VP_INST_OP_ARL                                             0x0D
-#    define NV40_VP_INST_OP_FRC                                             0x0E
-#    define NV40_VP_INST_OP_FLR                                             0x0F
-#    define NV40_VP_INST_OP_SEQ                                             0x10
-#    define NV40_VP_INST_OP_SFL                                             0x11
-#    define NV40_VP_INST_OP_SGT                                             0x12
-#    define NV40_VP_INST_OP_SLE                                             0x13
-#    define NV40_VP_INST_OP_SNE                                             0x14
-#    define NV40_VP_INST_OP_STR                                             0x15
-#    define NV40_VP_INST_OP_SSG                                             0x16
-#    define NV40_VP_INST_OP_ARR                                             0x17
-#    define NV40_VP_INST_OP_ARA                                             0x18
-#    define NV40_VP_INST_OP_TXL                                             0x19
 #define NV40_VP_INST_SCA_OPCODE_SHIFT                                         27
 #define NV40_VP_INST_SCA_OPCODE_MASK                                (0x1F << 27)
-#    define NV40_VP_INST_OP_NOP                                             0x00
-#    define NV40_VP_INST_OP_MOV                                             0x01
-#    define NV40_VP_INST_OP_RCP                                             0x02
-#    define NV40_VP_INST_OP_RCC                                             0x03
-#    define NV40_VP_INST_OP_RSQ                                             0x04
-#    define NV40_VP_INST_OP_EXP                                             0x05
-#    define NV40_VP_INST_OP_LOG                                             0x06
-#    define NV40_VP_INST_OP_LIT                                             0x07
-#    define NV40_VP_INST_OP_BRA                                             0x09
-#    define NV40_VP_INST_OP_CAL                                             0x0B
-#    define NV40_VP_INST_OP_RET                                             0x0C
-#    define NV40_VP_INST_OP_LG2                                             0x0D
-#    define NV40_VP_INST_OP_EX2                                             0x0E
-#    define NV40_VP_INST_OP_SIN                                             0x0F
-#    define NV40_VP_INST_OP_COS                                             0x10
-#    define NV40_VP_INST_OP_PUSHA                                           0x13
-#    define NV40_VP_INST_OP_POPA                                            0x14
 #define NV40_VP_INST_CONST_SRC_SHIFT                                          12
 #define NV40_VP_INST_CONST_SRC_MASK                                 (0xFF << 12)
 #define NV40_VP_INST_INPUT_SRC_SHIFT                                           8
 #define NV40_VP_INST_INPUT_SRC_MASK                                  (0x0F << 8)
-#    define NV40_VP_INST_IN_POS                                                0
-#    define NV40_VP_INST_IN_WEIGHT                                             1
-#    define NV40_VP_INST_IN_NORMAL                                             2
-#    define NV40_VP_INST_IN_COL0                                               3
-#    define NV40_VP_INST_IN_COL1                                               4
-#    define NV40_VP_INST_IN_FOGC                                               5
-#    define NV40_VP_INST_IN_TC0                                                8
-#    define NV40_VP_INST_IN_TC(n)                                          (8+n)
 #define NV40_VP_INST_SRC0H_SHIFT                                               0
 #define NV40_VP_INST_SRC0H_MASK                                      (0xFF << 0)
 #define NV40_VP_INST1_KNOWN ( \
@@ -194,7 +135,6 @@
 #    define NV40_VP_INST_DEST_TC(n)                                        (7+n)
 #    define NV40_VP_INST_DEST_TEMP                                          0x1F
 #define NV40_VP_INST_INDEX_CONST                                        (1 << 1)
-#define NV40_VP_INST_LAST                                               (1 << 0)
 #define NV40_VP_INST3_KNOWN ( \
                 NV40_VP_INST_SRC2L_MASK |\
                 NV40_VP_INST_SCA_WRITEMASK_MASK |\
@@ -232,325 +172,7 @@
 #    define NV40_VP_SRC_REG_TYPE_INPUT                                         2
 #    define NV40_VP_SRC_REG_TYPE_CONST                                         3
 
+#include "nvfx_shader.h"
 
-/*
- * Each fragment program opcode appears to be comprised of 4 32-bit values.
- *
- *         0 - Opcode, output reg/mask, ATTRIB source
- *         1 - Source 0
- *         2 - Source 1
- *         3 - Source 2
- *
- * There appears to be no special difference between result regs and temp regs.
- *                 result.color == R0.xyzw
- *                 result.depth == R1.z
- * When the fragprog contains instructions to write depth,
- * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1.
- *
- * Constants are inserted directly after the instruction that uses them.
- * 
- * It appears that it's not possible to use two input registers in one
- * instruction as the input sourcing is done in the instruction dword
- * and not the source selection dwords.  As such instructions such as:
- * 
- *                 ADD result.color, fragment.color, fragment.texcoord[0];
- *
- * must be split into two MOV's and then an ADD (nvidia does this) but
- * I'm not sure why it's not just one MOV and then source the second input
- * in the ADD instruction..
- *
- * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
- * negation requires multiplication with a const.
- *
- * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and
- * SWIZZLE_ONE.
- *
- * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as
- * SWIZZLE_ZERO is implemented simply by not writing to the relevant components
- * of the destination.
- *
- * Looping
- *   Loops appear to be fairly expensive on NV40 at least, the proprietary
- *   driver goes to a lot of effort to avoid using the native looping
- *   instructions.  If the total number of *executed* instructions between
- *   REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
- *   The maximum loop count is 255.
- *
- * Conditional execution
- *   TODO
- * 
- * Non-native instructions:
- *         LIT
- *         LRP - MAD+MAD
- *         SUB - ADD, negate second source
- *         RSQ - LG2 + EX2
- *         POW - LG2 + MUL + EX2
- *         SCS - COS + SIN
- *         XPD
- *         DP2 - MUL + ADD
- *         NRM
- */
-
-//== Opcode / Destination selection ==
-#define NV40_FP_OP_PROGRAM_END                                          (1 << 0)
-#define NV40_FP_OP_OUT_REG_SHIFT                                               1
-#define NV40_FP_OP_OUT_REG_MASK                                        (63 << 1)
-/* Needs to be set when writing outputs to get expected result.. */
-#define NV40_FP_OP_OUT_REG_HALF                                         (1 << 7)
-#define NV40_FP_OP_COND_WRITE_ENABLE                                    (1 << 8)
-#define NV40_FP_OP_OUTMASK_SHIFT                                               9
-#define NV40_FP_OP_OUTMASK_MASK                                       (0xF << 9)
-#    define NV40_FP_OP_OUT_X                                            (1 << 9)
-#    define NV40_FP_OP_OUT_Y                                            (1 <<10)
-#    define NV40_FP_OP_OUT_Z                                            (1 <<11)
-#    define NV40_FP_OP_OUT_W                                            (1 <<12)
-/* Uncertain about these, especially the input_src values.. it's possible that
- * they can be dynamically changed.
- */
-#define NV40_FP_OP_INPUT_SRC_SHIFT                                            13
-#define NV40_FP_OP_INPUT_SRC_MASK                                     (15 << 13)
-#    define NV40_FP_OP_INPUT_SRC_POSITION                                    0x0
-#    define NV40_FP_OP_INPUT_SRC_COL0                                        0x1
-#    define NV40_FP_OP_INPUT_SRC_COL1                                        0x2
-#    define NV40_FP_OP_INPUT_SRC_FOGC                                        0x3
-#    define NV40_FP_OP_INPUT_SRC_TC0                                         0x4
-#    define NV40_FP_OP_INPUT_SRC_TC(n)                                 (0x4 + n)
-#    define NV40_FP_OP_INPUT_SRC_FACING                                      0xE
-#define NV40_FP_OP_TEX_UNIT_SHIFT                                             17
-#define NV40_FP_OP_TEX_UNIT_MASK                                     (0xF << 17)
-#define NV40_FP_OP_PRECISION_SHIFT                                            22
-#define NV40_FP_OP_PRECISION_MASK                                      (3 << 22)
-#   define NV40_FP_PRECISION_FP32                                              0
-#   define NV40_FP_PRECISION_FP16                                              1
-#   define NV40_FP_PRECISION_FX12                                              2
-#define NV40_FP_OP_OPCODE_SHIFT                                               24
-#define NV40_FP_OP_OPCODE_MASK                                      (0x3F << 24)
-#        define NV40_FP_OP_OPCODE_NOP                                       0x00
-#        define NV40_FP_OP_OPCODE_MOV                                       0x01
-#        define NV40_FP_OP_OPCODE_MUL                                       0x02
-#        define NV40_FP_OP_OPCODE_ADD                                       0x03
-#        define NV40_FP_OP_OPCODE_MAD                                       0x04
-#        define NV40_FP_OP_OPCODE_DP3                                       0x05
-#        define NV40_FP_OP_OPCODE_DP4                                       0x06
-#        define NV40_FP_OP_OPCODE_DST                                       0x07
-#        define NV40_FP_OP_OPCODE_MIN                                       0x08
-#        define NV40_FP_OP_OPCODE_MAX                                       0x09
-#        define NV40_FP_OP_OPCODE_SLT                                       0x0A
-#        define NV40_FP_OP_OPCODE_SGE                                       0x0B
-#        define NV40_FP_OP_OPCODE_SLE                                       0x0C
-#        define NV40_FP_OP_OPCODE_SGT                                       0x0D
-#        define NV40_FP_OP_OPCODE_SNE                                       0x0E
-#        define NV40_FP_OP_OPCODE_SEQ                                       0x0F
-#        define NV40_FP_OP_OPCODE_FRC                                       0x10
-#        define NV40_FP_OP_OPCODE_FLR                                       0x11
-#        define NV40_FP_OP_OPCODE_KIL                                       0x12
-#        define NV40_FP_OP_OPCODE_PK4B                                      0x13
-#        define NV40_FP_OP_OPCODE_UP4B                                      0x14
-/* DDX/DDY can only write to XY */
-#        define NV40_FP_OP_OPCODE_DDX                                       0x15
-#        define NV40_FP_OP_OPCODE_DDY                                       0x16
-#        define NV40_FP_OP_OPCODE_TEX                                       0x17
-#        define NV40_FP_OP_OPCODE_TXP                                       0x18
-#        define NV40_FP_OP_OPCODE_TXD                                       0x19
-#        define NV40_FP_OP_OPCODE_RCP                                       0x1A
-#        define NV40_FP_OP_OPCODE_EX2                                       0x1C
-#        define NV40_FP_OP_OPCODE_LG2                                       0x1D
-#        define NV40_FP_OP_OPCODE_STR                                       0x20
-#        define NV40_FP_OP_OPCODE_SFL                                       0x21
-#        define NV40_FP_OP_OPCODE_COS                                       0x22
-#        define NV40_FP_OP_OPCODE_SIN                                       0x23
-#        define NV40_FP_OP_OPCODE_PK2H                                      0x24
-#        define NV40_FP_OP_OPCODE_UP2H                                      0x25
-#        define NV40_FP_OP_OPCODE_PK4UB                                     0x27
-#        define NV40_FP_OP_OPCODE_UP4UB                                     0x28
-#        define NV40_FP_OP_OPCODE_PK2US                                     0x29
-#        define NV40_FP_OP_OPCODE_UP2US                                     0x2A
-#        define NV40_FP_OP_OPCODE_DP2A                                      0x2E
-#        define NV40_FP_OP_OPCODE_TXL                                       0x2F
-#        define NV40_FP_OP_OPCODE_TXB                                       0x31
-#        define NV40_FP_OP_OPCODE_DIV                                       0x3A
-#        define NV40_FP_OP_OPCODE_UNK_LIT                                   0x3C
-/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
-#        define NV40_FP_OP_BRA_OPCODE_BRK                                    0x0
-#        define NV40_FP_OP_BRA_OPCODE_CAL                                    0x1
-#        define NV40_FP_OP_BRA_OPCODE_IF                                     0x2
-#        define NV40_FP_OP_BRA_OPCODE_LOOP                                   0x3
-#        define NV40_FP_OP_BRA_OPCODE_REP                                    0x4
-#        define NV40_FP_OP_BRA_OPCODE_RET                                    0x5
-#define NV40_FP_OP_OUT_SAT                                             (1 << 31)
-
-/* high order bits of SRC0 */
-#define NV40_FP_OP_OUT_ABS                                             (1 << 29)
-#define NV40_FP_OP_COND_SWZ_W_SHIFT                                           27
-#define NV40_FP_OP_COND_SWZ_W_MASK                                     (3 << 27)
-#define NV40_FP_OP_COND_SWZ_Z_SHIFT                                           25
-#define NV40_FP_OP_COND_SWZ_Z_MASK                                     (3 << 25)
-#define NV40_FP_OP_COND_SWZ_Y_SHIFT                                           23
-#define NV40_FP_OP_COND_SWZ_Y_MASK                                     (3 << 23)
-#define NV40_FP_OP_COND_SWZ_X_SHIFT                                           21
-#define NV40_FP_OP_COND_SWZ_X_MASK                                     (3 << 21)
-#define NV40_FP_OP_COND_SWZ_ALL_SHIFT                                         21
-#define NV40_FP_OP_COND_SWZ_ALL_MASK                                (0xFF << 21)
-#define NV40_FP_OP_COND_SHIFT                                                 18
-#define NV40_FP_OP_COND_MASK                                        (0x07 << 18)
-#        define NV40_FP_OP_COND_FL                                             0
-#        define NV40_FP_OP_COND_LT                                             1
-#        define NV40_FP_OP_COND_EQ                                             2
-#        define NV40_FP_OP_COND_LE                                             3
-#        define NV40_FP_OP_COND_GT                                             4
-#        define NV40_FP_OP_COND_NE                                             5
-#        define NV40_FP_OP_COND_GE                                             6
-#        define NV40_FP_OP_COND_TR                                             7
-
-/* high order bits of SRC1 */
-#define NV40_FP_OP_OPCODE_IS_BRANCH                                      (1<<31)
-#define NV40_FP_OP_DST_SCALE_SHIFT                                            28
-#define NV40_FP_OP_DST_SCALE_MASK                                      (3 << 28)
-#define NV40_FP_OP_DST_SCALE_1X                                                0
-#define NV40_FP_OP_DST_SCALE_2X                                                1
-#define NV40_FP_OP_DST_SCALE_4X                                                2
-#define NV40_FP_OP_DST_SCALE_8X                                                3
-#define NV40_FP_OP_DST_SCALE_INV_2X                                            5
-#define NV40_FP_OP_DST_SCALE_INV_4X                                            6
-#define NV40_FP_OP_DST_SCALE_INV_8X                                            7
-
-/* SRC1 LOOP */
-#define NV40_FP_OP_LOOP_INCR_SHIFT                                            19
-#define NV40_FP_OP_LOOP_INCR_MASK                                   (0xFF << 19)
-#define NV40_FP_OP_LOOP_INDEX_SHIFT                                           10
-#define NV40_FP_OP_LOOP_INDEX_MASK                                  (0xFF << 10)
-#define NV40_FP_OP_LOOP_COUNT_SHIFT                                            2
-#define NV40_FP_OP_LOOP_COUNT_MASK                                   (0xFF << 2)
-
-/* SRC1 IF */
-#define NV40_FP_OP_ELSE_ID_SHIFT                                               2
-#define NV40_FP_OP_ELSE_ID_MASK                                      (0xFF << 2)
-
-/* SRC1 CAL */
-#define NV40_FP_OP_IADDR_SHIFT                                                 2
-#define NV40_FP_OP_IADDR_MASK                                        (0xFF << 2)
-
-/* SRC1 REP
- *   I have no idea why there are 3 count values here..  but they
- *   have always been filled with the same value in my tests so
- *   far..
- */
-#define NV40_FP_OP_REP_COUNT1_SHIFT                                            2
-#define NV40_FP_OP_REP_COUNT1_MASK                                   (0xFF << 2)
-#define NV40_FP_OP_REP_COUNT2_SHIFT                                           10
-#define NV40_FP_OP_REP_COUNT2_MASK                                  (0xFF << 10)
-#define NV40_FP_OP_REP_COUNT3_SHIFT                                           19
-#define NV40_FP_OP_REP_COUNT3_MASK                                  (0xFF << 19)
-
-/* SRC2 REP/IF */
-#define NV40_FP_OP_END_ID_SHIFT                                                2
-#define NV40_FP_OP_END_ID_MASK                                       (0xFF << 2)
-
-// SRC2 high-order
-#define NV40_FP_OP_INDEX_INPUT                                         (1 << 30)
-#define NV40_FP_OP_ADDR_INDEX_SHIFT                                           19
-#define NV40_FP_OP_ADDR_INDEX_MASK                                   (0xF << 19)
-
-//== Register selection ==
-#define NV40_FP_REG_TYPE_SHIFT                                                 0
-#define NV40_FP_REG_TYPE_MASK                                           (3 << 0)
-#        define NV40_FP_REG_TYPE_TEMP                                          0
-#        define NV40_FP_REG_TYPE_INPUT                                         1
-#        define NV40_FP_REG_TYPE_CONST                                         2
-#define NV40_FP_REG_SRC_SHIFT                                                  2
-#define NV40_FP_REG_SRC_MASK                                           (63 << 2)
-#define NV40_FP_REG_SRC_HALF                                            (1 << 8)
-#define NV40_FP_REG_SWZ_ALL_SHIFT                                              9
-#define NV40_FP_REG_SWZ_ALL_MASK                                      (255 << 9)
-#define NV40_FP_REG_SWZ_X_SHIFT                                                9
-#define NV40_FP_REG_SWZ_X_MASK                                          (3 << 9)
-#define NV40_FP_REG_SWZ_Y_SHIFT                                               11
-#define NV40_FP_REG_SWZ_Y_MASK                                         (3 << 11)
-#define NV40_FP_REG_SWZ_Z_SHIFT                                               13
-#define NV40_FP_REG_SWZ_Z_MASK                                         (3 << 13)
-#define NV40_FP_REG_SWZ_W_SHIFT                                               15
-#define NV40_FP_REG_SWZ_W_MASK                                         (3 << 15)
-#        define NV40_FP_SWIZZLE_X                                              0
-#        define NV40_FP_SWIZZLE_Y                                              1
-#        define NV40_FP_SWIZZLE_Z                                              2
-#        define NV40_FP_SWIZZLE_W                                              3
-#define NV40_FP_REG_NEGATE                                             (1 << 17)
-
-#ifndef NV40_SHADER_NO_FUCKEDNESS
-#define NV40SR_NONE	0
-#define NV40SR_OUTPUT	1
-#define NV40SR_INPUT	2
-#define NV40SR_TEMP	3
-#define NV40SR_CONST	4
-
-struct nv40_sreg {
-	int type;
-	int index;
-
-	int dst_scale;
-
-	int negate;
-	int abs;
-	int swz[4];
-
-	int cc_update;
-	int cc_update_reg;
-	int cc_test;
-	int cc_test_reg;
-	int cc_swz[4];
-};
-
-static INLINE struct nv40_sreg
-nv40_sr(int type, int index)
-{
-	struct nv40_sreg temp = {
-		.type = type,
-		.index = index,
-		.dst_scale = DEF_SCALE,
-		.abs = 0,
-		.negate = 0,
-		.swz = { 0, 1, 2, 3 },
-		.cc_update = 0,
-		.cc_update_reg = 0,
-		.cc_test = DEF_CTEST,
-		.cc_test_reg = 0,
-		.cc_swz = { 0, 1, 2, 3 },
-	};
-	return temp;
-}
-
-static INLINE struct nv40_sreg
-nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w)
-{
-	struct nv40_sreg dst = src;
-
-	dst.swz[SWZ_X] = src.swz[x];
-	dst.swz[SWZ_Y] = src.swz[y];
-	dst.swz[SWZ_Z] = src.swz[z];
-	dst.swz[SWZ_W] = src.swz[w];
-	return dst;
-}
-
-static INLINE struct nv40_sreg
-nv40_sr_neg(struct nv40_sreg src)
-{
-	src.negate = !src.negate;
-	return src;
-}
-
-static INLINE struct nv40_sreg
-nv40_sr_abs(struct nv40_sreg src)
-{
-	src.abs = 1;
-	return src;
-}
-
-static INLINE struct nv40_sreg
-nv40_sr_scale(struct nv40_sreg src, int scale)
-{
-	src.dst_scale = scale;
-	return src;
-}
 #endif
 
-#endif
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index a199f0766e4..752cd0d1b3d 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -34,9 +34,9 @@
 #define DEF_CTEST 0
 #include "nv40_shader.h"
 
-#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
-#define neg(s) nv40_sr_neg((s))
-#define abs(s) nv40_sr_abs((s))
+#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nvfx_sr_neg((s))
+#define abs(s) nvfx_sr_abs((s))
 
 #define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))
 
@@ -47,17 +47,17 @@ struct nv40_vpc {
 
 	unsigned r_temps;
 	unsigned r_temps_discard;
-	struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
-	struct nv40_sreg *r_address;
-	struct nv40_sreg *r_temp;
+	struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+	struct nvfx_sreg *r_address;
+	struct nvfx_sreg *r_temp;
 
-	struct nv40_sreg *imm;
+	struct nvfx_sreg *imm;
 	unsigned nr_imm;
 
 	unsigned hpos_idx;
 };
 
-static struct nv40_sreg
+static struct nvfx_sreg
 temp(struct nv40_vpc *vpc)
 {
 	int idx = ffs(~vpc->r_temps) - 1;
@@ -65,12 +65,12 @@ temp(struct nv40_vpc *vpc)
 	if (idx < 0) {
 		NOUVEAU_ERR("out of temps!!\n");
 		assert(0);
-		return nv40_sr(NV40SR_TEMP, 0);
+		return nvfx_sr(NVFXSR_TEMP, 0);
 	}
 
 	vpc->r_temps |= (1 << idx);
 	vpc->r_temps_discard |= (1 << idx);
-	return nv40_sr(NV40SR_TEMP, idx);
+	return nvfx_sr(NVFXSR_TEMP, idx);
 }
 
 static INLINE void
@@ -80,7 +80,7 @@ release_temps(struct nv40_vpc *vpc)
 	vpc->r_temps_discard = 0;
 }
 
-static struct nv40_sreg
+static struct nvfx_sreg
 constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
 {
 	struct nvfx_vertex_program *vp = vpc->vp;
@@ -90,7 +90,7 @@ constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
 	if (pipe >= 0) {
 		for (idx = 0; idx < vp->nr_consts; idx++) {
 			if (vp->consts[idx].index == pipe)
-				return nv40_sr(NV40SR_CONST, idx);
+				return nvfx_sr(NVFXSR_CONST, idx);
 		}
 	}
 
@@ -103,37 +103,37 @@ constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
 	vpd->value[1] = y;
 	vpd->value[2] = z;
 	vpd->value[3] = w;
-	return nv40_sr(NV40SR_CONST, idx);
+	return nvfx_sr(NVFXSR_CONST, idx);
 }
 
 #define arith(cc,s,o,d,m,s0,s1,s2) \
-	nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+	nv40_vp_arith((cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2))
 
 static void
-emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src)
+emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src)
 {
 	struct nvfx_vertex_program *vp = vpc->vp;
 	uint32_t sr = 0;
 
 	switch (src.type) {
-	case NV40SR_TEMP:
+	case NVFXSR_TEMP:
 		sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT);
 		sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT);
 		break;
-	case NV40SR_INPUT:
+	case NVFXSR_INPUT:
 		sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
 		       NV40_VP_SRC_REG_TYPE_SHIFT);
 		vp->ir |= (1 << src.index);
 		hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT);
 		break;
-	case NV40SR_CONST:
+	case NVFXSR_CONST:
 		sr |= (NV40_VP_SRC_REG_TYPE_CONST <<
 		       NV40_VP_SRC_REG_TYPE_SHIFT);
 		assert(vpc->vpi->const_index == -1 ||
 		       vpc->vpi->const_index == src.index);
 		vpc->vpi->const_index = src.index;
 		break;
-	case NV40SR_NONE:
+	case NVFXSR_NONE:
 		sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
 		       NV40_VP_SRC_REG_TYPE_SHIFT);
 		break;
@@ -174,12 +174,12 @@ emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src)
 }
 
 static void
-emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
+emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst)
 {
 	struct nvfx_vertex_program *vp = vpc->vp;
 
 	switch (dst.type) {
-	case NV40SR_TEMP:
+	case NVFXSR_TEMP:
 		hw[3] |= NV40_VP_INST_DEST_MASK;
 		if (slot == 0) {
 			hw[0] |= (dst.index <<
@@ -189,7 +189,7 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
 				  NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
 		}
 		break;
-	case NV40SR_OUTPUT:
+	case NVFXSR_OUTPUT:
 		switch (dst.index) {
 		case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
 		case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
@@ -255,9 +255,9 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
 
 static void
 nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,
-	      struct nv40_sreg dst, int mask,
-	      struct nv40_sreg s0, struct nv40_sreg s1,
-	      struct nv40_sreg s2)
+	      struct nvfx_sreg dst, int mask,
+	      struct nvfx_sreg s0, struct nvfx_sreg s1,
+	      struct nvfx_sreg s2)
 {
 	struct nvfx_vertex_program *vp = vpc->vp;
 	uint32_t *hw;
@@ -269,7 +269,7 @@ nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,
 
 	hw = vpc->vpi->data;
 
-	hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT);
+	hw[0] |= (NVFX_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT);
 	hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) |
 		  (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) |
 		  (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) |
@@ -291,13 +291,13 @@ nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,
 	emit_src(vpc, hw, 2, s2);
 }
 
-static INLINE struct nv40_sreg
+static INLINE struct nvfx_sreg
 tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
-	struct nv40_sreg src;
+	struct nvfx_sreg src;
 
 	switch (fsrc->Register.File) {
 	case TGSI_FILE_INPUT:
-		src = nv40_sr(NV40SR_INPUT, fsrc->Register.Index);
+		src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index);
 		break;
 	case TGSI_FILE_CONSTANT:
 		src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
@@ -322,9 +322,9 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
 	return src;
 }
 
-static INLINE struct nv40_sreg
+static INLINE struct nvfx_sreg
 tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
-	struct nv40_sreg dst;
+	struct nvfx_sreg dst;
 
 	switch (fdst->Register.File) {
 	case TGSI_FILE_OUTPUT:
@@ -358,10 +358,10 @@ tgsi_mask(uint tgsi)
 
 static boolean
 src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
-	       struct nv40_sreg *src)
+	       struct nvfx_sreg *src)
 {
-	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
-	struct nv40_sreg tgsi = tgsi_src(vpc, fsrc);
+	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+	struct nvfx_sreg tgsi = tgsi_src(vpc, fsrc);
 	uint mask = 0;
 	uint c;
 
@@ -384,7 +384,7 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
 	*src = temp(vpc);
 
 	if (mask)
-		arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none);
+		arith(vpc, VEC, MOV, *src, mask, tgsi, none, none);
 
 	return FALSE;
 }
@@ -393,8 +393,8 @@ static boolean
 nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 				const struct tgsi_full_instruction *finst)
 {
-	struct nv40_sreg src[3], dst, tmp;
-	struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+	struct nvfx_sreg src[3], dst, tmp;
+	struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
 	int mask;
 	int ai = -1, ci = -1, ii = -1;
 	int i;
@@ -434,7 +434,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
-				arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+				arith(vpc, VEC, MOV, src[i], MASK_ALL,
 				      tgsi_src(vpc, fsrc), none, none);
 			}
 			break;
@@ -445,7 +445,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
-				arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+				arith(vpc, VEC, MOV, src[i], MASK_ALL,
 				      tgsi_src(vpc, fsrc), none, none);
 			}
 			break;
@@ -456,7 +456,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
-				arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+				arith(vpc, VEC, MOV, src[i], MASK_ALL,
 				      tgsi_src(vpc, fsrc), none, none);
 			}
 			break;
@@ -474,93 +474,93 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 
 	switch (finst->Instruction.Opcode) {
 	case TGSI_OPCODE_ABS:
-		arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+		arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none);
 		break;
 	case TGSI_OPCODE_ADD:
-		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+		arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]);
 		break;
 	case TGSI_OPCODE_ARL:
-		arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+		arith(vpc, VEC, ARL, dst, mask, src[0], none, none);
 		break;
 	case TGSI_OPCODE_DP3:
-		arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_DP4:
-		arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_DPH:
-		arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_DST:
-		arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, DST, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_EX2:
-		arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, EX2, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_EXP:
-		arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, EXP, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_FLR:
-		arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+		arith(vpc, VEC, FLR, dst, mask, src[0], none, none);
 		break;
 	case TGSI_OPCODE_FRC:
-		arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+		arith(vpc, VEC, FRC, dst, mask, src[0], none, none);
 		break;
 	case TGSI_OPCODE_LG2:
-		arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, LG2, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_LIT:
-		arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, LIT, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_LOG:
-		arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, LOG, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_MAD:
-		arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+		arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]);
 		break;
 	case TGSI_OPCODE_MAX:
-		arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_MIN:
-		arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_MOV:
-		arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+		arith(vpc, VEC, MOV, dst, mask, src[0], none, none);
 		break;
 	case TGSI_OPCODE_MUL:
-		arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_POW:
 		tmp = temp(vpc);
-		arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+		arith(vpc, SCA, LG2, tmp, MASK_X, none, none,
 		      swz(src[0], X, X, X, X));
-		arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+		arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
 		      swz(src[1], X, X, X, X), none);
-		arith(vpc, 1, OP_EX2, dst, mask, none, none,
+		arith(vpc, SCA, EX2, dst, mask, none, none,
 		      swz(tmp, X, X, X, X));
 		break;
 	case TGSI_OPCODE_RCP:
-		arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, RCP, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_RET:
 		break;
 	case TGSI_OPCODE_RSQ:
-		arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0]));
+		arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0]));
 		break;
 	case TGSI_OPCODE_SGE:
-		arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_SLT:
-		arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_SUB:
-		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+		arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1]));
 		break;
 	case TGSI_OPCODE_XPD:
 		tmp = temp(vpc);
-		arith(vpc, 0, OP_MUL, tmp, mask,
+		arith(vpc, VEC, MUL, tmp, mask,
 		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
-		arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+		arith(vpc, VEC, MAD, dst, (mask & ~MASK_W),
 		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
 		      neg(tmp));
 		break;
@@ -630,7 +630,7 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
 		return FALSE;
 	}
 
-	vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw);
+	vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);
 	return TRUE;
 }
 
@@ -702,18 +702,18 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)
 	tgsi_parse_free(&p);
 
 	if (nr_imm) {
-		vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg));
+		vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg));
 		assert(vpc->imm);
 	}
 
 	if (++high_temp) {
-		vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+		vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
 		for (i = 0; i < high_temp; i++)
 			vpc->r_temp[i] = temp(vpc);
 	}
 
 	if (++high_addr) {
-		vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg));
+		vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg));
 		for (i = 0; i < high_addr; i++)
 			vpc->r_address[i] = temp(vpc);
 	}
@@ -728,7 +728,7 @@ nv40_vertprog_translate(struct nvfx_context *nvfx,
 {
 	struct tgsi_parse_context parse;
 	struct nv40_vpc *vpc = NULL;
-	struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+	struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
 	int i;
 
 	vpc = CALLOC(1, sizeof(struct nv40_vpc));
@@ -785,24 +785,24 @@ nv40_vertprog_translate(struct nvfx_context *nvfx,
 	}
 
 	/* Write out HPOS if it was redirected to a temp earlier */
-	if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) {
-		struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT,
+	if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) {
+		struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT,
 						NV40_VP_INST_DEST_POS);
-		struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+		struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx];
 
-		arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none);
+		arith(vpc, VEC, MOV, hpos, MASK_ALL, htmp, none, none);
 	}
 
 	/* Insert code to handle user clip planes */
 	for (i = 0; i < vp->ucp.nr; i++) {
-		struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT,
+		struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT,
 						NV40_VP_INST_DEST_CLIP(i));
-		struct nv40_sreg ceqn = constant(vpc, -1,
+		struct nvfx_sreg ceqn = constant(vpc, -1,
 						 nvfx->clip.ucp[i][0],
 						 nvfx->clip.ucp[i][1],
 						 nvfx->clip.ucp[i][2],
 						 nvfx->clip.ucp[i][3]);
-		struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+		struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx];
 		unsigned mask;
 
 		switch (i) {
@@ -814,10 +814,10 @@ nv40_vertprog_translate(struct nvfx_context *nvfx,
 			goto out_err;
 		}
 
-		arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none);
+		arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none);
 	}
 
-	vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST;
+	vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
 	vp->translated = TRUE;
 out_err:
 	tgsi_parse_free(&parse);
-- 
cgit v1.2.3