summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nvfx
diff options
context:
space:
mode:
authorLuca Barbieri <[email protected]>2010-02-21 15:13:12 +0100
committerYounes Manton <[email protected]>2010-03-15 00:03:04 -0400
commitf9d09a2e7859a2cf025d71b7c3cb189edb6688c4 (patch)
treee53f6e5ff88aeb1bb8796822c02f20225b7e01ca /src/gallium/drivers/nvfx
parent7d210fa05f286eb19398ac2f8c8f631f6f83c859 (diff)
nv30, nv40: move last files to nvfx/ and rm -rf nv30 nv40
This is the last nvfx unification patch. nv[34]0_fragtex.c are moved to the common directory nv[34]0_shader.h are renamed to nv[34]0_vertprog.h and moved to the common directory The separate nv30 and nv40 directories are removed from the build system
Diffstat (limited to 'src/gallium/drivers/nvfx')
-rw-r--r--src/gallium/drivers/nvfx/Makefile2
-rw-r--r--src/gallium/drivers/nvfx/nv30_fragtex.c147
-rw-r--r--src/gallium/drivers/nvfx/nv30_vertprog.h169
-rw-r--r--src/gallium/drivers/nvfx/nv40_fragtex.c174
-rw-r--r--src/gallium/drivers/nvfx/nv40_vertprog.h177
-rw-r--r--src/gallium/drivers/nvfx/nvfx_draw.c4
-rw-r--r--src/gallium/drivers/nvfx/nvfx_vertprog.c4
7 files changed, 673 insertions, 4 deletions
diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile
index 51fa34cfad2..e912177b211 100644
--- a/src/gallium/drivers/nvfx/Makefile
+++ b/src/gallium/drivers/nvfx/Makefile
@@ -9,6 +9,8 @@ C_SOURCES = \
nvfx_draw.c \
nvfx_fragprog.c \
nvfx_fragtex.c \
+ nv30_fragtex.c \
+ nv40_fragtex.c \
nvfx_miptree.c \
nvfx_query.c \
nvfx_screen.c \
diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c
new file mode 100644
index 00000000000..2b56f454921
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv30_fragtex.c
@@ -0,0 +1,147 @@
+#include "util/u_format.h"
+
+#include "nvfx_context.h"
+#include "nouveau/nouveau_util.h"
+#include "nvfx_tex.h"
+
+void
+nv30_sampler_state_init(struct pipe_context *pipe,
+ struct nvfx_sampler_state *ps,
+ const struct pipe_sampler_state *cso)
+{
+ if (cso->max_anisotropy >= 8) {
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_8X;
+ } else
+ if (cso->max_anisotropy >= 4) {
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_4X;
+ } else
+ if (cso->max_anisotropy >= 2) {
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_2X;
+ }
+
+ {
+ float limit;
+
+ limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+ ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;
+
+ limit = CLAMP(cso->max_lod, 0.0, 15.0);
+ ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/;
+
+ limit = CLAMP(cso->min_lod, 0.0, 15.0);
+ ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/;
+ }
+}
+
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV34TCL_TX_FORMAT_FORMAT_##tf, \
+ (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \
+ NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \
+ NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \
+ NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w) \
+}
+
+struct nv30_texture_format {
+ boolean defined;
+ uint pipe;
+ int format;
+ int swizzle;
+};
+
+static struct nv30_texture_format
+nv30_texture_formats[] = {
+ _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W),
+ _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W),
+ _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W),
+ _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W),
+ _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W),
+ _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X),
+ _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X),
+ _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X),
+ _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y),
+ _(Z16_UNORM , R5G6B5 , S1, S1, S1, ONE, X, X, X, X),
+ _(S8Z24_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X),
+ _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W),
+ _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W),
+ _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W),
+ _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W),
+ {},
+};
+
+static struct nv30_texture_format *
+nv30_fragtex_format(uint pipe_format)
+{
+ struct nv30_texture_format *tf = nv30_texture_formats;
+
+ while (tf->defined) {
+ if (tf->pipe == pipe_format)
+ return tf;
+ tf++;
+ }
+
+ NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format));
+ return NULL;
+}
+
+
+struct nouveau_stateobj *
+nv30_fragtex_build(struct nvfx_context *nvfx, int unit)
+{
+ struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
+ struct nvfx_miptree *nv30mt = nvfx->tex_miptree[unit];
+ struct pipe_texture *pt = &nv30mt->base;
+ struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer);
+ struct nv30_texture_format *tf;
+ struct nouveau_stateobj *so;
+ uint32_t txf, txs;
+ unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+ tf = nv30_fragtex_format(pt->format);
+ if (!tf)
+ return NULL;
+
+ txf = tf->format;
+ txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0);
+ txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
+ txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
+ txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
+ txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV34TCL_TX_FORMAT_CUBIC;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_2D;
+ break;
+ case PIPE_TEXTURE_3D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_3D;
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_1D;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return NULL;
+ }
+
+ txs = tf->swizzle;
+
+ so = so_new(1, 8, 2);
+ so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8);
+ so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR,
+ NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
+ so_data (so, ps->wrap);
+ so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en);
+ so_data (so, txs);
+ so_data (so, ps->filt | 0x2000 /*voodoo*/);
+ so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
+ pt->height0);
+ so_data (so, ps->bcol);
+
+ return so;
+}
diff --git a/src/gallium/drivers/nvfx/nv30_vertprog.h b/src/gallium/drivers/nvfx/nv30_vertprog.h
new file mode 100644
index 00000000000..ec0444c07f8
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv30_vertprog.h
@@ -0,0 +1,169 @@
+#ifndef __NV30_SHADER_H__
+#define __NV30_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * 128bit opcodes, split into 4 32-bit ones for ease of use.
+ *
+ * Non-native instructions
+ * ABS - MOV + NV40_VP_INST0_DEST_ABS
+ * POW - EX2 + MUL + LG2
+ * SUB - ADD, second source negated
+ * SWZ - MOV
+ * XPD -
+ *
+ * Register access
+ * - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
+ * - Only one CONST can be accessed per-instruction (move extras into TEMPs)
+ *
+ * Relative Addressing
+ * According to the value returned for
+ * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB
+ *
+ * there are only two address registers available. The destination in the
+ * ARL instruction is set to TEMP <n> (The temp isn't actually written).
+ *
+ * When using vanilla ARB_v_p, the proprietary driver will squish both the
+ * available ADDRESS regs into the first hardware reg in the X and Y
+ * components.
+ *
+ * To use an address reg as an index into consts, the CONST_SRC is set to
+ * (const_base + offset) and INDEX_CONST is set.
+ *
+ * To access the second address reg use ADDR_REG_SELECT_1. A particular
+ * component of the address regs is selected with ADDR_SWZ.
+ *
+ * Only one address register can be accessed per instruction.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details) Conditional
+ * execution of an instruction is enabled by setting COND_TEST_ENABLE, and
+ * selecting the condition which will allow the test to pass with
+ * COND_{FL,LT,...}. It is possible to swizzle the values in the condition
+ * register, which allows for testing against an individual component.
+ *
+ * Branching:
+ *
+ * The BRA/CAL instructions seem to follow a slightly different opcode
+ * layout. The destination instruction ID (IADDR) overlaps a source field.
+ * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO
+ * command, and is incremented automatically on each UPLOAD_INST FIFO
+ * command.
+ *
+ * Conditional branching is achieved by using the condition tests described
+ * above. There doesn't appear to be dedicated looping instructions, but
+ * this can be done using a temp reg + conditional branching.
+ *
+ * Subroutines may be uploaded before the main program itself, but the first
+ * executed instruction is determined by the PROGRAM_START_ID FIFO command.
+ *
+ */
+
+/* DWORD 0 */
+
+#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */
+#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */
+#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */
+#define NV30_VP_INST_VEC_RESULT (1 << 20)
+#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16
+#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16)
+#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15)
+#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16)
+#define NV30_VP_INST_COND_TEST_ENABLE (1<<14)
+#define NV30_VP_INST_COND_SHIFT 11
+#define NV30_VP_INST_COND_MASK (0x07 << 11)
+#define NV30_VP_INST_COND_SWZ_X_SHIFT 9
+#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9)
+#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7
+#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7)
+#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5
+#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5)
+#define NV30_VP_INST_COND_SWZ_W_SHIFT 3
+#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3)
+#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3
+#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3)
+#define NV30_VP_INST_ADDR_SWZ_SHIFT 1
+#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1)
+#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0
+#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0)
+
+/* DWORD 1 */
+#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28
+#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28)
+#define NV30_VP_INST_VEC_OPCODE_SHIFT 23
+#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23)
+#define NV30_VP_INST_CONST_SRC_SHIFT 14
+#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14)
+#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/
+#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/
+#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/
+#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/
+
+/* Please note: the IADDR fields overlap other fields because they are used
+ * only for branch instructions. See Branching: label above
+ *
+ * DWORD 2
+ */
+#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/
+#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */
+#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/
+#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/
+#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/
+#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/
+#define NV30_VP_INST_IADDR_SHIFT 2
+#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */
+
+/* DWORD 3 */
+#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/
+#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/
+#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24
+#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24)
+#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20
+#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20)
+#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16
+#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16)
+#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/
+#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/
+#define NV30_VP_INST_DEST_SHIFT 2
+#define NV30_VP_INST_DEST_MASK (0x0F << 2)
+# define NV30_VP_INST_DEST_POS 0
+# define NV30_VP_INST_DEST_BFC0 1
+# define NV30_VP_INST_DEST_BFC1 2
+# define NV30_VP_INST_DEST_COL0 3
+# define NV30_VP_INST_DEST_COL1 4
+# define NV30_VP_INST_DEST_FOGC 5
+# define NV30_VP_INST_DEST_PSZ 6
+# define NV30_VP_INST_DEST_TC(n) (8+n)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV30_VP_SRC0_HIGH_SHIFT 6
+#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0
+#define NV30_VP_SRC0_LOW_MASK 0x0000003F
+#define NV30_VP_SRC2_HIGH_SHIFT 4
+#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0
+#define NV30_VP_SRC2_LOW_MASK 0x0000000F
+
+
+/* Source-register definition - matches NV20 exactly */
+#define NV30_VP_SRC_NEGATE (1<<14)
+#define NV30_VP_SRC_SWZ_X_SHIFT 12
+#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12)
+#define NV30_VP_SRC_SWZ_Y_SHIFT 10
+#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10)
+#define NV30_VP_SRC_SWZ_Z_SHIFT 8
+#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8)
+#define NV30_VP_SRC_SWZ_W_SHIFT 6
+#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6)
+#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6
+#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6)
+#define NV30_VP_SRC_TEMP_SRC_SHIFT 2
+#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0)
+#define NV30_VP_SRC_REG_TYPE_SHIFT 0
+#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0)
+#define NV30_VP_SRC_REG_TYPE_TEMP 1
+#define NV30_VP_SRC_REG_TYPE_INPUT 2
+#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */
+
+#include "nvfx_shader.h"
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c
new file mode 100644
index 00000000000..5889b5e40d5
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv40_fragtex.c
@@ -0,0 +1,174 @@
+#include "util/u_format.h"
+#include "nvfx_context.h"
+#include "nvfx_tex.h"
+
+void
+nv40_sampler_state_init(struct pipe_context *pipe,
+ struct nvfx_sampler_state *ps,
+ const struct pipe_sampler_state *cso)
+{
+ if (cso->max_anisotropy >= 2) {
+ /* no idea, binary driver sets it, works without it.. meh.. */
+ ps->wrap |= (1 << 5);
+
+ if (cso->max_anisotropy >= 16) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X;
+ } else
+ if (cso->max_anisotropy >= 12) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X;
+ } else
+ if (cso->max_anisotropy >= 10) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X;
+ } else
+ if (cso->max_anisotropy >= 8) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X;
+ } else
+ if (cso->max_anisotropy >= 6) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X;
+ } else
+ if (cso->max_anisotropy >= 4) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X;
+ } else {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X;
+ }
+ }
+
+ {
+ float limit;
+
+ limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+ ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;
+
+ limit = CLAMP(cso->max_lod, 0.0, 15.0);
+ ps->en |= (int)(limit * 256.0) << 7;
+
+ limit = CLAMP(cso->min_lod, 0.0, 15.0);
+ ps->en |= (int)(limit * 256.0) << 19;
+ }
+}
+
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV40TCL_TEX_FORMAT_FORMAT_##tf, \
+ (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \
+ NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \
+ NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \
+ NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w), \
+ ((NV34TCL_TX_FILTER_SIGNED_RED*sx) | (NV34TCL_TX_FILTER_SIGNED_GREEN*sy) | \
+ (NV34TCL_TX_FILTER_SIGNED_BLUE*sz) | (NV34TCL_TX_FILTER_SIGNED_ALPHA*sw)) \
+}
+
+struct nv40_texture_format {
+ boolean defined;
+ uint pipe;
+ int format;
+ int swizzle;
+ int sign;
+};
+
+static struct nv40_texture_format
+nv40_texture_formats[] = {
+ _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
+ _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
+ _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0),
+ _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1),
+ _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0),
+ _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0),
+ _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(S8Z24_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ {},
+};
+
+static struct nv40_texture_format *
+nv40_fragtex_format(uint pipe_format)
+{
+ struct nv40_texture_format *tf = nv40_texture_formats;
+
+ while (tf->defined) {
+ if (tf->pipe == pipe_format)
+ return tf;
+ tf++;
+ }
+
+ NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format));
+ return NULL;
+}
+
+
+struct nouveau_stateobj *
+nv40_fragtex_build(struct nvfx_context *nvfx, int unit)
+{
+ struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
+ struct nvfx_miptree *nv40mt = nvfx->tex_miptree[unit];
+ struct nouveau_bo *bo = nouveau_bo(nv40mt->buffer);
+ struct pipe_texture *pt = &nv40mt->base;
+ struct nv40_texture_format *tf;
+ struct nouveau_stateobj *so;
+ uint32_t txf, txs, txp;
+ unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+ tf = nv40_fragtex_format(pt->format);
+ if (!tf)
+ assert(0);
+
+ txf = ps->fmt;
+ txf |= tf->format | 0x8000;
+ txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT);
+
+ if (1) /* XXX */
+ txf |= NV34TCL_TX_FORMAT_NO_BORDER;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV34TCL_TX_FORMAT_CUBIC;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_2D;
+ break;
+ case PIPE_TEXTURE_3D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_3D;
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_1D;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return NULL;
+ }
+
+ if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ txp = 0;
+ } else {
+ txp = nv40mt->level[0].pitch;
+ txf |= NV40TCL_TEX_FORMAT_LINEAR;
+ }
+
+ txs = tf->swizzle;
+
+ so = so_new(2, 9, 2);
+ so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8);
+ so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR,
+ NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
+ so_data (so, ps->wrap);
+ so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en);
+ so_data (so, txs);
+ so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/);
+ so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
+ pt->height0);
+ so_data (so, ps->bcol);
+ so_method(so, nvfx->screen->eng3d, NV40TCL_TEX_SIZE1(unit), 1);
+ so_data (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
+
+ return so;
+}
diff --git a/src/gallium/drivers/nvfx/nv40_vertprog.h b/src/gallium/drivers/nvfx/nv40_vertprog.h
new file mode 100644
index 00000000000..7337293babc
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv40_vertprog.h
@@ -0,0 +1,177 @@
+#ifndef __NV40_SHADER_H__
+#define __NV40_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * The NV40 instruction set is very similar to NV30. Most fields are in
+ * a slightly different position in the instruction however.
+ *
+ * Merged instructions
+ * In some cases it is possible to put two instructions into one opcode
+ * slot. The rules for when this is OK is not entirely clear to me yet.
+ *
+ * There are separate writemasks and dest temp register fields for each
+ * grouping of instructions. There is however only one field with the
+ * ID of a result register. Writing to temp/result regs is selected by
+ * setting VEC_RESULT/SCA_RESULT.
+ *
+ * Temporary registers
+ * The source/dest temp register fields have been extended by 1 bit, to
+ * give a total of 32 temporary registers.
+ *
+ * Relative Addressing
+ * NV40 can use an address register to index into vertex attribute regs.
+ * This is done by putting the offset value into INPUT_SRC and setting
+ * the INDEX_INPUT flag.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details)
+ * There is a second condition code register on NV40, it's use is enabled
+ * by setting the COND_REG_SELECT_1 flag.
+ *
+ * Texture lookup
+ * TODO
+ */
+
+/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */
+#define NV40_VP_INST_VEC_RESULT (1 << 30)
+/* uncertain.. */
+#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29)
+/* use address reg as index into attribs */
+#define NV40_VP_INST_INDEX_INPUT (1 << 27)
+#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25)
+#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV40_VP_INST_SRC2_ABS (1 << 23)
+#define NV40_VP_INST_SRC1_ABS (1 << 22)
+#define NV40_VP_INST_SRC0_ABS (1 << 21)
+#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15
+#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15)
+#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13)
+#define NV40_VP_INST_COND_SHIFT 10
+#define NV40_VP_INST_COND_MASK (0x7 << 10)
+#define NV40_VP_INST_COND_SWZ_X_SHIFT 8
+#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8)
+#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6
+#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6)
+#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4
+#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4)
+#define NV40_VP_INST_COND_SWZ_W_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2)
+#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2)
+#define NV40_VP_INST_ADDR_SWZ_SHIFT 0
+#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0)
+#define NV40_VP_INST0_KNOWN ( \
+ NV40_VP_INST_INDEX_INPUT | \
+ NV40_VP_INST_COND_REG_SELECT_1 | \
+ NV40_VP_INST_ADDR_REG_SELECT_1 | \
+ NV40_VP_INST_SRC2_ABS | \
+ NV40_VP_INST_SRC1_ABS | \
+ NV40_VP_INST_SRC0_ABS | \
+ NV40_VP_INST_VEC_DEST_TEMP_MASK | \
+ NV40_VP_INST_COND_TEST_ENABLE | \
+ NV40_VP_INST_COND_MASK | \
+ NV40_VP_INST_COND_SWZ_ALL_MASK | \
+ NV40_VP_INST_ADDR_SWZ_MASK)
+
+/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
+#define NV40_VP_INST_VEC_OPCODE_SHIFT 22
+#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22)
+#define NV40_VP_INST_SCA_OPCODE_SHIFT 27
+#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27)
+#define NV40_VP_INST_CONST_SRC_SHIFT 12
+#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12)
+#define NV40_VP_INST_INPUT_SRC_SHIFT 8
+#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8)
+#define NV40_VP_INST_SRC0H_SHIFT 0
+#define NV40_VP_INST_SRC0H_MASK (0xFF << 0)
+#define NV40_VP_INST1_KNOWN ( \
+ NV40_VP_INST_VEC_OPCODE_MASK | \
+ NV40_VP_INST_SCA_OPCODE_MASK | \
+ NV40_VP_INST_CONST_SRC_MASK | \
+ NV40_VP_INST_INPUT_SRC_MASK | \
+ NV40_VP_INST_SRC0H_MASK \
+ )
+
+/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */
+#define NV40_VP_INST_SRC0L_SHIFT 23
+#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23)
+#define NV40_VP_INST_SRC1_SHIFT 6
+#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6)
+#define NV40_VP_INST_SRC2H_SHIFT 0
+#define NV40_VP_INST_SRC2H_MASK (0x3F << 0)
+#define NV40_VP_INST_IADDRH_SHIFT 0
+#define NV40_VP_INST_IADDRH_MASK (0x1F << 0)
+
+/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
+#define NV40_VP_INST_IADDRL_SHIFT 29
+#define NV40_VP_INST_IADDRL_MASK (7 << 29)
+#define NV40_VP_INST_SRC2L_SHIFT 21
+#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21)
+#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17
+#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17)
+# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20)
+# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19)
+# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18)
+# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17)
+#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13
+#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13)
+# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16)
+# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15)
+# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14)
+# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13)
+#define NV40_VP_INST_SCA_RESULT (1 << 12)
+#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7
+#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7)
+#define NV40_VP_INST_DEST_SHIFT 2
+#define NV40_VP_INST_DEST_MASK (31 << 2)
+# define NV40_VP_INST_DEST_POS 0
+# define NV40_VP_INST_DEST_COL0 1
+# define NV40_VP_INST_DEST_COL1 2
+# define NV40_VP_INST_DEST_BFC0 3
+# define NV40_VP_INST_DEST_BFC1 4
+# define NV40_VP_INST_DEST_FOGC 5
+# define NV40_VP_INST_DEST_PSZ 6
+# define NV40_VP_INST_DEST_TC0 7
+# define NV40_VP_INST_DEST_TC(n) (7+n)
+# define NV40_VP_INST_DEST_TEMP 0x1F
+#define NV40_VP_INST_INDEX_CONST (1 << 1)
+#define NV40_VP_INST3_KNOWN ( \
+ NV40_VP_INST_SRC2L_MASK |\
+ NV40_VP_INST_SCA_WRITEMASK_MASK |\
+ NV40_VP_INST_VEC_WRITEMASK_MASK |\
+ NV40_VP_INST_SCA_DEST_TEMP_MASK |\
+ NV40_VP_INST_DEST_MASK |\
+ NV40_VP_INST_INDEX_CONST)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV40_VP_SRC0_HIGH_SHIFT 9
+#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00
+#define NV40_VP_SRC0_LOW_MASK 0x000001FF
+#define NV40_VP_SRC2_HIGH_SHIFT 11
+#define NV40_VP_SRC2_HIGH_MASK 0x0001F800
+#define NV40_VP_SRC2_LOW_MASK 0x000007FF
+
+/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */
+#define NV40_VP_SRC_NEGATE (1 << 16)
+#define NV40_VP_SRC_SWZ_X_SHIFT 14
+#define NV40_VP_SRC_SWZ_X_MASK (3 << 14)
+#define NV40_VP_SRC_SWZ_Y_SHIFT 12
+#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12)
+#define NV40_VP_SRC_SWZ_Z_SHIFT 10
+#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10)
+#define NV40_VP_SRC_SWZ_W_SHIFT 8
+#define NV40_VP_SRC_SWZ_W_MASK (3 << 8)
+#define NV40_VP_SRC_SWZ_ALL_SHIFT 8
+#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8)
+#define NV40_VP_SRC_TEMP_SRC_SHIFT 2
+#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2)
+#define NV40_VP_SRC_REG_TYPE_SHIFT 0
+#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0)
+# define NV40_VP_SRC_REG_TYPE_UNK0 0
+# define NV40_VP_SRC_REG_TYPE_TEMP 1
+# define NV40_VP_SRC_REG_TYPE_INPUT 2
+# define NV40_VP_SRC_REG_TYPE_CONST 3
+
+#include "nvfx_shader.h"
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c
index 8700e143297..7308f0667c7 100644
--- a/src/gallium/drivers/nvfx/nvfx_draw.c
+++ b/src/gallium/drivers/nvfx/nvfx_draw.c
@@ -9,8 +9,8 @@
#include "nvfx_context.h"
#define NVFX_SHADER_NO_FUCKEDNESS
-#include "nv30/nv30_shader.h"
-#include "nv40/nv40_shader.h"
+#include "nv30_vertprog.h"
+#include "nv40_vertprog.h"
/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
* often at all. Uses "quadro style" vertex submission + a fixed vertex
diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c
index 730361a9827..3d0e8c23a15 100644
--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -33,8 +33,8 @@
#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
#define DEF_SCALE 0
#define DEF_CTEST 0
-#include "nv30/nv30_shader.h"
-#include "nv40/nv40_shader.h"
+#include "nv30_vertprog.h"
+#include "nv40_vertprog.h"
#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
#define neg(s) nvfx_sr_neg((s))