summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nvfx
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nvfx')
-rw-r--r--src/gallium/drivers/nvfx/Makefile37
-rw-r--r--src/gallium/drivers/nvfx/SConscript40
-rw-r--r--src/gallium/drivers/nvfx/nv04_surface_2d.c535
-rw-r--r--src/gallium/drivers/nvfx/nv04_surface_2d.h43
-rw-r--r--src/gallium/drivers/nvfx/nv30_fragtex.c149
-rw-r--r--src/gallium/drivers/nvfx/nv30_vertprog.h169
-rw-r--r--src/gallium/drivers/nvfx/nv40_fragtex.c176
-rw-r--r--src/gallium/drivers/nvfx/nv40_vertprog.h177
-rw-r--r--src/gallium/drivers/nvfx/nvfx_buffer.c153
-rw-r--r--src/gallium/drivers/nvfx/nvfx_clear.c14
-rw-r--r--src/gallium/drivers/nvfx/nvfx_context.c84
-rw-r--r--src/gallium/drivers/nvfx/nvfx_context.h251
-rw-r--r--src/gallium/drivers/nvfx/nvfx_draw.c350
-rw-r--r--src/gallium/drivers/nvfx/nvfx_fragprog.c1004
-rw-r--r--src/gallium/drivers/nvfx/nvfx_fragtex.c58
-rw-r--r--src/gallium/drivers/nvfx/nvfx_miptree.c310
-rw-r--r--src/gallium/drivers/nvfx/nvfx_query.c137
-rw-r--r--src/gallium/drivers/nvfx/nvfx_resource.c67
-rw-r--r--src/gallium/drivers/nvfx/nvfx_resource.h91
-rw-r--r--src/gallium/drivers/nvfx/nvfx_screen.c466
-rw-r--r--src/gallium/drivers/nvfx/nvfx_screen.h43
-rw-r--r--src/gallium/drivers/nvfx/nvfx_shader.h429
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state.c658
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state.h78
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_blend.c22
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_emit.c180
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_fb.c250
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_rasterizer.c9
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_scissor.c23
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_stipple.c26
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_viewport.c35
-rw-r--r--src/gallium/drivers/nvfx/nvfx_state_zsa.c21
-rw-r--r--src/gallium/drivers/nvfx/nvfx_surface.c61
-rw-r--r--src/gallium/drivers/nvfx/nvfx_tex.h133
-rw-r--r--src/gallium/drivers/nvfx/nvfx_transfer.c205
-rw-r--r--src/gallium/drivers/nvfx/nvfx_transfer.h26
-rw-r--r--src/gallium/drivers/nvfx/nvfx_vbo.c628
-rw-r--r--src/gallium/drivers/nvfx/nvfx_vertprog.c1066
38 files changed, 8204 insertions, 0 deletions
diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile
new file mode 100644
index 00000000000..c1d57ca3969
--- /dev/null
+++ b/src/gallium/drivers/nvfx/Makefile
@@ -0,0 +1,37 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nvfx
+
+C_SOURCES = \
+ nv04_surface_2d.c \
+ nvfx_buffer.c \
+ nvfx_context.c \
+ nvfx_clear.c \
+ nvfx_draw.c \
+ nvfx_fragprog.c \
+ nvfx_fragtex.c \
+ nv30_fragtex.c \
+ nv40_fragtex.c \
+ nvfx_miptree.c \
+ nvfx_query.c \
+ nvfx_resource.c \
+ nvfx_screen.c \
+ nvfx_state.c \
+ nvfx_state_blend.c \
+ nvfx_state_emit.c \
+ nvfx_state_fb.c \
+ nvfx_state_rasterizer.c \
+ nvfx_state_scissor.c \
+ nvfx_state_stipple.c \
+ nvfx_state_viewport.c \
+ nvfx_state_zsa.c \
+ nvfx_surface.c \
+ nvfx_transfer.c \
+ nvfx_vbo.c \
+ nvfx_vertprog.c
+
+LIBRARY_INCLUDES = \
+ -I$(TOP)/src/gallium/drivers/nouveau/include
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/nvfx/SConscript b/src/gallium/drivers/nvfx/SConscript
new file mode 100644
index 00000000000..02d931b10e8
--- /dev/null
+++ b/src/gallium/drivers/nvfx/SConscript
@@ -0,0 +1,40 @@
+Import('*')
+
+env = env.Clone()
+
+env.PrependUnique(delete_existing=1, CPPPATH = [
+ '#/src/gallium/drivers',
+])
+
+nvfx = env.ConvenienceLibrary(
+ target = 'nvfx',
+ source = [
+ 'nv04_surface_2d.c',
+ 'nvfx_buffer.c',
+ 'nvfx_context.c',
+ 'nvfx_clear.c',
+ 'nvfx_draw.c',
+ 'nvfx_fragprog.c',
+ 'nvfx_fragtex.c',
+ 'nv30_fragtex.c',
+ 'nv40_fragtex.c',
+ 'nvfx_miptree.c',
+ 'nvfx_query.c',
+ 'nvfx_resource.c',
+ 'nvfx_screen.c',
+ 'nvfx_state.c',
+ 'nvfx_state_blend.c',
+ 'nvfx_state_emit.c',
+ 'nvfx_state_fb.c',
+ 'nvfx_state_rasterizer.c',
+ 'nvfx_state_scissor.c',
+ 'nvfx_state_stipple.c',
+ 'nvfx_state_viewport.c',
+ 'nvfx_state_zsa.c',
+ 'nvfx_surface.c',
+ 'nvfx_transfer.c',
+ 'nvfx_vbo.c',
+ 'nvfx_vertprog.c',
+ ])
+
+Export('nvfx')
diff --git a/src/gallium/drivers/nvfx/nv04_surface_2d.c b/src/gallium/drivers/nvfx/nv04_surface_2d.c
new file mode 100644
index 00000000000..4ed574227d6
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv04_surface_2d.c
@@ -0,0 +1,535 @@
+#include "pipe/p_context.h"
+#include "pipe/p_format.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_util.h"
+#include "nouveau/nouveau_screen.h"
+#include "nv04_surface_2d.h"
+
+static INLINE int
+nv04_surface_format(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
+ case PIPE_FORMAT_R16_SNORM:
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_L8A8_UNORM:
+ return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
+ default:
+ return -1;
+ }
+}
+
+static INLINE int
+nv04_rect_format(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_A8_UNORM:
+ return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
+ default:
+ return -1;
+ }
+}
+
+static INLINE int
+nv04_scaled_image_format(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8;
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8;
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8;
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ case PIPE_FORMAT_R16_SNORM:
+ case PIPE_FORMAT_L8A8_UNORM:
+ return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
+ default:
+ return -1;
+ }
+}
+
+static INLINE unsigned
+nv04_swizzle_bits_square(unsigned x, unsigned y)
+{
+ unsigned u = (x & 0x001) << 0 |
+ (x & 0x002) << 1 |
+ (x & 0x004) << 2 |
+ (x & 0x008) << 3 |
+ (x & 0x010) << 4 |
+ (x & 0x020) << 5 |
+ (x & 0x040) << 6 |
+ (x & 0x080) << 7 |
+ (x & 0x100) << 8 |
+ (x & 0x200) << 9 |
+ (x & 0x400) << 10 |
+ (x & 0x800) << 11;
+
+ unsigned v = (y & 0x001) << 1 |
+ (y & 0x002) << 2 |
+ (y & 0x004) << 3 |
+ (y & 0x008) << 4 |
+ (y & 0x010) << 5 |
+ (y & 0x020) << 6 |
+ (y & 0x040) << 7 |
+ (y & 0x080) << 8 |
+ (y & 0x100) << 9 |
+ (y & 0x200) << 10 |
+ (y & 0x400) << 11 |
+ (y & 0x800) << 12;
+ return v | u;
+}
+
+/* rectangular swizzled textures are linear concatenations of swizzled square tiles */
+static INLINE unsigned
+nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h)
+{
+ unsigned s = MIN2(w, h);
+ unsigned m = s - 1;
+ return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m);
+}
+
+static int
+nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
+ struct pipe_surface *dst, int dx, int dy,
+ struct pipe_surface *src, int sx, int sy,
+ int w, int h)
+{
+ struct nouveau_channel *chan = ctx->swzsurf->channel;
+ struct nouveau_grobj *swzsurf = ctx->swzsurf;
+ struct nouveau_grobj *sifm = ctx->sifm;
+ struct nouveau_bo *src_bo = ctx->buf(src);
+ struct nouveau_bo *dst_bo = ctx->buf(dst);
+ const unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
+ /* Max width & height may not be the same on all HW, but must be POT */
+ const unsigned max_w = 1024;
+ const unsigned max_h = 1024;
+ unsigned sub_w = w > max_w ? max_w : w;
+ unsigned sub_h = h > max_h ? max_h : h;
+ unsigned x;
+ unsigned y;
+
+ /* Swizzled surfaces must be POT */
+ assert(util_is_pot(dst->width) && util_is_pot(dst->height));
+
+ /* If area is too large to copy in one shot we must copy it in POT chunks to meet alignment requirements */
+ assert(sub_w == w || util_is_pot(sub_w));
+ assert(sub_h == h || util_is_pot(sub_h));
+
+ MARK_RING (chan, 8 + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*17, 2 +
+ ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*2);
+
+ BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
+ OUT_RELOCo(chan, dst_bo,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1);
+ OUT_RING (chan, nv04_surface_format(dst->format) |
+ log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT |
+ log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT);
+
+ BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
+ OUT_RELOCo(chan, src_bo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1);
+ OUT_RING (chan, swzsurf->handle);
+
+ for (y = 0; y < h; y += sub_h) {
+ sub_h = MIN2(sub_h, h - y);
+
+ for (x = 0; x < w; x += sub_w) {
+ sub_w = MIN2(sub_w, w - x);
+
+ assert(!(dst->offset & 63));
+
+ BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
+ OUT_RELOCl(chan, dst_bo, dst->offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, sifm, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
+ OUT_RING (chan, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
+ OUT_RING (chan, nv04_scaled_image_format(src->format));
+ OUT_RING (chan, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
+ OUT_RING (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT));
+ OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w);
+ OUT_RING (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT));
+ OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w);
+ OUT_RING (chan, 1 << 20);
+ OUT_RING (chan, 1 << 20);
+
+ BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_SIZE, 4);
+ OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | sub_w);
+ OUT_RING (chan, src_pitch |
+ NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
+ NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
+ OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * util_format_get_blocksize(src->texture->format),
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, 0);
+ }
+ }
+
+ return 0;
+}
+
+static int
+nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
+ struct pipe_surface *dst, int dx, int dy,
+ struct pipe_surface *src, int sx, int sy, int w, int h)
+{
+ struct nouveau_channel *chan = ctx->m2mf->channel;
+ struct nouveau_grobj *m2mf = ctx->m2mf;
+ struct nouveau_bo *src_bo = ctx->buf(src);
+ struct nouveau_bo *dst_bo = ctx->buf(dst);
+ unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
+ unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
+ unsigned dst_offset = dst->offset + dy * dst_pitch +
+ dx * util_format_get_blocksize(dst->texture->format);
+ unsigned src_offset = src->offset + sy * src_pitch +
+ sx * util_format_get_blocksize(src->texture->format);
+
+ MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2);
+ BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
+ OUT_RELOCo(chan, src_bo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCo(chan, dst_bo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ while (h) {
+ int count = (h > 2047) ? 2047 : h;
+
+ BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+ OUT_RELOCl(chan, src_bo, src_offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, dst_bo, dst_offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RING (chan, src_pitch);
+ OUT_RING (chan, dst_pitch);
+ OUT_RING (chan, w * util_format_get_blocksize(src->texture->format));
+ OUT_RING (chan, count);
+ OUT_RING (chan, 0x0101);
+ OUT_RING (chan, 0);
+
+ h -= count;
+ src_offset += src_pitch * count;
+ dst_offset += dst_pitch * count;
+ }
+
+ return 0;
+}
+
+static int
+nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
+ int dx, int dy, struct pipe_surface *src, int sx, int sy,
+ int w, int h)
+{
+ struct nouveau_channel *chan = ctx->surf2d->channel;
+ struct nouveau_grobj *surf2d = ctx->surf2d;
+ struct nouveau_grobj *blit = ctx->blit;
+ struct nouveau_bo *src_bo = ctx->buf(src);
+ struct nouveau_bo *dst_bo = ctx->buf(dst);
+ unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
+ unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
+ int format;
+
+ format = nv04_surface_format(dst->format);
+ if (format < 0)
+ return 1;
+
+ MARK_RING (chan, 12, 4);
+ BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+ OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
+ OUT_RING (chan, format);
+ OUT_RING (chan, (dst_pitch << 16) | src_pitch);
+ OUT_RELOCl(chan, src_bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, blit, 0x0300, 3);
+ OUT_RING (chan, (sy << 16) | sx);
+ OUT_RING (chan, (dy << 16) | dx);
+ OUT_RING (chan, ( h << 16) | w);
+
+ return 0;
+}
+
+static void
+nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
+ int dx, int dy, struct pipe_surface *src, int sx, int sy,
+ int w, int h)
+{
+ int src_linear = src->texture->flags & NVFX_RESOURCE_FLAG_LINEAR;
+ int dst_linear = dst->texture->flags & NVFX_RESOURCE_FLAG_LINEAR;
+
+ assert(src->format == dst->format);
+
+ /* Setup transfer to swizzle the texture to vram if needed */
+ if (src_linear && !dst_linear && w > 1 && h > 1) {
+ nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h);
+ return;
+ }
+
+ /* Use M2MF instead of the blitter since it always works
+ * Any possible performance drop is likely to be not very significant
+ * and dwarfed anyway by the current buffer management problems
+ */
+ nv04_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h);
+}
+
+static void
+nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
+ int dx, int dy, int w, int h, unsigned value)
+{
+ struct nouveau_channel *chan = ctx->surf2d->channel;
+ struct nouveau_grobj *surf2d = ctx->surf2d;
+ struct nouveau_grobj *rect = ctx->rect;
+ struct nouveau_bo *dst_bo = ctx->buf(dst);
+ unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
+ int cs2d_format, gdirect_format;
+
+ cs2d_format = nv04_surface_format(dst->format);
+ assert(cs2d_format >= 0);
+
+ gdirect_format = nv04_rect_format(dst->format);
+ assert(gdirect_format >= 0);
+
+ MARK_RING (chan, 16, 4);
+ BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+ OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
+ OUT_RING (chan, cs2d_format);
+ OUT_RING (chan, (dst_pitch << 16) | dst_pitch);
+ OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1);
+ OUT_RING (chan, gdirect_format);
+ BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1);
+ OUT_RING (chan, value);
+ BEGIN_RING(chan, rect,
+ NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2);
+ OUT_RING (chan, (dx << 16) | dy);
+ OUT_RING (chan, ( w << 16) | h);
+}
+
+void
+nv04_surface_2d_takedown(struct nv04_surface_2d **pctx)
+{
+ struct nv04_surface_2d *ctx;
+
+ if (!pctx || !*pctx)
+ return;
+ ctx = *pctx;
+ *pctx = NULL;
+
+ nouveau_notifier_free(&ctx->ntfy);
+ nouveau_grobj_free(&ctx->m2mf);
+ nouveau_grobj_free(&ctx->surf2d);
+ nouveau_grobj_free(&ctx->swzsurf);
+ nouveau_grobj_free(&ctx->rect);
+ nouveau_grobj_free(&ctx->blit);
+ nouveau_grobj_free(&ctx->sifm);
+
+ FREE(ctx);
+}
+
+struct nv04_surface_2d *
+nv04_surface_2d_init(struct nouveau_screen *screen)
+{
+ struct nv04_surface_2d *ctx = CALLOC_STRUCT(nv04_surface_2d);
+ struct nouveau_channel *chan = screen->channel;
+ unsigned handle = 0x88000000, class;
+ int ret;
+
+ if (!ctx)
+ return NULL;
+
+ ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy);
+ if (ret) {
+ nv04_surface_2d_takedown(&ctx);
+ return NULL;
+ }
+
+ ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf);
+ if (ret) {
+ nv04_surface_2d_takedown(&ctx);
+ return NULL;
+ }
+
+ BEGIN_RING(chan, ctx->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
+ OUT_RING (chan, ctx->ntfy->handle);
+
+ if (chan->device->chipset < 0x10)
+ class = NV04_CONTEXT_SURFACES_2D;
+ else
+ class = NV10_CONTEXT_SURFACES_2D;
+
+ ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d);
+ if (ret) {
+ nv04_surface_2d_takedown(&ctx);
+ return NULL;
+ }
+
+ BEGIN_RING(chan, ctx->surf2d,
+ NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->vram->handle);
+
+ if (chan->device->chipset < 0x10)
+ class = NV04_IMAGE_BLIT;
+ else
+ class = NV12_IMAGE_BLIT;
+
+ ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit);
+ if (ret) {
+ nv04_surface_2d_takedown(&ctx);
+ return NULL;
+ }
+
+ BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_DMA_NOTIFY, 1);
+ OUT_RING (chan, ctx->ntfy->handle);
+ BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACE, 1);
+ OUT_RING (chan, ctx->surf2d->handle);
+ BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_OPERATION, 1);
+ OUT_RING (chan, NV01_IMAGE_BLIT_OPERATION_SRCCOPY);
+
+ ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT,
+ &ctx->rect);
+ if (ret) {
+ nv04_surface_2d_takedown(&ctx);
+ return NULL;
+ }
+
+ BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1);
+ OUT_RING (chan, ctx->ntfy->handle);
+ BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1);
+ OUT_RING (chan, ctx->surf2d->handle);
+ BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
+ OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY);
+ BEGIN_RING(chan, ctx->rect,
+ NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1);
+ OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE);
+
+ switch (chan->device->chipset & 0xf0) {
+ case 0x00:
+ case 0x10:
+ class = NV04_SWIZZLED_SURFACE;
+ break;
+ case 0x20:
+ class = NV20_SWIZZLED_SURFACE;
+ break;
+ case 0x30:
+ class = NV30_SWIZZLED_SURFACE;
+ break;
+ case 0x40:
+ case 0x60:
+ class = NV40_SWIZZLED_SURFACE;
+ break;
+ default:
+ /* Famous last words: this really can't happen.. */
+ assert(0);
+ break;
+ }
+
+ ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf);
+ if (ret) {
+ nv04_surface_2d_takedown(&ctx);
+ return NULL;
+ }
+
+ switch (chan->device->chipset & 0xf0) {
+ case 0x10:
+ case 0x20:
+ class = NV10_SCALED_IMAGE_FROM_MEMORY;
+ break;
+ case 0x30:
+ class = NV30_SCALED_IMAGE_FROM_MEMORY;
+ break;
+ case 0x40:
+ case 0x60:
+ class = NV40_SCALED_IMAGE_FROM_MEMORY;
+ break;
+ default:
+ class = NV04_SCALED_IMAGE_FROM_MEMORY;
+ break;
+ }
+
+ ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm);
+ if (ret) {
+ nv04_surface_2d_takedown(&ctx);
+ return NULL;
+ }
+
+ ctx->copy = nv04_surface_copy;
+ ctx->fill = nv04_surface_fill;
+ return ctx;
+}
+
+struct nv04_surface*
+nv04_surface_wrap_for_render(struct pipe_screen *pscreen,
+ struct nv04_surface_2d* eng2d, struct nv04_surface* ns)
+{
+ struct pipe_resource templ;
+ struct pipe_resource* temp_tex;
+ struct nv04_surface* temp_ns;
+ int temp_flags;
+
+ temp_flags = (ns->base.usage |
+ PIPE_BIND_BLIT_SOURCE |
+ PIPE_BIND_BLIT_DESTINATION);
+
+ ns->base.usage = (PIPE_BIND_BLIT_SOURCE |
+ PIPE_BIND_BLIT_DESTINATION);
+
+ memset(&templ, 0, sizeof(templ));
+ templ.format = ns->base.texture->format;
+ templ.target = PIPE_TEXTURE_2D;
+ templ.width0 = ns->base.width;
+ templ.height0 = ns->base.height;
+ templ.depth0 = 1;
+ templ.last_level = 0;
+
+ // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented
+ templ.nr_samples = ns->base.texture->nr_samples;
+
+ templ.bind = ns->base.texture->bind | PIPE_BIND_RENDER_TARGET;
+
+ temp_tex = pscreen->resource_create(pscreen, &templ);
+ temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags);
+ temp_ns->backing = ns;
+
+ if(ns->base.usage & PIPE_BIND_BLIT_SOURCE)
+ eng2d->copy(eng2d, &temp_ns->backing->base,
+ 0, 0, &ns->base,
+ 0, 0, ns->base.width, ns->base.height);
+
+ return temp_ns;
+}
diff --git a/src/gallium/drivers/nvfx/nv04_surface_2d.h b/src/gallium/drivers/nvfx/nv04_surface_2d.h
new file mode 100644
index 00000000000..2123c3ed08b
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv04_surface_2d.h
@@ -0,0 +1,43 @@
+#ifndef __NV04_SURFACE_2D_H__
+#define __NV04_SURFACE_2D_H__
+
+#include "pipe/p_state.h"
+
+struct nouveau_screen;
+
+struct nv04_surface {
+ struct pipe_surface base;
+ unsigned pitch;
+ struct nv04_surface* backing;
+};
+
+struct nv04_surface_2d {
+ struct nouveau_notifier *ntfy;
+ struct nouveau_grobj *surf2d;
+ struct nouveau_grobj *swzsurf;
+ struct nouveau_grobj *m2mf;
+ struct nouveau_grobj *rect;
+ struct nouveau_grobj *blit;
+ struct nouveau_grobj *sifm;
+
+ struct nouveau_bo *(*buf)(struct pipe_surface *);
+
+ void (*copy)(struct nv04_surface_2d *, struct pipe_surface *dst,
+ int dx, int dy, struct pipe_surface *src, int sx, int sy,
+ int w, int h);
+ void (*fill)(struct nv04_surface_2d *, struct pipe_surface *dst,
+ int dx, int dy, int w, int h, unsigned value);
+};
+
+struct nv04_surface_2d *
+nv04_surface_2d_init(struct nouveau_screen *screen);
+
+void
+nv04_surface_2d_takedown(struct nv04_surface_2d **);
+
+struct nv04_surface*
+nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns);
+
+#define NVFX_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c
new file mode 100644
index 00000000000..dec073ac900
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv30_fragtex.c
@@ -0,0 +1,149 @@
+#include "util/u_format.h"
+
+#include "nvfx_context.h"
+#include "nouveau/nouveau_util.h"
+#include "nvfx_tex.h"
+#include "nvfx_resource.h"
+
+void
+nv30_sampler_state_init(struct pipe_context *pipe,
+ struct nvfx_sampler_state *ps,
+ const struct pipe_sampler_state *cso)
+{
+ if (cso->max_anisotropy >= 8) {
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_8X;
+ } else
+ if (cso->max_anisotropy >= 4) {
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_4X;
+ } else
+ if (cso->max_anisotropy >= 2) {
+ ps->en |= NV34TCL_TX_ENABLE_ANISO_2X;
+ }
+
+ {
+ float limit;
+
+ limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+ ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;
+
+ limit = CLAMP(cso->max_lod, 0.0, 15.0);
+ ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/;
+
+ limit = CLAMP(cso->min_lod, 0.0, 15.0);
+ ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/;
+ }
+}
+
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV34TCL_TX_FORMAT_FORMAT_##tf, \
+ (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \
+ NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \
+ NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \
+ NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w) \
+}
+
+struct nv30_texture_format {
+ boolean defined;
+ uint pipe;
+ int format;
+ int swizzle;
+};
+
+static struct nv30_texture_format
+nv30_texture_formats[] = {
+ _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W),
+ _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W),
+ _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W),
+ _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W),
+ _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W),
+ _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X),
+ _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X),
+ _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X),
+ _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y),
+ _(Z16_UNORM , R5G6B5 , S1, S1, S1, ONE, X, X, X, X),
+ _(S8_USCALED_Z24_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X),
+ _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W),
+ _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W),
+ _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W),
+ _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W),
+ {},
+};
+
+static struct nv30_texture_format *
+nv30_fragtex_format(uint pipe_format)
+{
+ struct nv30_texture_format *tf = nv30_texture_formats;
+
+ while (tf->defined) {
+ if (tf->pipe == pipe_format)
+ return tf;
+ tf++;
+ }
+
+ NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format));
+ return NULL;
+}
+
+
+void
+nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
+{
+ struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
+ struct nvfx_miptree *nv30mt = (struct nvfx_miptree *)nvfx->fragment_sampler_views[unit]->texture;
+ struct pipe_resource *pt = &nv30mt->base.base;
+ struct nouveau_bo *bo = nv30mt->base.bo;
+ struct nv30_texture_format *tf;
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ uint32_t txf, txs;
+ unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+ tf = nv30_fragtex_format(pt->format);
+ if (!tf)
+ return;
+
+ txf = tf->format;
+ txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0);
+ txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
+ txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
+ txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
+ txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV34TCL_TX_FORMAT_CUBIC;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_2D;
+ break;
+ case PIPE_TEXTURE_3D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_3D;
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_1D;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return;
+ }
+
+ txs = tf->swizzle;
+
+ MARK_RING(chan, 9, 2);
+ OUT_RING(chan, RING_3D(NV34TCL_TX_OFFSET(unit), 8));
+ OUT_RELOC(chan, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+ OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR,
+ NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
+ OUT_RING(chan, ps->wrap);
+ OUT_RING(chan, NV34TCL_TX_ENABLE_ENABLE | ps->en);
+ OUT_RING(chan, txs);
+ OUT_RING(chan, ps->filt | 0x2000 /*voodoo*/);
+ OUT_RING(chan, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
+ pt->height0);
+ OUT_RING(chan, ps->bcol);
+
+ nvfx->hw_txf[unit] = txf;
+ nvfx->hw_samplers |= (1 << unit);
+}
diff --git a/src/gallium/drivers/nvfx/nv30_vertprog.h b/src/gallium/drivers/nvfx/nv30_vertprog.h
new file mode 100644
index 00000000000..ec0444c07f8
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv30_vertprog.h
@@ -0,0 +1,169 @@
+#ifndef __NV30_SHADER_H__
+#define __NV30_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * 128bit opcodes, split into 4 32-bit ones for ease of use.
+ *
+ * Non-native instructions
+ * ABS - MOV + NV40_VP_INST0_DEST_ABS
+ * POW - EX2 + MUL + LG2
+ * SUB - ADD, second source negated
+ * SWZ - MOV
+ * XPD -
+ *
+ * Register access
+ * - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
+ * - Only one CONST can be accessed per-instruction (move extras into TEMPs)
+ *
+ * Relative Addressing
+ * According to the value returned for
+ * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB
+ *
+ * there are only two address registers available. The destination in the
+ * ARL instruction is set to TEMP <n> (The temp isn't actually written).
+ *
+ * When using vanilla ARB_v_p, the proprietary driver will squish both the
+ * available ADDRESS regs into the first hardware reg in the X and Y
+ * components.
+ *
+ * To use an address reg as an index into consts, the CONST_SRC is set to
+ * (const_base + offset) and INDEX_CONST is set.
+ *
+ * To access the second address reg use ADDR_REG_SELECT_1. A particular
+ * component of the address regs is selected with ADDR_SWZ.
+ *
+ * Only one address register can be accessed per instruction.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details) Conditional
+ * execution of an instruction is enabled by setting COND_TEST_ENABLE, and
+ * selecting the condition which will allow the test to pass with
+ * COND_{FL,LT,...}. It is possible to swizzle the values in the condition
+ * register, which allows for testing against an individual component.
+ *
+ * Branching:
+ *
+ * The BRA/CAL instructions seem to follow a slightly different opcode
+ * layout. The destination instruction ID (IADDR) overlaps a source field.
+ * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO
+ * command, and is incremented automatically on each UPLOAD_INST FIFO
+ * command.
+ *
+ * Conditional branching is achieved by using the condition tests described
+ * above. There doesn't appear to be dedicated looping instructions, but
+ * this can be done using a temp reg + conditional branching.
+ *
+ * Subroutines may be uploaded before the main program itself, but the first
+ * executed instruction is determined by the PROGRAM_START_ID FIFO command.
+ *
+ */
+
+/* DWORD 0 */
+
+#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */
+#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */
+#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */
+#define NV30_VP_INST_VEC_RESULT (1 << 20)
+#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16
+#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16)
+#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15)
+#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16)
+#define NV30_VP_INST_COND_TEST_ENABLE (1<<14)
+#define NV30_VP_INST_COND_SHIFT 11
+#define NV30_VP_INST_COND_MASK (0x07 << 11)
+#define NV30_VP_INST_COND_SWZ_X_SHIFT 9
+#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9)
+#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7
+#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7)
+#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5
+#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5)
+#define NV30_VP_INST_COND_SWZ_W_SHIFT 3
+#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3)
+#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3
+#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3)
+#define NV30_VP_INST_ADDR_SWZ_SHIFT 1
+#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1)
+#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0
+#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0)
+
+/* DWORD 1 */
+#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28
+#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28)
+#define NV30_VP_INST_VEC_OPCODE_SHIFT 23
+#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23)
+#define NV30_VP_INST_CONST_SRC_SHIFT 14
+#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14)
+#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/
+#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/
+#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/
+#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/
+
+/* Please note: the IADDR fields overlap other fields because they are used
+ * only for branch instructions. See Branching: label above
+ *
+ * DWORD 2
+ */
+#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/
+#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */
+#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/
+#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/
+#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/
+#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/
+#define NV30_VP_INST_IADDR_SHIFT 2
+#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */
+
+/* DWORD 3 */
+#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/
+#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/
+#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24
+#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24)
+#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20
+#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20)
+#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16
+#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16)
+#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/
+#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/
+#define NV30_VP_INST_DEST_SHIFT 2
+#define NV30_VP_INST_DEST_MASK (0x0F << 2)
+# define NV30_VP_INST_DEST_POS 0
+# define NV30_VP_INST_DEST_BFC0 1
+# define NV30_VP_INST_DEST_BFC1 2
+# define NV30_VP_INST_DEST_COL0 3
+# define NV30_VP_INST_DEST_COL1 4
+# define NV30_VP_INST_DEST_FOGC 5
+# define NV30_VP_INST_DEST_PSZ 6
+# define NV30_VP_INST_DEST_TC(n) (8+n)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV30_VP_SRC0_HIGH_SHIFT 6
+#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0
+#define NV30_VP_SRC0_LOW_MASK 0x0000003F
+#define NV30_VP_SRC2_HIGH_SHIFT 4
+#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0
+#define NV30_VP_SRC2_LOW_MASK 0x0000000F
+
+
+/* Source-register definition - matches NV20 exactly */
+#define NV30_VP_SRC_NEGATE (1<<14)
+#define NV30_VP_SRC_SWZ_X_SHIFT 12
+#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12)
+#define NV30_VP_SRC_SWZ_Y_SHIFT 10
+#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10)
+#define NV30_VP_SRC_SWZ_Z_SHIFT 8
+#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8)
+#define NV30_VP_SRC_SWZ_W_SHIFT 6
+#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6)
+#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6
+#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6)
+#define NV30_VP_SRC_TEMP_SRC_SHIFT 2
+#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0)
+#define NV30_VP_SRC_REG_TYPE_SHIFT 0
+#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0)
+#define NV30_VP_SRC_REG_TYPE_TEMP 1
+#define NV30_VP_SRC_REG_TYPE_INPUT 2
+#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */
+
+#include "nvfx_shader.h"
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c
new file mode 100644
index 00000000000..0068b1ba54a
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv40_fragtex.c
@@ -0,0 +1,176 @@
+#include "util/u_format.h"
+#include "nvfx_context.h"
+#include "nvfx_tex.h"
+#include "nvfx_resource.h"
+
+void
+nv40_sampler_state_init(struct pipe_context *pipe,
+ struct nvfx_sampler_state *ps,
+ const struct pipe_sampler_state *cso)
+{
+ if (cso->max_anisotropy >= 2) {
+ /* no idea, binary driver sets it, works without it.. meh.. */
+ ps->wrap |= (1 << 5);
+
+ if (cso->max_anisotropy >= 16) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X;
+ } else
+ if (cso->max_anisotropy >= 12) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X;
+ } else
+ if (cso->max_anisotropy >= 10) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X;
+ } else
+ if (cso->max_anisotropy >= 8) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X;
+ } else
+ if (cso->max_anisotropy >= 6) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X;
+ } else
+ if (cso->max_anisotropy >= 4) {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X;
+ } else {
+ ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X;
+ }
+ }
+
+ {
+ float limit;
+
+ limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+ ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff;
+
+ limit = CLAMP(cso->max_lod, 0.0, 15.0);
+ ps->en |= (int)(limit * 256.0) << 7;
+
+ limit = CLAMP(cso->min_lod, 0.0, 15.0);
+ ps->en |= (int)(limit * 256.0) << 19;
+ }
+}
+
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV40TCL_TEX_FORMAT_FORMAT_##tf, \
+ (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \
+ NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \
+ NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \
+ NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w), \
+ ((NV34TCL_TX_FILTER_SIGNED_RED*sx) | (NV34TCL_TX_FILTER_SIGNED_GREEN*sy) | \
+ (NV34TCL_TX_FILTER_SIGNED_BLUE*sz) | (NV34TCL_TX_FILTER_SIGNED_ALPHA*sw)) \
+}
+
+struct nv40_texture_format {
+ boolean defined;
+ uint pipe;
+ int format;
+ int swizzle;
+ int sign;
+};
+
+static struct nv40_texture_format
+nv40_texture_formats[] = {
+ _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
+ _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
+ _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0),
+ _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1),
+ _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0),
+ _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0),
+ _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(S8_USCALED_Z24_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0),
+ _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
+ {},
+};
+
+static struct nv40_texture_format *
+nv40_fragtex_format(uint pipe_format)
+{
+ struct nv40_texture_format *tf = nv40_texture_formats;
+
+ while (tf->defined) {
+ if (tf->pipe == pipe_format)
+ return tf;
+ tf++;
+ }
+
+ NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format));
+ return NULL;
+}
+
+
+void
+nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
+ struct nvfx_miptree *nv40mt = (struct nvfx_miptree *)nvfx->fragment_sampler_views[unit]->texture;
+ struct nouveau_bo *bo = nv40mt->base.bo;
+ struct pipe_resource *pt = &nv40mt->base.base;
+ struct nv40_texture_format *tf;
+
+ uint32_t txf, txs, txp;
+ unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+ tf = nv40_fragtex_format(pt->format);
+ if (!tf)
+ assert(0);
+
+ txf = ps->fmt;
+ txf |= tf->format | 0x8000;
+ txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT);
+
+ if (1) /* XXX */
+ txf |= NV34TCL_TX_FORMAT_NO_BORDER;
+
+ switch (pt->target) {
+ case PIPE_TEXTURE_CUBE:
+ txf |= NV34TCL_TX_FORMAT_CUBIC;
+ /* fall-through */
+ case PIPE_TEXTURE_2D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_2D;
+ break;
+ case PIPE_TEXTURE_3D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_3D;
+ break;
+ case PIPE_TEXTURE_1D:
+ txf |= NV34TCL_TX_FORMAT_DIMS_1D;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown target %d\n", pt->target);
+ return;
+ }
+
+ if (!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR)) {
+ txp = 0;
+ } else {
+ txp = nv40mt->level[0].pitch;
+ txf |= NV40TCL_TEX_FORMAT_LINEAR;
+ }
+
+ txs = tf->swizzle;
+
+ MARK_RING(chan, 11 + 2 * !unit, 2);
+ OUT_RING(chan, RING_3D(NV34TCL_TX_OFFSET(unit), 8));
+ OUT_RELOC(chan, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+ OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR,
+ NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
+ OUT_RING(chan, ps->wrap);
+ OUT_RING(chan, NV40TCL_TEX_ENABLE_ENABLE | ps->en);
+ OUT_RING(chan, txs);
+ OUT_RING(chan, ps->filt | tf->sign | 0x2000 /*voodoo*/);
+ OUT_RING(chan, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | pt->height0);
+ OUT_RING(chan, ps->bcol);
+ OUT_RING(chan, RING_3D(NV40TCL_TEX_SIZE1(unit), 1));
+ OUT_RING(chan, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
+
+ nvfx->hw_txf[unit] = txf;
+ nvfx->hw_samplers |= (1 << unit);
+}
diff --git a/src/gallium/drivers/nvfx/nv40_vertprog.h b/src/gallium/drivers/nvfx/nv40_vertprog.h
new file mode 100644
index 00000000000..7337293babc
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv40_vertprog.h
@@ -0,0 +1,177 @@
+#ifndef __NV40_SHADER_H__
+#define __NV40_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * The NV40 instruction set is very similar to NV30. Most fields are in
+ * a slightly different position in the instruction however.
+ *
+ * Merged instructions
+ * In some cases it is possible to put two instructions into one opcode
+ * slot. The rules for when this is OK is not entirely clear to me yet.
+ *
+ * There are separate writemasks and dest temp register fields for each
+ * grouping of instructions. There is however only one field with the
+ * ID of a result register. Writing to temp/result regs is selected by
+ * setting VEC_RESULT/SCA_RESULT.
+ *
+ * Temporary registers
+ * The source/dest temp register fields have been extended by 1 bit, to
+ * give a total of 32 temporary registers.
+ *
+ * Relative Addressing
+ * NV40 can use an address register to index into vertex attribute regs.
+ * This is done by putting the offset value into INPUT_SRC and setting
+ * the INDEX_INPUT flag.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details)
+ * There is a second condition code register on NV40, it's use is enabled
+ * by setting the COND_REG_SELECT_1 flag.
+ *
+ * Texture lookup
+ * TODO
+ */
+
+/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */
+#define NV40_VP_INST_VEC_RESULT (1 << 30)
+/* uncertain.. */
+#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29)
+/* use address reg as index into attribs */
+#define NV40_VP_INST_INDEX_INPUT (1 << 27)
+#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25)
+#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV40_VP_INST_SRC2_ABS (1 << 23)
+#define NV40_VP_INST_SRC1_ABS (1 << 22)
+#define NV40_VP_INST_SRC0_ABS (1 << 21)
+#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15
+#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15)
+#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13)
+#define NV40_VP_INST_COND_SHIFT 10
+#define NV40_VP_INST_COND_MASK (0x7 << 10)
+#define NV40_VP_INST_COND_SWZ_X_SHIFT 8
+#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8)
+#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6
+#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6)
+#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4
+#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4)
+#define NV40_VP_INST_COND_SWZ_W_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2)
+#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2)
+#define NV40_VP_INST_ADDR_SWZ_SHIFT 0
+#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0)
+#define NV40_VP_INST0_KNOWN ( \
+ NV40_VP_INST_INDEX_INPUT | \
+ NV40_VP_INST_COND_REG_SELECT_1 | \
+ NV40_VP_INST_ADDR_REG_SELECT_1 | \
+ NV40_VP_INST_SRC2_ABS | \
+ NV40_VP_INST_SRC1_ABS | \
+ NV40_VP_INST_SRC0_ABS | \
+ NV40_VP_INST_VEC_DEST_TEMP_MASK | \
+ NV40_VP_INST_COND_TEST_ENABLE | \
+ NV40_VP_INST_COND_MASK | \
+ NV40_VP_INST_COND_SWZ_ALL_MASK | \
+ NV40_VP_INST_ADDR_SWZ_MASK)
+
+/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
+#define NV40_VP_INST_VEC_OPCODE_SHIFT 22
+#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22)
+#define NV40_VP_INST_SCA_OPCODE_SHIFT 27
+#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27)
+#define NV40_VP_INST_CONST_SRC_SHIFT 12
+#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12)
+#define NV40_VP_INST_INPUT_SRC_SHIFT 8
+#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8)
+#define NV40_VP_INST_SRC0H_SHIFT 0
+#define NV40_VP_INST_SRC0H_MASK (0xFF << 0)
+#define NV40_VP_INST1_KNOWN ( \
+ NV40_VP_INST_VEC_OPCODE_MASK | \
+ NV40_VP_INST_SCA_OPCODE_MASK | \
+ NV40_VP_INST_CONST_SRC_MASK | \
+ NV40_VP_INST_INPUT_SRC_MASK | \
+ NV40_VP_INST_SRC0H_MASK \
+ )
+
+/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */
+#define NV40_VP_INST_SRC0L_SHIFT 23
+#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23)
+#define NV40_VP_INST_SRC1_SHIFT 6
+#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6)
+#define NV40_VP_INST_SRC2H_SHIFT 0
+#define NV40_VP_INST_SRC2H_MASK (0x3F << 0)
+#define NV40_VP_INST_IADDRH_SHIFT 0
+#define NV40_VP_INST_IADDRH_MASK (0x1F << 0)
+
+/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
+#define NV40_VP_INST_IADDRL_SHIFT 29
+#define NV40_VP_INST_IADDRL_MASK (7 << 29)
+#define NV40_VP_INST_SRC2L_SHIFT 21
+#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21)
+#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17
+#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17)
+# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20)
+# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19)
+# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18)
+# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17)
+#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13
+#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13)
+# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16)
+# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15)
+# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14)
+# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13)
+#define NV40_VP_INST_SCA_RESULT (1 << 12)
+#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7
+#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7)
+#define NV40_VP_INST_DEST_SHIFT 2
+#define NV40_VP_INST_DEST_MASK (31 << 2)
+# define NV40_VP_INST_DEST_POS 0
+# define NV40_VP_INST_DEST_COL0 1
+# define NV40_VP_INST_DEST_COL1 2
+# define NV40_VP_INST_DEST_BFC0 3
+# define NV40_VP_INST_DEST_BFC1 4
+# define NV40_VP_INST_DEST_FOGC 5
+# define NV40_VP_INST_DEST_PSZ 6
+# define NV40_VP_INST_DEST_TC0 7
+# define NV40_VP_INST_DEST_TC(n) (7+n)
+# define NV40_VP_INST_DEST_TEMP 0x1F
+#define NV40_VP_INST_INDEX_CONST (1 << 1)
+#define NV40_VP_INST3_KNOWN ( \
+ NV40_VP_INST_SRC2L_MASK |\
+ NV40_VP_INST_SCA_WRITEMASK_MASK |\
+ NV40_VP_INST_VEC_WRITEMASK_MASK |\
+ NV40_VP_INST_SCA_DEST_TEMP_MASK |\
+ NV40_VP_INST_DEST_MASK |\
+ NV40_VP_INST_INDEX_CONST)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV40_VP_SRC0_HIGH_SHIFT 9
+#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00
+#define NV40_VP_SRC0_LOW_MASK 0x000001FF
+#define NV40_VP_SRC2_HIGH_SHIFT 11
+#define NV40_VP_SRC2_HIGH_MASK 0x0001F800
+#define NV40_VP_SRC2_LOW_MASK 0x000007FF
+
+/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */
+#define NV40_VP_SRC_NEGATE (1 << 16)
+#define NV40_VP_SRC_SWZ_X_SHIFT 14
+#define NV40_VP_SRC_SWZ_X_MASK (3 << 14)
+#define NV40_VP_SRC_SWZ_Y_SHIFT 12
+#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12)
+#define NV40_VP_SRC_SWZ_Z_SHIFT 10
+#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10)
+#define NV40_VP_SRC_SWZ_W_SHIFT 8
+#define NV40_VP_SRC_SWZ_W_MASK (3 << 8)
+#define NV40_VP_SRC_SWZ_ALL_SHIFT 8
+#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8)
+#define NV40_VP_SRC_TEMP_SRC_SHIFT 2
+#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2)
+#define NV40_VP_SRC_REG_TYPE_SHIFT 0
+#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0)
+# define NV40_VP_SRC_REG_TYPE_UNK0 0
+# define NV40_VP_SRC_REG_TYPE_TEMP 1
+# define NV40_VP_SRC_REG_TYPE_INPUT 2
+# define NV40_VP_SRC_REG_TYPE_CONST 3
+
+#include "nvfx_shader.h"
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_buffer.c b/src/gallium/drivers/nvfx/nvfx_buffer.c
new file mode 100644
index 00000000000..05b824b8f74
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_buffer.c
@@ -0,0 +1,153 @@
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "nouveau/nouveau_screen.h"
+#include "nouveau/nouveau_winsys.h"
+#include "nvfx_resource.h"
+
+
+/* Currently using separate implementations for buffers and textures,
+ * even though gallium has a unified abstraction of these objects.
+ * Eventually these should be combined, and mechanisms like transfers
+ * be adapted to work for both buffer and texture uploads.
+ */
+static void nvfx_buffer_destroy(struct pipe_screen *pscreen,
+ struct pipe_resource *presource)
+{
+ struct nvfx_resource *buffer = nvfx_resource(presource);
+
+ nouveau_screen_bo_release(pscreen, buffer->bo);
+ FREE(buffer);
+}
+
+
+
+
+/* Utility functions for transfer create/destroy are hooked in and
+ * just record the arguments to those functions.
+ */
+static void *
+nvfx_buffer_transfer_map( struct pipe_context *pipe,
+ struct pipe_transfer *transfer )
+{
+ struct nvfx_resource *buffer = nvfx_resource(transfer->resource);
+ uint8_t *map;
+
+ map = nouveau_screen_bo_map_range( pipe->screen,
+ buffer->bo,
+ transfer->box.x,
+ transfer->box.width,
+ nouveau_screen_transfer_flags(transfer->usage) );
+ if (map == NULL)
+ return NULL;
+
+ return map + transfer->box.x;
+}
+
+
+
+static void nvfx_buffer_transfer_flush_region( struct pipe_context *pipe,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
+{
+ struct nvfx_resource *buffer = nvfx_resource(transfer->resource);
+
+ nouveau_screen_bo_map_flush_range(pipe->screen,
+ buffer->bo,
+ transfer->box.x + box->x,
+ box->width);
+}
+
+static void nvfx_buffer_transfer_unmap( struct pipe_context *pipe,
+ struct pipe_transfer *transfer )
+{
+ struct nvfx_resource *buffer = nvfx_resource(transfer->resource);
+
+ nouveau_screen_bo_unmap(pipe->screen, buffer->bo);
+}
+
+
+
+
+struct u_resource_vtbl nvfx_buffer_vtbl =
+{
+ u_default_resource_get_handle, /* get_handle */
+ nvfx_buffer_destroy, /* resource_destroy */
+ NULL, /* is_resource_referenced */
+ u_default_get_transfer, /* get_transfer */
+ u_default_transfer_destroy, /* transfer_destroy */
+ nvfx_buffer_transfer_map, /* transfer_map */
+ nvfx_buffer_transfer_flush_region, /* transfer_flush_region */
+ nvfx_buffer_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
+
+
+struct pipe_resource *
+nvfx_buffer_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *template)
+{
+ struct nvfx_resource *buffer;
+
+ buffer = CALLOC_STRUCT(nvfx_resource);
+ if (!buffer)
+ return NULL;
+
+ buffer->base = *template;
+ buffer->vtbl = &nvfx_buffer_vtbl;
+ pipe_reference_init(&buffer->base.reference, 1);
+ buffer->base.screen = pscreen;
+
+ buffer->bo = nouveau_screen_bo_new(pscreen,
+ 16,
+ buffer->base.usage,
+ buffer->base.bind,
+ buffer->base.width0);
+
+ if (buffer->bo == NULL)
+ goto fail;
+
+ return &buffer->base;
+
+fail:
+ FREE(buffer);
+ return NULL;
+}
+
+
+struct pipe_resource *
+nvfx_user_buffer_create(struct pipe_screen *pscreen,
+ void *ptr,
+ unsigned bytes,
+ unsigned usage)
+{
+ struct nvfx_resource *buffer;
+
+ buffer = CALLOC_STRUCT(nvfx_resource);
+ if (!buffer)
+ return NULL;
+
+ pipe_reference_init(&buffer->base.reference, 1);
+ buffer->vtbl = &nvfx_buffer_vtbl;
+ buffer->base.screen = pscreen;
+ buffer->base.format = PIPE_FORMAT_R8_UNORM;
+ buffer->base.usage = PIPE_USAGE_IMMUTABLE;
+ buffer->base.bind = usage;
+ buffer->base.width0 = bytes;
+ buffer->base.height0 = 1;
+ buffer->base.depth0 = 1;
+
+ buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes);
+ if (!buffer->bo)
+ goto fail;
+
+ return &buffer->base;
+
+fail:
+ FREE(buffer);
+ return NULL;
+}
+
diff --git a/src/gallium/drivers/nvfx/nvfx_clear.c b/src/gallium/drivers/nvfx/nvfx_clear.c
new file mode 100644
index 00000000000..2be70fcee40
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_clear.c
@@ -0,0 +1,14 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_clear.h"
+
+#include "nvfx_context.h"
+
+void
+nvfx_clear(struct pipe_context *pipe, unsigned buffers,
+ const float *rgba, double depth, unsigned stencil)
+{
+ util_clear(pipe, &nvfx_context(pipe)->framebuffer, buffers, rgba, depth,
+ stencil);
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
new file mode 100644
index 00000000000..6d2dc4d5bf6
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_context.c
@@ -0,0 +1,84 @@
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+
+#include "nvfx_context.h"
+#include "nvfx_screen.h"
+#include "nvfx_resource.h"
+
+static void
+nvfx_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_screen *screen = nvfx->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+ BEGIN_RING(chan, eng3d, 0x1fd8, 1);
+ OUT_RING (chan, 2);
+ BEGIN_RING(chan, eng3d, 0x1fd8, 1);
+ OUT_RING (chan, 1);
+ }
+
+ FIRE_RING(chan);
+ if (fence)
+ *fence = NULL;
+}
+
+static void
+nvfx_destroy(struct pipe_context *pipe)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ if (nvfx->draw)
+ draw_destroy(nvfx->draw);
+ FREE(nvfx);
+}
+
+struct pipe_context *
+nvfx_create(struct pipe_screen *pscreen, void *priv)
+{
+ struct nvfx_screen *screen = nvfx_screen(pscreen);
+ struct pipe_winsys *ws = pscreen->winsys;
+ struct nvfx_context *nvfx;
+ struct nouveau_winsys *nvws = screen->nvws;
+
+ nvfx = CALLOC(1, sizeof(struct nvfx_context));
+ if (!nvfx)
+ return NULL;
+ nvfx->screen = screen;
+
+ nvfx->nvws = nvws;
+
+ nvfx->pipe.winsys = ws;
+ nvfx->pipe.screen = pscreen;
+ nvfx->pipe.priv = priv;
+ nvfx->pipe.destroy = nvfx_destroy;
+ nvfx->pipe.draw_arrays = nvfx_draw_arrays;
+ nvfx->pipe.draw_elements = nvfx_draw_elements;
+ nvfx->pipe.clear = nvfx_clear;
+ nvfx->pipe.flush = nvfx_flush;
+
+ screen->base.channel->user_private = nvfx;
+
+ nvfx->is_nv4x = screen->is_nv4x;
+
+ nvfx_init_query_functions(nvfx);
+ nvfx_init_surface_functions(nvfx);
+ nvfx_init_state_functions(nvfx);
+ nvfx_init_resource_functions(&nvfx->pipe);
+
+ /* Create, configure, and install fallback swtnl path */
+ nvfx->draw = draw_create(&nvfx->pipe);
+ draw_wide_point_threshold(nvfx->draw, 9999999.0);
+ draw_wide_line_threshold(nvfx->draw, 9999999.0);
+ draw_enable_line_stipple(nvfx->draw, FALSE);
+ draw_enable_point_sprites(nvfx->draw, FALSE);
+ draw_set_rasterize_stage(nvfx->draw, nvfx_draw_render_stage(nvfx));
+
+ /* set these to that we init them on first validation */
+ nvfx->state.scissor_enabled = ~0;
+ nvfx->state.stipple_enabled = ~0;
+ return &nvfx->pipe;
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h
new file mode 100644
index 00000000000..e48f9f3aa88
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_context.h
@@ -0,0 +1,251 @@
+#ifndef __NVFX_CONTEXT_H__
+#define __NVFX_CONTEXT_H__
+
+#include <stdio.h>
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_inlines.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_gldefs.h"
+
+#include "nvfx_state.h"
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args);
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args);
+
+#include "nvfx_screen.h"
+
+#define NVFX_NEW_BLEND (1 << 0)
+#define NVFX_NEW_RAST (1 << 1)
+#define NVFX_NEW_ZSA (1 << 2)
+#define NVFX_NEW_SAMPLER (1 << 3)
+#define NVFX_NEW_FB (1 << 4)
+#define NVFX_NEW_STIPPLE (1 << 5)
+#define NVFX_NEW_SCISSOR (1 << 6)
+#define NVFX_NEW_VIEWPORT (1 << 7)
+#define NVFX_NEW_BCOL (1 << 8)
+#define NVFX_NEW_VERTPROG (1 << 9)
+#define NVFX_NEW_FRAGPROG (1 << 10)
+#define NVFX_NEW_ARRAYS (1 << 11)
+#define NVFX_NEW_UCP (1 << 12)
+#define NVFX_NEW_SR (1 << 13)
+#define NVFX_NEW_VERTCONST (1 << 14)
+#define NVFX_NEW_FRAGCONST (1 << 15)
+
+struct nvfx_rasterizer_state {
+ struct pipe_rasterizer_state pipe;
+ unsigned sb_len;
+ uint32_t sb[32];
+};
+
+struct nvfx_zsa_state {
+ struct pipe_depth_stencil_alpha_state pipe;
+ unsigned sb_len;
+ uint32_t sb[26];
+};
+
+struct nvfx_blend_state {
+ struct pipe_blend_state pipe;
+ unsigned sb_len;
+ uint32_t sb[13];
+};
+
+
+struct nvfx_state {
+ unsigned scissor_enabled;
+ unsigned stipple_enabled;
+ unsigned fp_samplers;
+};
+
+struct nvfx_vtxelt_state {
+ struct pipe_vertex_element pipe[16];
+ unsigned num_elements;
+};
+
+struct nvfx_render_target {
+ struct nouveau_bo* bo;
+ unsigned offset;
+ unsigned pitch;
+};
+
+struct nvfx_context {
+ struct pipe_context pipe;
+
+ struct nouveau_winsys *nvws;
+ struct nvfx_screen *screen;
+
+ unsigned is_nv4x; /* either 0 or ~0 */
+
+ struct draw_context *draw;
+
+ /* HW state derived from pipe states */
+ struct nvfx_state state;
+ struct {
+ struct nvfx_vertex_program *vertprog;
+
+ unsigned nr_attribs;
+ unsigned hw[PIPE_MAX_SHADER_INPUTS];
+ unsigned draw[PIPE_MAX_SHADER_INPUTS];
+ unsigned emit[PIPE_MAX_SHADER_INPUTS];
+ } swtnl;
+
+ enum {
+ HW, SWTNL, SWRAST
+ } render_mode;
+ unsigned fallback_swtnl;
+
+ /* Context state */
+ unsigned dirty, draw_dirty;
+ struct pipe_scissor_state scissor;
+ unsigned stipple[32];
+ struct pipe_clip_state clip;
+ struct nvfx_vertex_program *vertprog;
+ struct nvfx_fragment_program *fragprog;
+ struct pipe_resource *constbuf[PIPE_SHADER_TYPES];
+ unsigned constbuf_nr[PIPE_SHADER_TYPES];
+ struct nvfx_rasterizer_state *rasterizer;
+ struct nvfx_zsa_state *zsa;
+ struct nvfx_blend_state *blend;
+ struct pipe_blend_color blend_colour;
+ struct pipe_stencil_ref stencil_ref;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_resource *idxbuf;
+ unsigned idxbuf_format;
+ struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
+ unsigned nr_samplers;
+ unsigned nr_textures;
+ unsigned dirty_samplers;
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned vtxbuf_nr;
+ struct nvfx_vtxelt_state *vtxelt;
+
+ unsigned vbo_bo;
+ unsigned hw_vtxelt_nr;
+ uint8_t hw_samplers;
+ uint32_t hw_txf[8];
+ struct nvfx_render_target hw_rt[4];
+ struct nvfx_render_target hw_zeta;
+};
+
+static INLINE struct nvfx_context *
+nvfx_context(struct pipe_context *pipe)
+{
+ return (struct nvfx_context *)pipe;
+}
+
+extern struct nvfx_state_entry nvfx_state_blend;
+extern struct nvfx_state_entry nvfx_state_blend_colour;
+extern struct nvfx_state_entry nvfx_state_fragprog;
+extern struct nvfx_state_entry nvfx_state_fragtex;
+extern struct nvfx_state_entry nvfx_state_framebuffer;
+extern struct nvfx_state_entry nvfx_state_rasterizer;
+extern struct nvfx_state_entry nvfx_state_scissor;
+extern struct nvfx_state_entry nvfx_state_sr;
+extern struct nvfx_state_entry nvfx_state_stipple;
+extern struct nvfx_state_entry nvfx_state_vbo;
+extern struct nvfx_state_entry nvfx_state_vertprog;
+extern struct nvfx_state_entry nvfx_state_viewport;
+extern struct nvfx_state_entry nvfx_state_vtxfmt;
+extern struct nvfx_state_entry nvfx_state_zsa;
+
+extern void nvfx_init_query_functions(struct nvfx_context *nvfx);
+extern void nvfx_init_surface_functions(struct nvfx_context *nvfx);
+
+/* nvfx_context.c */
+struct pipe_context *
+nvfx_create(struct pipe_screen *pscreen, void *priv);
+
+/* nvfx_clear.c */
+extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers,
+ const float *rgba, double depth, unsigned stencil);
+
+/* nvfx_draw.c */
+extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx);
+extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe,
+ struct pipe_resource *idxbuf,
+ unsigned ib_size, int ib_bias,
+ unsigned mode,
+ unsigned start, unsigned count);
+extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx);
+
+/* nvfx_fb.c */
+extern void nvfx_state_framebuffer_validate(struct nvfx_context *nvfx);
+void
+nvfx_framebuffer_relocate(struct nvfx_context *nvfx);
+
+/* nvfx_fragprog.c */
+extern void nvfx_fragprog_destroy(struct nvfx_context *,
+ struct nvfx_fragment_program *);
+extern void nvfx_fragprog_validate(struct nvfx_context *nvfx);
+extern void
+nvfx_fragprog_relocate(struct nvfx_context *nvfx);
+
+/* nvfx_fragtex.c */
+extern void nvfx_fragtex_validate(struct nvfx_context *nvfx);
+extern void
+nvfx_fragtex_relocate(struct nvfx_context *nvfx);
+
+/* nv30_fragtex.c */
+extern void
+nv30_sampler_state_init(struct pipe_context *pipe,
+ struct nvfx_sampler_state *ps,
+ const struct pipe_sampler_state *cso);
+extern void nv30_fragtex_set(struct nvfx_context *nvfx, int unit);
+
+/* nv40_fragtex.c */
+extern void
+nv40_sampler_state_init(struct pipe_context *pipe,
+ struct nvfx_sampler_state *ps,
+ const struct pipe_sampler_state *cso);
+extern void nv40_fragtex_set(struct nvfx_context *nvfx, int unit);
+
+/* nvfx_state.c */
+extern void nvfx_init_state_functions(struct nvfx_context *nvfx);
+extern void nvfx_state_scissor_validate(struct nvfx_context *nvfx);
+extern void nvfx_state_stipple_validate(struct nvfx_context *nvfx);
+extern void nvfx_state_blend_validate(struct nvfx_context *nvfx);
+extern void nvfx_state_blend_colour_validate(struct nvfx_context *nvfx);
+extern void nvfx_state_viewport_validate(struct nvfx_context *nvfx);
+extern void nvfx_state_rasterizer_validate(struct nvfx_context *nvfx);
+extern void nvfx_state_sr_validate(struct nvfx_context *nvfx);
+extern void nvfx_state_zsa_validate(struct nvfx_context *nvfx);
+
+/* nvfx_state_emit.c */
+extern void nvfx_state_relocate(struct nvfx_context *nvfx);
+extern boolean nvfx_state_validate(struct nvfx_context *nvfx);
+extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx);
+extern void nvfx_state_emit(struct nvfx_context *nvfx);
+
+/* nvfx_transfer.c */
+extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx);
+
+/* nvfx_vbo.c */
+extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx);
+extern void nvfx_vbo_relocate(struct nvfx_context *nvfx);
+extern void nvfx_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern void nvfx_draw_elements(struct pipe_context *pipe,
+ struct pipe_resource *indexBuffer,
+ unsigned indexSize, int indexBias,
+ unsigned mode, unsigned start,
+ unsigned count);
+
+/* nvfx_vertprog.c */
+extern boolean nvfx_vertprog_validate(struct nvfx_context *nvfx);
+extern void nvfx_vertprog_destroy(struct nvfx_context *,
+ struct nvfx_vertex_program *);
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c
new file mode 100644
index 00000000000..22cff370b77
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_draw.c
@@ -0,0 +1,350 @@
+#include "pipe/p_shader_tokens.h"
+#include "util/u_inlines.h"
+#include "tgsi/tgsi_ureg.h"
+
+#include "util/u_pack_color.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pipe.h"
+
+#include "nvfx_context.h"
+
+/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
+ * often at all. Uses "quadro style" vertex submission + a fixed vertex
+ * layout to avoid the need to generate a vertex program or vtxfmt.
+ */
+
+struct nvfx_render_stage {
+ struct draw_stage stage;
+ struct nvfx_context *nvfx;
+ unsigned prim;
+};
+
+static INLINE struct nvfx_render_stage *
+nvfx_render_stage(struct draw_stage *stage)
+{
+ return (struct nvfx_render_stage *)stage;
+}
+
+static INLINE void
+nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v)
+{
+ struct nvfx_screen *screen = nvfx->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
+ unsigned i;
+
+ for (i = 0; i < nvfx->swtnl.nr_attribs; i++) {
+ unsigned idx = nvfx->swtnl.draw[i];
+ unsigned hw = nvfx->swtnl.hw[i];
+
+ switch (nvfx->swtnl.emit[i]) {
+ case EMIT_OMIT:
+ break;
+ case EMIT_1F:
+ BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(hw), 1);
+ OUT_RING (chan, fui(v->data[idx][0]));
+ break;
+ case EMIT_2F:
+ BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(hw), 2);
+ OUT_RING (chan, fui(v->data[idx][0]));
+ OUT_RING (chan, fui(v->data[idx][1]));
+ break;
+ case EMIT_3F:
+ BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(hw), 3);
+ OUT_RING (chan, fui(v->data[idx][0]));
+ OUT_RING (chan, fui(v->data[idx][1]));
+ OUT_RING (chan, fui(v->data[idx][2]));
+ break;
+ case EMIT_4F:
+ BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4);
+ OUT_RING (chan, fui(v->data[idx][0]));
+ OUT_RING (chan, fui(v->data[idx][1]));
+ OUT_RING (chan, fui(v->data[idx][2]));
+ OUT_RING (chan, fui(v->data[idx][3]));
+ break;
+ case 0xff:
+ BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4);
+ OUT_RING (chan, fui(v->data[idx][0] / v->data[idx][3]));
+ OUT_RING (chan, fui(v->data[idx][1] / v->data[idx][3]));
+ OUT_RING (chan, fui(v->data[idx][2] / v->data[idx][3]));
+ OUT_RING (chan, fui(1.0f / v->data[idx][3]));
+ break;
+ case EMIT_4UB:
+ BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1);
+ OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]),
+ float_to_ubyte(v->data[idx][1]),
+ float_to_ubyte(v->data[idx][2]),
+ float_to_ubyte(v->data[idx][3])));
+ case EMIT_4UB_BGRA:
+ BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1);
+ OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][2]),
+ float_to_ubyte(v->data[idx][1]),
+ float_to_ubyte(v->data[idx][0]),
+ float_to_ubyte(v->data[idx][3])));
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+}
+
+static INLINE void
+nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
+ unsigned mode, unsigned count)
+{
+ struct nvfx_render_stage *rs = nvfx_render_stage(stage);
+ struct nvfx_context *nvfx = rs->nvfx;
+
+ struct nvfx_screen *screen = nvfx->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
+ unsigned i;
+
+ /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
+ if (AVAIL_RING(chan) < ((count * 20) + 6)) {
+ if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) {
+ NOUVEAU_ERR("AIII, missed flush\n");
+ assert(0);
+ }
+ FIRE_RING(chan);
+ nvfx_state_emit(nvfx);
+ }
+
+ /* Switch primitive modes if necessary */
+ if (rs->prim != mode) {
+ if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) {
+ BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP);
+ }
+
+ BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, mode);
+ rs->prim = mode;
+ }
+
+ /* Emit vertex data */
+ for (i = 0; i < count; i++)
+ nvfx_render_vertex(nvfx, prim->v[i]);
+
+ /* If it's likely we'll need to empty the push buffer soon, finish
+ * off the primitive now.
+ */
+ if (AVAIL_RING(chan) < ((count * 20) + 6)) {
+ BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP);
+ rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP;
+ }
+}
+
+static void
+nvfx_render_point(struct draw_stage *draw, struct prim_header *prim)
+{
+ nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_POINTS, 1);
+}
+
+static void
+nvfx_render_line(struct draw_stage *draw, struct prim_header *prim)
+{
+ nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_LINES, 2);
+}
+
+static void
+nvfx_render_tri(struct draw_stage *draw, struct prim_header *prim)
+{
+ nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_TRIANGLES, 3);
+}
+
+static void
+nvfx_render_flush(struct draw_stage *draw, unsigned flags)
+{
+ struct nvfx_render_stage *rs = nvfx_render_stage(draw);
+ struct nvfx_context *nvfx = rs->nvfx;
+ struct nvfx_screen *screen = nvfx->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
+
+ if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) {
+ BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP);
+ rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP;
+ }
+}
+
+static void
+nvfx_render_reset_stipple_counter(struct draw_stage *draw)
+{
+}
+
+static void
+nvfx_render_destroy(struct draw_stage *draw)
+{
+ FREE(draw);
+}
+
+static struct nvfx_vertex_program *
+nvfx_create_drawvp(struct nvfx_context *nvfx)
+{
+ struct ureg_program *ureg;
+ uint i;
+
+ ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
+ if (ureg == NULL)
+ return NULL;
+
+ ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), ureg_DECL_vs_input(ureg, 0));
+ ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0), ureg_DECL_vs_input(ureg, 3));
+ ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1), ureg_DECL_vs_input(ureg, 4));
+ ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 0), ureg_DECL_vs_input(ureg, 3));
+ ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 1), ureg_DECL_vs_input(ureg, 4));
+ ureg_MOV(ureg,
+ ureg_writemask(ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 1), TGSI_WRITEMASK_X),
+ ureg_DECL_vs_input(ureg, 5));
+ for (i = 0; i < 8; ++i)
+ ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, i), ureg_DECL_vs_input(ureg, 8 + i));
+
+ ureg_END( ureg );
+
+ return ureg_create_shader_and_destroy( ureg, &nvfx->pipe );
+}
+
+struct draw_stage *
+nvfx_draw_render_stage(struct nvfx_context *nvfx)
+{
+ struct nvfx_render_stage *render = CALLOC_STRUCT(nvfx_render_stage);
+
+ if (!nvfx->swtnl.vertprog)
+ nvfx->swtnl.vertprog = nvfx_create_drawvp(nvfx);
+
+ render->nvfx = nvfx;
+ render->stage.draw = nvfx->draw;
+ render->stage.point = nvfx_render_point;
+ render->stage.line = nvfx_render_line;
+ render->stage.tri = nvfx_render_tri;
+ render->stage.flush = nvfx_render_flush;
+ render->stage.reset_stipple_counter = nvfx_render_reset_stipple_counter;
+ render->stage.destroy = nvfx_render_destroy;
+
+ return &render->stage;
+}
+
+void
+nvfx_draw_elements_swtnl(struct pipe_context *pipe,
+ struct pipe_resource *idxbuf,
+ unsigned idxbuf_size, int idxbuf_bias,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
+ struct pipe_transfer *ib_transfer = NULL;
+ struct pipe_transfer *cb_transfer = NULL;
+ unsigned i;
+ void *map;
+
+ if (!nvfx_state_validate_swtnl(nvfx))
+ return;
+ nvfx_state_emit(nvfx);
+
+ for (i = 0; i < nvfx->vtxbuf_nr; i++) {
+ map = pipe_buffer_map(pipe, nvfx->vtxbuf[i].buffer,
+ PIPE_TRANSFER_READ,
+ &vb_transfer[i]);
+ draw_set_mapped_vertex_buffer(nvfx->draw, i, map);
+ }
+
+ if (idxbuf) {
+ map = pipe_buffer_map(pipe, idxbuf,
+ PIPE_TRANSFER_READ,
+ &ib_transfer);
+ draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, idxbuf_bias, map);
+ } else {
+ draw_set_mapped_element_buffer(nvfx->draw, 0, 0, NULL);
+ }
+
+ if (nvfx->constbuf[PIPE_SHADER_VERTEX]) {
+ const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX];
+
+ map = pipe_buffer_map(pipe,
+ nvfx->constbuf[PIPE_SHADER_VERTEX],
+ PIPE_TRANSFER_READ,
+ &cb_transfer);
+ draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0,
+ map, nr);
+ }
+
+ draw_arrays(nvfx->draw, mode, start, count);
+
+ for (i = 0; i < nvfx->vtxbuf_nr; i++)
+ pipe_buffer_unmap(pipe, nvfx->vtxbuf[i].buffer, vb_transfer[i]);
+
+ if (idxbuf)
+ pipe_buffer_unmap(pipe, idxbuf, ib_transfer);
+
+ if (nvfx->constbuf[PIPE_SHADER_VERTEX])
+ pipe_buffer_unmap(pipe, nvfx->constbuf[PIPE_SHADER_VERTEX],
+ cb_transfer);
+
+ draw_flush(nvfx->draw);
+ pipe->flush(pipe, 0, NULL);
+}
+
+static INLINE void
+emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit,
+ unsigned semantic, unsigned index)
+{
+ unsigned draw_out = draw_find_shader_output(nvfx->draw, semantic, index);
+ unsigned a = nvfx->swtnl.nr_attribs++;
+
+ nvfx->swtnl.hw[a] = hw;
+ nvfx->swtnl.emit[a] = emit;
+ nvfx->swtnl.draw[a] = draw_out;
+}
+
+void
+nvfx_vtxfmt_validate(struct nvfx_context *nvfx)
+{
+ struct nvfx_fragment_program *fp = nvfx->fragprog;
+ unsigned colour = 0, texcoords = 0, fog = 0, i;
+
+ /* Determine needed fragprog inputs */
+ for (i = 0; i < fp->info.num_inputs; i++) {
+ switch (fp->info.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ colour |= (1 << fp->info.input_semantic_index[i]);
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ texcoords |= (1 << fp->info.input_semantic_index[i]);
+ break;
+ case TGSI_SEMANTIC_FOG:
+ fog = 1;
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ nvfx->swtnl.nr_attribs = 0;
+
+ /* Map draw vtxprog output to hw attribute IDs */
+ for (i = 0; i < 2; i++) {
+ if (!(colour & (1 << i)))
+ continue;
+ emit_attrib(nvfx, 3 + i, EMIT_4F, TGSI_SEMANTIC_COLOR, i);
+ }
+
+ for (i = 0; i < 8; i++) {
+ if (!(texcoords & (1 << i)))
+ continue;
+ emit_attrib(nvfx, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i);
+ }
+
+ if (fog) {
+ emit_attrib(nvfx, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0);
+ }
+
+ emit_attrib(nvfx, 0, 0xff, TGSI_SEMANTIC_POSITION, 0);
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
new file mode 100644
index 00000000000..6772d9bd516
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -0,0 +1,1004 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nvfx_context.h"
+#include "nvfx_shader.h"
+
+#define MAX_CONSTS 128
+#define MAX_IMM 32
+struct nvfx_fpc {
+ struct nvfx_fragment_program *fp;
+
+ uint attrib_map[PIPE_MAX_SHADER_INPUTS];
+
+ unsigned r_temps;
+ unsigned r_temps_discard;
+ struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+ struct nvfx_sreg *r_temp;
+
+ int num_regs;
+
+ unsigned inst_offset;
+ unsigned have_const;
+
+ struct {
+ int pipe;
+ float vals[4];
+ } consts[MAX_CONSTS];
+ int nr_consts;
+
+ struct nvfx_sreg imm[MAX_IMM];
+ unsigned nr_imm;
+};
+
+static INLINE struct nvfx_sreg
+temp(struct nvfx_fpc *fpc)
+{
+ int idx = ffs(~fpc->r_temps) - 1;
+
+ if (idx < 0) {
+ NOUVEAU_ERR("out of temps!!\n");
+ assert(0);
+ return nvfx_sr(NVFXSR_TEMP, 0);
+ }
+
+ fpc->r_temps |= (1 << idx);
+ fpc->r_temps_discard |= (1 << idx);
+ return nvfx_sr(NVFXSR_TEMP, idx);
+}
+
+static INLINE void
+release_temps(struct nvfx_fpc *fpc)
+{
+ fpc->r_temps &= ~fpc->r_temps_discard;
+ fpc->r_temps_discard = 0;
+}
+
+static INLINE struct nvfx_sreg
+constant(struct nvfx_fpc *fpc, int pipe, float vals[4])
+{
+ int idx;
+
+ if (fpc->nr_consts == MAX_CONSTS)
+ assert(0);
+ idx = fpc->nr_consts++;
+
+ fpc->consts[idx].pipe = pipe;
+ if (pipe == -1)
+ memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
+ return nvfx_sr(NVFXSR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nvfx_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \
+ (d), (m), (s0), (s1), (s2))
+#define tex(cc,s,o,u,d,m,s0,s1,s2) \
+ nvfx_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \
+ (d), (m), (s0), none, none)
+
+static void
+grow_insns(struct nvfx_fpc *fpc, int size)
+{
+ struct nvfx_fragment_program *fp = fpc->fp;
+
+ fp->insn_len += size;
+ fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
+}
+
+static void
+emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src)
+{
+ struct nvfx_fragment_program *fp = fpc->fp;
+ uint32_t *hw = &fp->insn[fpc->inst_offset];
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NVFXSR_INPUT:
+ sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
+ hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
+ break;
+ case NVFXSR_OUTPUT:
+ sr |= NVFX_FP_REG_SRC_HALF;
+ /* fall-through */
+ case NVFXSR_TEMP:
+ sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
+ sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);
+ break;
+ case NVFXSR_CONST:
+ if (!fpc->have_const) {
+ grow_insns(fpc, 4);
+ fpc->have_const = 1;
+ }
+
+ hw = &fp->insn[fpc->inst_offset];
+ if (fpc->consts[src.index].pipe >= 0) {
+ struct nvfx_fragment_program_data *fpd;
+
+ fp->consts = realloc(fp->consts, ++fp->nr_consts *
+ sizeof(*fpd));
+ fpd = &fp->consts[fp->nr_consts - 1];
+ fpd->offset = fpc->inst_offset + 4;
+ fpd->index = fpc->consts[src.index].pipe;
+ memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
+ } else {
+ memcpy(&fp->insn[fpc->inst_offset + 4],
+ fpc->consts[src.index].vals,
+ sizeof(uint32_t) * 4);
+ }
+
+ sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
+ break;
+ case NVFXSR_NONE:
+ sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NVFX_FP_REG_NEGATE;
+
+ if (src.abs)
+ hw[1] |= (1 << (29 + pos));
+
+ sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
+ (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
+ (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
+ (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));
+
+ hw[pos + 1] |= sr;
+}
+
+static void
+emit_dst(struct nvfx_fpc *fpc, struct nvfx_sreg dst)
+{
+ struct nvfx_fragment_program *fp = fpc->fp;
+ uint32_t *hw = &fp->insn[fpc->inst_offset];
+
+ switch (dst.type) {
+ case NVFXSR_TEMP:
+ if (fpc->num_regs < (dst.index + 1))
+ fpc->num_regs = dst.index + 1;
+ break;
+ case NVFXSR_OUTPUT:
+ if (dst.index == 1) {
+ fp->fp_control |= 0xe;
+ } else {
+ hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
+ }
+ break;
+ case NVFXSR_NONE:
+ hw[0] |= (1 << 30);
+ break;
+ default:
+ assert(0);
+ }
+
+ hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
+}
+
+static void
+nvfx_fp_arith(struct nvfx_fpc *fpc, int sat, int op,
+ struct nvfx_sreg dst, int mask,
+ struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
+{
+ struct nvfx_fragment_program *fp = fpc->fp;
+ uint32_t *hw;
+
+ fpc->inst_offset = fp->insn_len;
+ fpc->have_const = 0;
+ grow_insns(fpc, 4);
+ hw = &fp->insn[fpc->inst_offset];
+ memset(hw, 0, sizeof(uint32_t) * 4);
+
+ if (op == NVFX_FP_OP_OPCODE_KIL)
+ fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
+ hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT);
+ hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT);
+ hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT);
+
+ if (sat)
+ hw[0] |= NVFX_FP_OP_OUT_SAT;
+
+ if (dst.cc_update)
+ hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
+ hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT);
+ hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
+ (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
+ (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
+ (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));
+
+ emit_dst(fpc, dst);
+ emit_src(fpc, 0, s0);
+ emit_src(fpc, 1, s1);
+ emit_src(fpc, 2, s2);
+}
+
+static void
+nvfx_fp_tex(struct nvfx_fpc *fpc, int sat, int op, int unit,
+ struct nvfx_sreg dst, int mask,
+ struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
+{
+ struct nvfx_fragment_program *fp = fpc->fp;
+
+ nvfx_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
+
+ fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
+ fp->samplers |= (1 << unit);
+}
+
+static INLINE struct nvfx_sreg
+tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
+{
+ struct nvfx_sreg src = { 0 };
+
+ switch (fsrc->Register.File) {
+ case TGSI_FILE_INPUT:
+ src = nvfx_sr(NVFXSR_INPUT,
+ fpc->attrib_map[fsrc->Register.Index]);
+ break;
+ case TGSI_FILE_CONSTANT:
+ src = constant(fpc, fsrc->Register.Index, NULL);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ assert(fsrc->Register.Index < fpc->nr_imm);
+ src = fpc->imm[fsrc->Register.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ src = fpc->r_temp[fsrc->Register.Index];
+ break;
+ /* NV40 fragprog result regs are just temps, so this is simple */
+ case TGSI_FILE_OUTPUT:
+ src = fpc->r_result[fsrc->Register.Index];
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ break;
+ }
+
+ src.abs = fsrc->Register.Absolute;
+ src.negate = fsrc->Register.Negate;
+ src.swz[0] = fsrc->Register.SwizzleX;
+ src.swz[1] = fsrc->Register.SwizzleY;
+ src.swz[2] = fsrc->Register.SwizzleZ;
+ src.swz[3] = fsrc->Register.SwizzleW;
+ return src;
+}
+
+static INLINE struct nvfx_sreg
+tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
+ switch (fdst->Register.File) {
+ case TGSI_FILE_OUTPUT:
+ return fpc->r_result[fdst->Register.Index];
+ case TGSI_FILE_TEMPORARY:
+ return fpc->r_temp[fdst->Register.Index];
+ case TGSI_FILE_NULL:
+ return nvfx_sr(NVFXSR_NONE, 0);
+ default:
+ NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
+ return nvfx_sr(NVFXSR_NONE, 0);
+ }
+}
+
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+ int mask = 0;
+
+ if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X;
+ if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y;
+ if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z;
+ if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W;
+ return mask;
+}
+
+static boolean
+nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
+ const struct tgsi_full_instruction *finst)
+{
+ const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+ struct nvfx_sreg src[3], dst, tmp;
+ int mask, sat, unit = 0;
+ int ai = -1, ci = -1, ii = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->Src[i];
+ if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(fpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->Src[i];
+
+ switch (fsrc->Register.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->Register.Index) {
+ ai = fsrc->Register.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ if ((ci == -1 && ii == -1) ||
+ ci == fsrc->Register.Index) {
+ ci = fsrc->Register.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ if ((ci == -1 && ii == -1) ||
+ ii == fsrc->Register.Index) {
+ ii = fsrc->Register.Index;
+ src[i] = tgsi_src(fpc, fsrc);
+ } else {
+ src[i] = temp(fpc);
+ arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
+ tgsi_src(fpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ case TGSI_FILE_SAMPLER:
+ unit = fsrc->Register.Index;
+ break;
+ case TGSI_FILE_OUTPUT:
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(fpc, &finst->Dst[0]);
+ mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
+ sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_CMP:
+ tmp = nvfx_sr(NVFXSR_NONE, 0);
+ tmp.cc_update = 1;
+ arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+ dst.cc_test = NVFX_COND_GE;
+ arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+ dst.cc_test = NVFX_COND_LT;
+ arith(fpc, sat, MOV, dst, mask, src[1], none, none);
+ break;
+ case TGSI_OPCODE_COS:
+ arith(fpc, sat, COS, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DDX:
+ if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
+ tmp = temp(fpc);
+ arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y,
+ swz(src[0], Z, W, Z, W), none, none);
+ arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W,
+ swz(tmp, X, Y, X, Y), none, none);
+ arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0],
+ none, none);
+ arith(fpc, 0, MOV, dst, mask, tmp, none, none);
+ } else {
+ arith(fpc, sat, DDX, dst, mask, src[0], none, none);
+ }
+ break;
+ case TGSI_OPCODE_DDY:
+ if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
+ tmp = temp(fpc);
+ arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y,
+ swz(src[0], Z, W, Z, W), none, none);
+ arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W,
+ swz(tmp, X, Y, X, Y), none, none);
+ arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0],
+ none, none);
+ arith(fpc, 0, MOV, dst, mask, tmp, none, none);
+ } else {
+ arith(fpc, sat, DDY, dst, mask, src[0], none, none);
+ }
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ tmp = temp(fpc);
+ arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[1], none);
+ arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
+ swz(src[1], W, W, W, W), none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(fpc, sat, EX2, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(fpc, sat, FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(fpc, sat, FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_KILP:
+ arith(fpc, 0, KIL, none, 0, none, none, none);
+ break;
+ case TGSI_OPCODE_KIL:
+ dst = nvfx_sr(NVFXSR_NONE, 0);
+ dst.cc_update = 1;
+ arith(fpc, 0, MOV, dst, NVFX_FP_MASK_ALL, src[0], none, none);
+ dst.cc_update = 0; dst.cc_test = NVFX_COND_LT;
+ arith(fpc, 0, KIL, dst, 0, none, none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(fpc, sat, LG2, dst, mask, src[0], none, none);
+ break;
+// case TGSI_OPCODE_LIT:
+ case TGSI_OPCODE_LRP:
+ if(!nvfx->is_nv4x)
+ arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]);
+ else {
+ tmp = temp(fpc);
+ arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
+ arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
+ }
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(fpc, sat, MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_POW:
+ if(!nvfx->is_nv4x)
+ arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none);
+ else {
+ tmp = temp(fpc);
+ arith(fpc, 0, LG2, tmp, NVFX_FP_MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ arith(fpc, 0, MUL, tmp, NVFX_FP_MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(fpc, sat, EX2, dst, mask,
+ swz(tmp, X, X, X, X), none, none);
+ }
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(fpc, sat, RCP, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_RET:
+ assert(0);
+ break;
+ case TGSI_OPCODE_RFL:
+ if(!nvfx->is_nv4x)
+ arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none);
+ else {
+ tmp = temp(fpc);
+ arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[0], none);
+ arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_Y, src[0], src[1], none);
+ arith(fpc, 0, DIV, scale(tmp, 2X), NVFX_FP_MASK_Z,
+ swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
+ arith(fpc, sat, MAD, dst, mask,
+ swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
+ }
+ break;
+ case TGSI_OPCODE_RSQ:
+ if(!nvfx->is_nv4x)
+ arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
+ else {
+ tmp = temp(fpc);
+ arith(fpc, 0, LG2, scale(tmp, INV_2X), NVFX_FP_MASK_X,
+ abs(swz(src[0], X, X, X, X)), none, none);
+ arith(fpc, sat, EX2, dst, mask,
+ neg(swz(tmp, X, X, X, X)), none, none);
+ }
+ break;
+ case TGSI_OPCODE_SCS:
+ /* avoid overwriting the source */
+ if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
+ {
+ if (mask & NVFX_FP_MASK_X) {
+ arith(fpc, sat, COS, dst, NVFX_FP_MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & NVFX_FP_MASK_Y) {
+ arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ }
+ else
+ {
+ if (mask & NVFX_FP_MASK_Y) {
+ arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & NVFX_FP_MASK_X) {
+ arith(fpc, sat, COS, dst, NVFX_FP_MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ }
+ break;
+ case TGSI_OPCODE_SEQ:
+ arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SFL:
+ arith(fpc, sat, SFL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGT:
+ arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SIN:
+ arith(fpc, sat, SIN, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_SLE:
+ arith(fpc, sat, SLE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SNE:
+ arith(fpc, sat, SNE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_STR:
+ arith(fpc, sat, STR, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
+ break;
+ case TGSI_OPCODE_TEX:
+ tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_TXB:
+ tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_TXP:
+ tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = temp(fpc);
+ arith(fpc, 0, MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(fpc, sat, MAD, dst, (mask & ~NVFX_FP_MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ release_temps(fpc);
+ return TRUE;
+}
+
+static boolean
+nvfx_fragprog_parse_decl_attrib(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ int hw;
+
+ switch (fdec->Semantic.Name) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = NVFX_FP_OP_INPUT_SRC_POSITION;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (fdec->Semantic.Index == 0) {
+ hw = NVFX_FP_OP_INPUT_SRC_COL0;
+ } else
+ if (fdec->Semantic.Index == 1) {
+ hw = NVFX_FP_OP_INPUT_SRC_COL1;
+ } else {
+ NOUVEAU_ERR("bad colour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ hw = NVFX_FP_OP_INPUT_SRC_FOGC;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (fdec->Semantic.Index <= 7) {
+ hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.
+ Index);
+ } else {
+ NOUVEAU_ERR("bad generic semantic index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad input semantic\n");
+ return FALSE;
+ }
+
+ fpc->attrib_map[fdec->Range.First] = hw;
+ return TRUE;
+}
+
+static boolean
+nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ unsigned idx = fdec->Range.First;
+ unsigned hw;
+
+ switch (fdec->Semantic.Name) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = 1;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ hw = ~0;
+ switch (fdec->Semantic.Index) {
+ case 0: hw = 0; break;
+ case 1: hw = 2; break;
+ case 2: hw = 3; break;
+ case 3: hw = 4; break;
+ }
+ if(hw > ((nvfx->is_nv4x) ? 4 : 2)) {
+ NOUVEAU_ERR("bad rcol index\n");
+ return FALSE;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("bad output semantic\n");
+ return FALSE;
+ }
+
+ fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);
+ fpc->r_temps |= (1 << hw);
+ return TRUE;
+}
+
+static boolean
+nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
+{
+ struct tgsi_parse_context p;
+ int high_temp = -1, i;
+
+ tgsi_parse_init(&p, fpc->fp->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch(tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+ fdec = &p.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_INPUT:
+ if (!nvfx_fragprog_parse_decl_attrib(nvfx, fpc, fdec))
+ goto out_err;
+ break;
+ case TGSI_FILE_OUTPUT:
+ if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec))
+ goto out_err;
+ break;
+ case TGSI_FILE_TEMPORARY:
+ if (fdec->Range.Last > high_temp) {
+ high_temp =
+ fdec->Range.Last;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ struct tgsi_full_immediate *imm;
+ float vals[4];
+
+ imm = &p.FullToken.FullImmediate;
+ assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+ assert(fpc->nr_imm < MAX_IMM);
+
+ vals[0] = imm->u[0].Float;
+ vals[1] = imm->u[1].Float;
+ vals[2] = imm->u[2].Float;
+ vals[3] = imm->u[3].Float;
+ fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ tgsi_parse_free(&p);
+
+ if (++high_temp) {
+ fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
+ for (i = 0; i < high_temp; i++)
+ fpc->r_temp[i] = temp(fpc);
+ fpc->r_temps_discard = 0;
+ }
+
+ return TRUE;
+
+out_err:
+ if (fpc->r_temp)
+ FREE(fpc->r_temp);
+ tgsi_parse_free(&p);
+ return FALSE;
+}
+
+static void
+nvfx_fragprog_translate(struct nvfx_context *nvfx,
+ struct nvfx_fragment_program *fp)
+{
+ struct tgsi_parse_context parse;
+ struct nvfx_fpc *fpc = NULL;
+
+ fpc = CALLOC(1, sizeof(struct nvfx_fpc));
+ if (!fpc)
+ return;
+ fpc->fp = fp;
+ fpc->num_regs = 2;
+
+ if (!nvfx_fragprog_prepare(nvfx, fpc)) {
+ FREE(fpc);
+ return;
+ }
+
+ tgsi_parse_init(&parse, fp->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+
+ finst = &parse.FullToken.FullInstruction;
+ if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst))
+ goto out_err;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ if(!nvfx->is_nv4x)
+ fp->fp_control |= (fpc->num_regs-1)/2;
+ else
+ fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;
+
+ /* Terminate final instruction */
+ if(fp->insn)
+ fp->insn[fpc->inst_offset] |= 0x00000001;
+
+ /* Append NOP + END instruction, may or may not be necessary. */
+ fpc->inst_offset = fp->insn_len;
+ grow_insns(fpc, 4);
+ fp->insn[fpc->inst_offset + 0] = 0x00000001;
+ fp->insn[fpc->inst_offset + 1] = 0x00000000;
+ fp->insn[fpc->inst_offset + 2] = 0x00000000;
+ fp->insn[fpc->inst_offset + 3] = 0x00000000;
+
+ fp->translated = TRUE;
+out_err:
+ tgsi_parse_free(&parse);
+ if (fpc->r_temp)
+ FREE(fpc->r_temp);
+ FREE(fpc);
+}
+
+static inline void
+nvfx_fp_memcpy(void* dst, const void* src, size_t len)
+{
+#ifndef WORDS_BIGENDIAN
+ memcpy(dst, src, len);
+#else
+ size_t i;
+ for(i = 0; i < len; i += 4) {
+ uint32_t v = (uint32_t*)((char*)src + i);
+ *(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16);
+ }
+#endif
+}
+
+void
+nvfx_fragprog_validate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nvfx_fragment_program *fp = nvfx->fragprog;
+ int update = 0;
+ int i;
+
+ if (!fp->translated)
+ {
+ const int min_size = 4096;
+
+ nvfx_fragprog_translate(nvfx, fp);
+ if (!fp->translated) {
+ static unsigned dummy[8] = {1, 0, 0, 0, 1, 0, 0, 0};
+ static int warned = 0;
+ if(!warned)
+ {
+ fprintf(stderr, "nvfx: failed to translate fragment program!\n");
+ warned = 1;
+ }
+
+ /* use dummy program: we cannot fail here */
+ fp->translated = TRUE;
+ fp->insn = malloc(sizeof(dummy));
+ memcpy(fp->insn, dummy, sizeof(dummy));
+ fp->insn_len = sizeof(dummy) / sizeof(dummy[0]);
+ }
+ update = TRUE;
+
+ fp->prog_size = (fp->insn_len * 4 + 63) & ~63;
+
+ if(fp->prog_size >= min_size)
+ fp->progs_per_bo = 1;
+ else
+ fp->progs_per_bo = min_size / fp->prog_size;
+ fp->bo_prog_idx = fp->progs_per_bo - 1;
+ }
+
+ /* we must update constants even on "just" fragprog changes, because
+ we don't check whether the current constant buffer matches the latest
+ one bound to this fragment program */
+ if (nvfx->dirty & (NVFX_NEW_FRAGCONST | NVFX_NEW_FRAGPROG))
+ update = TRUE;
+
+ if(update) {
+ int offset;
+
+ ++fp->bo_prog_idx;
+ if(fp->bo_prog_idx >= fp->progs_per_bo)
+ {
+ if(fp->fpbo && !nouveau_bo_busy(fp->fpbo->next->bo, NOUVEAU_BO_WR))
+ {
+ fp->fpbo = fp->fpbo->next;
+ }
+ else
+ {
+ struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + fp->prog_size * fp->progs_per_bo, 16);
+ char *map, *buf;
+
+ if(fp->fpbo)
+ {
+ fpbo->next = fp->fpbo->next;
+ fp->fpbo->next = fpbo;
+ }
+ else
+ fpbo->next = fpbo;
+ fp->fpbo = fpbo;
+ fpbo->bo = 0;
+ nouveau_bo_new(nvfx->screen->base.device, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 64, fp->prog_size * fp->progs_per_bo, &fpbo->bo);
+ nouveau_bo_map(fpbo->bo, NOUVEAU_BO_NOSYNC);
+
+ map = fpbo->bo->map;
+ buf = fpbo->insn;
+ for(int i = 0; i < fp->progs_per_bo; ++i)
+ {
+ memcpy(buf, fp->insn, fp->insn_len * 4);
+ nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4);
+ map += fp->prog_size;
+ buf += fp->prog_size;
+ }
+ }
+ fp->bo_prog_idx = 0;
+ }
+
+ offset = fp->bo_prog_idx * fp->prog_size;
+
+ if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) {
+ struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT];
+ // TODO: avoid using transfers, just directly the buffer
+ struct pipe_transfer* transfer;
+ // TODO: does this check make any sense, or should we do this unconditionally?
+ uint32_t* map = pipe_buffer_map(&nvfx->pipe, constbuf, PIPE_TRANSFER_READ, &transfer);
+ uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
+ uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset);
+ for (i = 0; i < fp->nr_consts; ++i) {
+ unsigned off = fp->consts[i].offset;
+ unsigned idx = fp->consts[i].index * 4;
+
+ /* TODO: is checking a good idea? */
+ if(memcmp(&buf[off], &map[idx], 4 * sizeof(uint32_t))) {
+ memcpy(&buf[off], &map[idx], 4 * sizeof(uint32_t));
+ nvfx_fp_memcpy(&fpmap[off], &map[idx], 4 * sizeof(uint32_t));
+ }
+ }
+ pipe_buffer_unmap(&nvfx->pipe, constbuf, transfer);
+ }
+ }
+
+ if(update || (nvfx->dirty & NVFX_NEW_FRAGPROG)) {
+ int offset = fp->bo_prog_idx * fp->prog_size;
+ MARK_RING(chan, 8, 1);
+ OUT_RING(chan, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1));
+ OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
+ NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
+ NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
+ OUT_RING(chan, RING_3D(NV34TCL_FP_CONTROL, 1));
+ OUT_RING(chan, fp->fp_control);
+ if(!nvfx->is_nv4x) {
+ OUT_RING(chan, RING_3D(NV34TCL_FP_REG_CONTROL, 1));
+ OUT_RING(chan, (1<<16)|0x4);
+ OUT_RING(chan, RING_3D(NV34TCL_TX_UNITS_ENABLE, 1));
+ OUT_RING(chan, fp->samplers);
+ }
+ }
+}
+
+void
+nvfx_fragprog_relocate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct nvfx_fragment_program *fp = nvfx->fragprog;
+ struct nouveau_bo* bo = fp->fpbo->bo;
+ int offset = fp->bo_prog_idx * fp->prog_size;
+ unsigned fp_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; // TODO: GART?
+ fp_flags |= NOUVEAU_BO_DUMMY;
+ MARK_RING(chan, 2, 2);
+ OUT_RELOC(chan, bo, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1), fp_flags, 0, 0);
+ OUT_RELOC(chan, bo, offset, fp_flags | NOUVEAU_BO_LOW |
+ NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
+ NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
+}
+
+void
+nvfx_fragprog_destroy(struct nvfx_context *nvfx,
+ struct nvfx_fragment_program *fp)
+{
+ struct nvfx_fragment_program_bo* fpbo = fp->fpbo;
+ if(fpbo)
+ {
+ do
+ {
+ struct nvfx_fragment_program_bo* next = fpbo->next;
+ nouveau_bo_unmap(fpbo->bo);
+ nouveau_bo_ref(0, &fpbo->bo);
+ free(fpbo);
+ fpbo = next;
+ }
+ while(fpbo != fp->fpbo);
+ }
+
+ if (fp->insn_len)
+ FREE(fp->insn);
+}
+
diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c
new file mode 100644
index 00000000000..0b4a434fecc
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c
@@ -0,0 +1,58 @@
+#include "nvfx_context.h"
+#include "nvfx_resource.h"
+
+void
+nvfx_fragtex_validate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ unsigned samplers, unit;
+
+ samplers = nvfx->dirty_samplers;
+ if(!samplers)
+ return;
+
+ while (samplers) {
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ if(nvfx->fragment_sampler_views[unit] && nvfx->tex_sampler[unit]) {
+ if(!nvfx->is_nv4x)
+ nv30_fragtex_set(nvfx, unit);
+ else
+ nv40_fragtex_set(nvfx, unit);
+ } else {
+ WAIT_RING(chan, 2);
+ /* this is OK for nv40 too */
+ OUT_RING(chan, RING_3D(NV34TCL_TX_ENABLE(unit), 1));
+ OUT_RING(chan, 0);
+ nvfx->hw_samplers &= ~(1 << unit);
+ }
+ }
+ nvfx->dirty_samplers = 0;
+}
+
+void
+nvfx_fragtex_relocate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ unsigned samplers, unit;
+ unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+ samplers = nvfx->hw_samplers;
+ while (samplers) {
+ struct nvfx_miptree* mt;
+ struct nouveau_bo *bo;
+
+ unit = ffs(samplers) - 1;
+ samplers &= ~(1 << unit);
+
+ mt = (struct nvfx_miptree*)nvfx->fragment_sampler_views[unit]->texture;
+ bo = mt->base.bo;
+
+ MARK_RING(chan, 3, 3);
+ OUT_RELOC(chan, bo, RING_3D(NV34TCL_TX_OFFSET(unit), 2), tex_flags | NOUVEAU_BO_DUMMY, 0, 0);
+ OUT_RELOC(chan, bo, 0, tex_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_DUMMY, 0, 0);
+ OUT_RELOC(chan, bo, nvfx->hw_txf[unit], tex_flags | NOUVEAU_BO_OR | NOUVEAU_BO_DUMMY,
+ NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
+ }
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c
new file mode 100644
index 00000000000..aeb88e9ac96
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_miptree.c
@@ -0,0 +1,310 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+
+#include "nvfx_context.h"
+#include "nvfx_resource.h"
+#include "nvfx_transfer.h"
+#include "nv04_surface_2d.h"
+
+/* Currently using separate implementations for buffers and textures,
+ * even though gallium has a unified abstraction of these objects.
+ * Eventually these should be combined, and mechanisms like transfers
+ * be adapted to work for both buffer and texture uploads.
+ */
+
+static void
+nvfx_miptree_layout(struct nvfx_miptree *mt)
+{
+ struct pipe_resource *pt = &mt->base.base;
+ uint width = pt->width0;
+ uint offset = 0;
+ int nr_faces, l, f;
+ uint wide_pitch = pt->bind & (PIPE_BIND_SAMPLER_VIEW |
+ PIPE_BIND_DEPTH_STENCIL |
+ PIPE_BIND_RENDER_TARGET |
+ PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT);
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ nr_faces = 6;
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ nr_faces = pt->depth0;
+ } else {
+ nr_faces = 1;
+ }
+
+ for (l = 0; l <= pt->last_level; l++) {
+ if (wide_pitch && (pt->flags & NVFX_RESOURCE_FLAG_LINEAR))
+ mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64);
+ else
+ mt->level[l].pitch = util_format_get_stride(pt->format, width);
+
+ mt->level[l].image_offset =
+ CALLOC(nr_faces, sizeof(unsigned));
+
+ width = u_minify(width, 1);
+ }
+
+ for (f = 0; f < nr_faces; f++) {
+ for (l = 0; l < pt->last_level; l++) {
+ mt->level[l].image_offset[f] = offset;
+
+ if (!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR) &&
+ u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
+ offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64);
+ else
+ offset += mt->level[l].pitch * u_minify(pt->height0, l);
+ }
+
+ mt->level[l].image_offset[f] = offset;
+ offset += mt->level[l].pitch * u_minify(pt->height0, l);
+ }
+
+ mt->total_size = offset;
+}
+
+static boolean
+nvfx_miptree_get_handle(struct pipe_screen *pscreen,
+ struct pipe_resource *ptexture,
+ struct winsys_handle *whandle)
+{
+ struct nvfx_miptree* mt = (struct nvfx_miptree*)ptexture;
+
+ if (!mt || !mt->base.bo)
+ return FALSE;
+
+ return nouveau_screen_bo_get_handle(pscreen,
+ mt->base.bo,
+ mt->level[0].pitch,
+ whandle);
+}
+
+
+static void
+nvfx_miptree_destroy(struct pipe_screen *screen, struct pipe_resource *pt)
+{
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
+ int l;
+
+ nouveau_screen_bo_release(screen, mt->base.bo);
+
+ for (l = 0; l <= pt->last_level; l++) {
+ if (mt->level[l].image_offset)
+ FREE(mt->level[l].image_offset);
+ }
+
+ FREE(mt);
+}
+
+
+
+
+struct u_resource_vtbl nvfx_miptree_vtbl =
+{
+ nvfx_miptree_get_handle, /* get_handle */
+ nvfx_miptree_destroy, /* resource_destroy */
+ NULL, /* is_resource_referenced */
+ nvfx_miptree_transfer_new, /* get_transfer */
+ nvfx_miptree_transfer_del, /* transfer_destroy */
+ nvfx_miptree_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region, /* transfer_flush_region */
+ nvfx_miptree_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
+
+
+struct pipe_resource *
+nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt)
+{
+ struct nvfx_miptree *mt;
+ static int no_swizzle = -1;
+ if(no_swizzle < 0)
+ no_swizzle = debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE);
+
+ mt = CALLOC_STRUCT(nvfx_miptree);
+ if (!mt)
+ return NULL;
+
+ mt->base.base = *pt;
+ mt->base.vtbl = &nvfx_miptree_vtbl;
+ pipe_reference_init(&mt->base.base.reference, 1);
+ mt->base.base.screen = pscreen;
+
+ /* Swizzled textures must be POT */
+ if (pt->width0 & (pt->width0 - 1) ||
+ pt->height0 & (pt->height0 - 1))
+ mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+ else
+ if (pt->bind & (PIPE_BIND_SCANOUT |
+ PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_DEPTH_STENCIL))
+ mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+ else
+ if (pt->usage == PIPE_USAGE_DYNAMIC)
+ mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+ else {
+ switch (pt->format) {
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ /* TODO: we can actually swizzle these formats on nv40, we
+ are just preserving the pre-unification behavior.
+ The whole 2D code is going to be rewritten anyway. */
+ if(nvfx_screen(pscreen)->is_nv4x) {
+ mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+ break;
+ }
+ /* TODO: Figure out which formats can be swizzled */
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_R16_SNORM:
+ {
+ if (no_swizzle)
+ mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+ break;
+ }
+ default:
+ mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+ }
+ }
+
+ /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+ * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+ * This also happens for small mipmaps of large textures. */
+ if (pt->bind & PIPE_BIND_RENDER_TARGET &&
+ util_format_get_stride(pt->format, pt->width0) < 64)
+ mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+
+ nvfx_miptree_layout(mt);
+
+ mt->base.bo = nouveau_screen_bo_new(pscreen, 256,
+ pt->usage, pt->bind, mt->total_size);
+ if (!mt->base.bo) {
+ FREE(mt);
+ return NULL;
+ }
+ return &mt->base.base;
+}
+
+
+
+
+struct pipe_resource *
+nvfx_miptree_from_handle(struct pipe_screen *pscreen,
+ const struct pipe_resource *template,
+ struct winsys_handle *whandle)
+{
+ struct nvfx_miptree *mt;
+ unsigned stride;
+
+ /* Only supports 2D, non-mipmapped textures for the moment */
+ if (template->target != PIPE_TEXTURE_2D ||
+ template->last_level != 0 ||
+ template->depth0 != 1)
+ return NULL;
+
+ mt = CALLOC_STRUCT(nvfx_miptree);
+ if (!mt)
+ return NULL;
+
+ mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
+ if (mt->base.bo == NULL) {
+ FREE(mt);
+ return NULL;
+ }
+
+ mt->base.base = *template;
+ mt->base.vtbl = &nvfx_miptree_vtbl;
+ pipe_reference_init(&mt->base.base.reference, 1);
+ mt->base.base.screen = pscreen;
+ mt->level[0].pitch = stride;
+ mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
+
+ /* Assume whoever created this buffer expects it to be linear for now */
+ mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+
+ /* XXX: Need to adjust bo refcount??
+ */
+ /* nouveau_bo_ref(bo, &mt->base.bo); */
+ return &mt->base.base;
+}
+
+
+
+
+
+/* Surface helpers, not strictly required to implement the resource vtbl:
+ */
+struct pipe_surface *
+nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
+ struct nv04_surface *ns;
+
+ ns = CALLOC_STRUCT(nv04_surface);
+ if (!ns)
+ return NULL;
+ pipe_resource_reference(&ns->base.texture, pt);
+ ns->base.format = pt->format;
+ ns->base.width = u_minify(pt->width0, level);
+ ns->base.height = u_minify(pt->height0, level);
+ ns->base.usage = flags;
+ pipe_reference_init(&ns->base.reference, 1);
+ ns->base.face = face;
+ ns->base.level = level;
+ ns->base.zslice = zslice;
+ ns->pitch = mt->level[level].pitch;
+
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ ns->base.offset = mt->level[level].image_offset[face];
+ } else
+ if (pt->target == PIPE_TEXTURE_3D) {
+ ns->base.offset = mt->level[level].image_offset[zslice];
+ } else {
+ ns->base.offset = mt->level[level].image_offset[0];
+ }
+
+ /* create a linear temporary that we can render into if
+ * necessary.
+ *
+ * Note that ns->pitch is always a multiple of 64 for linear
+ * surfaces and swizzled surfaces are POT, so ns->pitch & 63
+ * is equivalent to (ns->pitch < 64 && swizzled)
+ */
+
+ if ((ns->pitch & 63) &&
+ (ns->base.usage & PIPE_BIND_RENDER_TARGET))
+ {
+ struct nv04_surface_2d* eng2d =
+ ((struct nvfx_screen*)pscreen)->eng2d;
+
+ ns = nv04_surface_wrap_for_render(pscreen, eng2d, ns);
+ }
+
+ return &ns->base;
+}
+
+void
+nvfx_miptree_surface_del(struct pipe_surface *ps)
+{
+ struct nv04_surface* ns = (struct nv04_surface*)ps;
+ if(ns->backing)
+ {
+ struct nvfx_screen* screen = (struct nvfx_screen*)ps->texture->screen;
+ if(ns->backing->base.usage & PIPE_BIND_BLIT_DESTINATION)
+ screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+ nvfx_miptree_surface_del(&ns->backing->base);
+ }
+
+ pipe_resource_reference(&ps->texture, NULL);
+ FREE(ps);
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c
new file mode 100644
index 00000000000..1b20b5245d7
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_query.c
@@ -0,0 +1,137 @@
+#include "pipe/p_context.h"
+
+#include "nvfx_context.h"
+
+struct nvfx_query {
+ struct list_head list;
+ struct nouveau_resource *object;
+ unsigned type;
+ boolean ready;
+ uint64_t result;
+};
+
+static INLINE struct nvfx_query *
+nvfx_query(struct pipe_query *pipe)
+{
+ return (struct nvfx_query *)pipe;
+}
+
+static struct pipe_query *
+nvfx_query_create(struct pipe_context *pipe, unsigned query_type)
+{
+ struct nvfx_query *q;
+
+ q = CALLOC(1, sizeof(struct nvfx_query));
+ q->type = query_type;
+
+ assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+ return (struct pipe_query *)q;
+}
+
+static void
+nvfx_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvfx_query *q = nvfx_query(pq);
+
+ if (q->object)
+ {
+ nouveau_resource_free(&q->object);
+ LIST_DEL(&q->list);
+ }
+ FREE(q);
+}
+
+static void
+nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_query *q = nvfx_query(pq);
+ struct nvfx_screen *screen = nvfx->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
+ uint64_t tmp;
+
+ /* Happens when end_query() is called, then another begin_query()
+ * without querying the result in-between. For now we'll wait for
+ * the existing query to notify completion, but it could be better.
+ */
+ if (q->object)
+ pipe->get_query_result(pipe, pq, 1, &tmp);
+
+ while (nouveau_resource_alloc(nvfx->screen->query_heap, 1, NULL, &q->object))
+ {
+ struct nvfx_query* oldestq;
+ assert(!LIST_IS_EMPTY(&nvfx->screen->query_list));
+ oldestq = LIST_ENTRY(struct nvfx_query, nvfx->screen->query_list.next, list);
+ pipe->get_query_result(pipe, (struct pipe_query*)oldestq, 1, &tmp);
+ }
+
+ LIST_ADDTAIL(&q->list, &nvfx->screen->query_list);
+
+ nouveau_notifier_reset(nvfx->screen->query, q->object->start);
+
+ BEGIN_RING(chan, eng3d, NV34TCL_QUERY_RESET, 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, eng3d, NV34TCL_QUERY_UNK17CC, 1);
+ OUT_RING (chan, 1);
+
+ q->ready = FALSE;
+}
+
+static void
+nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_screen *screen = nvfx->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
+ struct nvfx_query *q = nvfx_query(pq);
+
+ BEGIN_RING(chan, eng3d, NV34TCL_QUERY_GET, 1);
+ OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) |
+ ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT));
+ FIRE_RING(chan);
+}
+
+static boolean
+nvfx_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, uint64_t *result)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_query *q = nvfx_query(pq);
+
+ if (!q->ready) {
+ unsigned status;
+
+ status = nouveau_notifier_status(nvfx->screen->query,
+ q->object->start);
+ if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) {
+ if (wait == FALSE)
+ return FALSE;
+
+ nouveau_notifier_wait_status(nvfx->screen->query,
+ q->object->start,
+ NV_NOTIFY_STATE_STATUS_COMPLETED, 0);
+ }
+
+ q->result = nouveau_notifier_return_val(nvfx->screen->query,
+ q->object->start);
+ q->ready = TRUE;
+ nouveau_resource_free(&q->object);
+ LIST_DEL(&q->list);
+ }
+
+ *result = q->result;
+ return TRUE;
+}
+
+void
+nvfx_init_query_functions(struct nvfx_context *nvfx)
+{
+ nvfx->pipe.create_query = nvfx_query_create;
+ nvfx->pipe.destroy_query = nvfx_query_destroy;
+ nvfx->pipe.begin_query = nvfx_query_begin;
+ nvfx->pipe.end_query = nvfx_query_end;
+ nvfx->pipe.get_query_result = nvfx_query_result;
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.c b/src/gallium/drivers/nvfx/nvfx_resource.c
new file mode 100644
index 00000000000..10cdeed2a37
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_resource.c
@@ -0,0 +1,67 @@
+
+#include "pipe/p_context.h"
+#include "nvfx_resource.h"
+#include "nouveau/nouveau_screen.h"
+
+
+/* This doesn't look quite right - this query is supposed to ask
+ * whether the particular context has references to the resource in
+ * any unflushed rendering command buffer, and hence requires a
+ * pipe->flush() for serializing some modification to that resource.
+ *
+ * This seems to be answering the question of whether the resource is
+ * currently on hardware.
+ */
+static unsigned int
+nvfx_resource_is_referenced(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned face, unsigned level)
+{
+ return nouveau_reference_flags(nvfx_resource(resource)->bo);
+}
+
+static struct pipe_resource *
+nvfx_resource_create(struct pipe_screen *screen,
+ const struct pipe_resource *template)
+{
+ if (template->target == PIPE_BUFFER)
+ return nvfx_buffer_create(screen, template);
+ else
+ return nvfx_miptree_create(screen, template);
+}
+
+static struct pipe_resource *
+nvfx_resource_from_handle(struct pipe_screen * screen,
+ const struct pipe_resource *template,
+ struct winsys_handle *whandle)
+{
+ if (template->target == PIPE_BUFFER)
+ return NULL;
+ else
+ return nvfx_miptree_from_handle(screen, template, whandle);
+}
+
+void
+nvfx_init_resource_functions(struct pipe_context *pipe)
+{
+ pipe->get_transfer = u_get_transfer_vtbl;
+ pipe->transfer_map = u_transfer_map_vtbl;
+ pipe->transfer_flush_region = u_transfer_flush_region_vtbl;
+ pipe->transfer_unmap = u_transfer_unmap_vtbl;
+ pipe->transfer_destroy = u_transfer_destroy_vtbl;
+ pipe->transfer_inline_write = u_transfer_inline_write_vtbl;
+ pipe->is_resource_referenced = nvfx_resource_is_referenced;
+}
+
+void
+nvfx_screen_init_resource_functions(struct pipe_screen *pscreen)
+{
+ pscreen->resource_create = nvfx_resource_create;
+ pscreen->resource_from_handle = nvfx_resource_from_handle;
+ pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+ pscreen->resource_destroy = u_resource_destroy_vtbl;
+ pscreen->user_buffer_create = nvfx_user_buffer_create;
+
+ pscreen->get_tex_surface = nvfx_miptree_surface_new;
+ pscreen->tex_surface_destroy = nvfx_miptree_surface_del;
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h
new file mode 100644
index 00000000000..a68c14cf3fb
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_resource.h
@@ -0,0 +1,91 @@
+
+#ifndef NVFX_RESOURCE_H
+#define NVFX_RESOURCE_H
+
+#include "util/u_transfer.h"
+
+struct pipe_resource;
+struct nouveau_bo;
+
+
+/* This gets further specialized into either buffer or texture
+ * structures. In the future we'll want to remove much of that
+ * distinction, but for now try to keep as close to the existing code
+ * as possible and use the vtbl struct to choose between the two
+ * underlying implementations.
+ */
+struct nvfx_resource {
+ struct pipe_resource base;
+ struct u_resource_vtbl *vtbl;
+ struct nouveau_bo *bo;
+};
+
+#define NVFX_MAX_TEXTURE_LEVELS 16
+
+struct nvfx_miptree {
+ struct nvfx_resource base;
+ uint total_size;
+
+ struct {
+ uint pitch;
+ uint *image_offset;
+ } level[NVFX_MAX_TEXTURE_LEVELS];
+
+ unsigned image_nr;
+};
+
+static INLINE
+struct nvfx_resource *nvfx_resource(struct pipe_resource *resource)
+{
+ return (struct nvfx_resource *)resource;
+}
+
+static INLINE struct nouveau_bo *
+nvfx_surface_buffer(struct pipe_surface *surf)
+{
+ struct nvfx_resource *mt = nvfx_resource(surf->texture);
+
+ return mt->bo;
+}
+
+
+void
+nvfx_init_resource_functions(struct pipe_context *pipe);
+
+void
+nvfx_screen_init_resource_functions(struct pipe_screen *pscreen);
+
+
+/* Internal:
+ */
+
+struct pipe_resource *
+nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt);
+
+struct pipe_resource *
+nvfx_miptree_from_handle(struct pipe_screen *pscreen,
+ const struct pipe_resource *template,
+ struct winsys_handle *whandle);
+
+struct pipe_resource *
+nvfx_buffer_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *template);
+
+struct pipe_resource *
+nvfx_user_buffer_create(struct pipe_screen *screen,
+ void *ptr,
+ unsigned bytes,
+ unsigned usage);
+
+
+
+void
+nvfx_miptree_surface_del(struct pipe_surface *ps);
+
+struct pipe_surface *
+nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags);
+
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
new file mode 100644
index 00000000000..9f03ab1833b
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -0,0 +1,466 @@
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+#include "util/u_simple_screen.h"
+
+#include "nouveau/nouveau_screen.h"
+
+#include "nvfx_context.h"
+#include "nvfx_screen.h"
+#include "nvfx_resource.h"
+
+#define NV30TCL_CHIPSET_3X_MASK 0x00000003
+#define NV34TCL_CHIPSET_3X_MASK 0x00000010
+#define NV35TCL_CHIPSET_3X_MASK 0x000001e0
+
+/* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h
+* to get the pointer to the context front buffer, so I copied nouveau_winsys here.
+* nv30_screen_surface_format_supported() can then use it to enforce creating fbo
+* with same number of bits everywhere.
+*/
+struct nouveau_winsys {
+ struct pipe_winsys base;
+
+ struct pipe_screen *pscreen;
+
+ struct pipe_surface *front;
+};
+#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf
+#define NV4X_GRCLASS4497_CHIPSETS 0x00005450
+#define NV6X_GRCLASS4497_CHIPSETS 0x00000088
+
+static int
+nvfx_screen_get_param(struct pipe_screen *pscreen, int param)
+{
+ struct nvfx_screen *screen = nvfx_screen(pscreen);
+
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ /* TODO: check this */
+ return screen->is_nv4x ? 16 : 8;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return !!screen->is_nv4x;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return screen->is_nv4x ? 4 : 2;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 13;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 10;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 13;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ return !!screen->is_nv4x;
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ return 1;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 0; /* We have 4 on nv40 - but unsupported currently */
+ case PIPE_CAP_TGSI_CONT_SUPPORTED:
+ return 0;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ return !!screen->is_nv4x;
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 16;
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ /* TODO: on nv40 we have separate color masks */
+ /* TODO: nv40 mrt blending is probably broken */
+ return 0;
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return 0;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return 0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nvfx_screen_get_paramf(struct pipe_screen *pscreen, int param)
+{
+ struct nvfx_screen *screen = nvfx_screen(pscreen);
+
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 10.0;
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 64.0;
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return screen->is_nv4x ? 16.0 : 8.0;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return screen->is_nv4x ? 16.0 : 4.0;
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+static boolean
+nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage, unsigned geom_flags)
+{
+ struct nvfx_screen *screen = nvfx_screen(pscreen);
+ struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front;
+
+ if (tex_usage & PIPE_BIND_RENDER_TARGET) {
+ switch (format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ return TRUE;
+ default:
+ break;
+ }
+ } else
+ if (tex_usage & PIPE_BIND_DEPTH_STENCIL) {
+ switch (format) {
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ return TRUE;
+ case PIPE_FORMAT_Z16_UNORM:
+ /* TODO: this nv30 limitation probably does not exist */
+ if (!screen->is_nv4x && front)
+ return (front->format == PIPE_FORMAT_B5G6R5_UNORM);
+ return TRUE;
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ case PIPE_FORMAT_B4G4R4A4_UNORM:
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT5_RGBA:
+ return TRUE;
+ /* TODO: does nv30 support this? */
+ case PIPE_FORMAT_R16_SNORM:
+ return !!screen->is_nv4x;
+ default:
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+
+static void
+nvfx_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nvfx_screen *screen = nvfx_screen(pscreen);
+
+ nouveau_resource_destroy(&screen->vp_exec_heap);
+ nouveau_resource_destroy(&screen->vp_data_heap);
+ nouveau_resource_destroy(&screen->query_heap);
+ nouveau_notifier_free(&screen->query);
+ nouveau_notifier_free(&screen->sync);
+ nouveau_grobj_free(&screen->eng3d);
+ nv04_surface_2d_takedown(&screen->eng2d);
+
+ nouveau_screen_fini(&screen->base);
+
+ FREE(pscreen);
+}
+
+static void nv30_screen_init(struct nvfx_screen *screen)
+{
+ struct nouveau_channel *chan = screen->base.channel;
+ int i;
+
+ /* TODO: perhaps we should do some of this on nv40 too? */
+ for (i=1; i<8; i++) {
+ OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1));
+ OUT_RING(chan, 0);
+ OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_VERT(i), 1));
+ OUT_RING(chan, 0);
+ }
+
+ OUT_RING(chan, RING_3D(0x220, 1));
+ OUT_RING(chan, 1);
+
+ OUT_RING(chan, RING_3D(0x03b0, 1));
+ OUT_RING(chan, 0x00100000);
+ OUT_RING(chan, RING_3D(0x1454, 1));
+ OUT_RING(chan, 0);
+ OUT_RING(chan, RING_3D(0x1d80, 1));
+ OUT_RING(chan, 3);
+ OUT_RING(chan, RING_3D(0x1450, 1));
+ OUT_RING(chan, 0x00030004);
+
+ /* NEW */
+ OUT_RING(chan, RING_3D(0x1e98, 1));
+ OUT_RING(chan, 0);
+ OUT_RING(chan, RING_3D(0x17e0, 3));
+ OUT_RING(chan, fui(0.0));
+ OUT_RING(chan, fui(0.0));
+ OUT_RING(chan, fui(1.0));
+ OUT_RING(chan, RING_3D(0x1f80, 16));
+ for (i=0; i<16; i++) {
+ OUT_RING(chan, (i==8) ? 0x0000ffff : 0);
+ }
+
+ OUT_RING(chan, RING_3D(0x120, 3));
+ OUT_RING(chan, 0);
+ OUT_RING(chan, 1);
+ OUT_RING(chan, 2);
+
+ OUT_RING(chan, RING_3D(0x1d88, 1));
+ OUT_RING(chan, 0x00001200);
+
+ OUT_RING(chan, RING_3D(NV34TCL_RC_ENABLE, 1));
+ OUT_RING(chan, 0);
+
+ OUT_RING(chan, RING_3D(NV34TCL_DEPTH_RANGE_NEAR, 2));
+ OUT_RING(chan, fui(0.0));
+ OUT_RING(chan, fui(1.0));
+
+ OUT_RING(chan, RING_3D(NV34TCL_MULTISAMPLE_CONTROL, 1));
+ OUT_RING(chan, 0xffff0000);
+
+ /* enables use of vp rather than fixed-function somehow */
+ OUT_RING(chan, RING_3D(0x1e94, 1));
+ OUT_RING(chan, 0x13);
+}
+
+static void nv40_screen_init(struct nvfx_screen *screen)
+{
+ struct nouveau_channel *chan = screen->base.channel;
+
+ OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR2, 2));
+ OUT_RING(chan, screen->base.channel->vram->handle);
+ OUT_RING(chan, screen->base.channel->vram->handle);
+
+ OUT_RING(chan, RING_3D(0x1ea4, 3));
+ OUT_RING(chan, 0x00000010);
+ OUT_RING(chan, 0x01000100);
+ OUT_RING(chan, 0xff800006);
+
+ /* vtxprog output routing */
+ OUT_RING(chan, RING_3D(0x1fc4, 1));
+ OUT_RING(chan, 0x06144321);
+ OUT_RING(chan, RING_3D(0x1fc8, 2));
+ OUT_RING(chan, 0xedcba987);
+ OUT_RING(chan, 0x00000021);
+ OUT_RING(chan, RING_3D(0x1fd0, 1));
+ OUT_RING(chan, 0x00171615);
+ OUT_RING(chan, RING_3D(0x1fd4, 1));
+ OUT_RING(chan, 0x001b1a19);
+
+ OUT_RING(chan, RING_3D(0x1ef8, 1));
+ OUT_RING(chan, 0x0020ffff);
+ OUT_RING(chan, RING_3D(0x1d64, 1));
+ OUT_RING(chan, 0x00d30000);
+ OUT_RING(chan, RING_3D(0x1e94, 1));
+ OUT_RING(chan, 0x00000001);
+}
+
+static unsigned
+nvfx_screen_get_vertex_buffer_flags(struct nvfx_screen* screen)
+{
+ int vram_hack_default = 0;
+ int vram_hack;
+ // TODO: this is a bit of a guess; also add other cards that may need this hack.
+ // It may also depend on the specific card or the AGP/PCIe chipset.
+ if(screen->base.device->chipset == 0x47 /* G70 */
+ || screen->base.device->chipset == 0x49 /* G71 */
+ || screen->base.device->chipset == 0x46 /* G72 */
+ )
+ vram_hack_default = 1;
+ vram_hack = debug_get_bool_option("NOUVEAU_VTXIDX_IN_VRAM", vram_hack_default);
+
+#ifdef DEBUG
+ if(!vram_hack)
+ {
+ fprintf(stderr, "Some systems may experience graphics corruption due to randomly misplaced vertices.\n"
+ "If this is happening, export NOUVEAU_VTXIDX_IN_VRAM=1 may reduce or eliminate the problem\n");
+ }
+ else
+ {
+ fprintf(stderr, "A performance reducing hack is being used to help avoid graphics corruption.\n"
+ "You can try export NOUVEAU_VTXIDX_IN_VRAM=0 to disable it.\n");
+ }
+#endif
+
+ return vram_hack ? NOUVEAU_BO_VRAM : NOUVEAU_BO_GART;
+}
+
+struct pipe_screen *
+nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
+{
+ static const unsigned query_sizes[] = {(4096 - 4 * 32) / 32, 3 * 1024 / 32, 2 * 1024 / 32, 1024 / 32};
+ struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen);
+ struct nouveau_channel *chan;
+ struct pipe_screen *pscreen;
+ unsigned eng3d_class = 0;
+ int ret, i;
+
+ if (!screen)
+ return NULL;
+
+ pscreen = &screen->base.base;
+
+ ret = nouveau_screen_init(&screen->base, dev);
+ if (ret) {
+ nvfx_screen_destroy(pscreen);
+ return NULL;
+ }
+ chan = screen->base.channel;
+
+ pscreen->winsys = ws;
+ pscreen->destroy = nvfx_screen_destroy;
+ pscreen->get_param = nvfx_screen_get_param;
+ pscreen->get_paramf = nvfx_screen_get_paramf;
+ pscreen->is_format_supported = nvfx_screen_surface_format_supported;
+ pscreen->context_create = nvfx_create;
+
+ switch (dev->chipset & 0xf0) {
+ case 0x30:
+ if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
+ eng3d_class = 0x0397;
+ else if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
+ eng3d_class = 0x0697;
+ else if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
+ eng3d_class = 0x0497;
+ break;
+ case 0x40:
+ if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f)))
+ eng3d_class = NV40TCL;
+ else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
+ eng3d_class = NV44TCL;
+ screen->is_nv4x = ~0;
+ break;
+ case 0x60:
+ if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
+ eng3d_class = NV44TCL;
+ screen->is_nv4x = ~0;
+ break;
+ }
+
+ if (!eng3d_class) {
+ NOUVEAU_ERR("Unknown nv3x/nv4x chipset: nv%02x\n", dev->chipset);
+ return NULL;
+ }
+
+ screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE);
+
+ screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen);
+
+ /* surely both nv3x and nv44 support index buffers too: find out how and test that */
+ if(eng3d_class == NV40TCL)
+ screen->index_buffer_reloc_flags = screen->vertex_buffer_reloc_flags;
+
+ if(!screen->force_swtnl && screen->vertex_buffer_reloc_flags == screen->index_buffer_reloc_flags)
+ screen->base.vertex_buffer_flags = screen->base.index_buffer_flags = screen->vertex_buffer_reloc_flags;
+
+ nvfx_screen_init_resource_functions(pscreen);
+
+ ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d);
+ if (ret) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ return FALSE;
+ }
+
+ /* 2D engine setup */
+ screen->eng2d = nv04_surface_2d_init(&screen->base);
+ screen->eng2d->buf = nvfx_surface_buffer;
+
+ /* Notifier for sync purposes */
+ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+ nvfx_screen_destroy(pscreen);
+ return NULL;
+ }
+
+ /* Query objects */
+ for(i = 0; i < sizeof(query_sizes) / sizeof(query_sizes[0]); ++i)
+ {
+ ret = nouveau_notifier_alloc(chan, 0xbeef0302, query_sizes[i], &screen->query);
+ if(!ret)
+ break;
+ }
+
+ if (ret) {
+ NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
+ nvfx_screen_destroy(pscreen);
+ return NULL;
+ }
+
+ ret = nouveau_resource_init(&screen->query_heap, 0, query_sizes[i]);
+ if (ret) {
+ NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
+ nvfx_screen_destroy(pscreen);
+ return NULL;
+ }
+
+ LIST_INITHEAD(&screen->query_list);
+
+ /* Vtxprog resources */
+ if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->is_nv4x ? 512 : 256) ||
+ nouveau_resource_init(&screen->vp_data_heap, 0, 256)) {
+ nvfx_screen_destroy(pscreen);
+ return NULL;
+ }
+
+ BIND_RING(chan, screen->eng3d, 7);
+
+ /* Static eng3d initialisation */
+ /* note that we just started using the channel, so we must have space in the pushbuffer */
+ OUT_RING(chan, RING_3D(NV34TCL_DMA_NOTIFY, 1));
+ OUT_RING(chan, screen->sync->handle);
+ OUT_RING(chan, RING_3D(NV34TCL_DMA_TEXTURE0, 2));
+ OUT_RING(chan, chan->vram->handle);
+ OUT_RING(chan, chan->gart->handle);
+ OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR1, 1));
+ OUT_RING(chan, chan->vram->handle);
+ OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 2));
+ OUT_RING(chan, chan->vram->handle);
+ OUT_RING(chan, chan->vram->handle);
+ OUT_RING(chan, RING_3D(NV34TCL_DMA_VTXBUF0, 2));
+ OUT_RING(chan, chan->vram->handle);
+ OUT_RING(chan, chan->gart->handle);
+
+ OUT_RING(chan, RING_3D(NV34TCL_DMA_FENCE, 2));
+ OUT_RING(chan, 0);
+ OUT_RING(chan, screen->query->handle);
+
+ OUT_RING(chan, RING_3D(NV34TCL_DMA_IN_MEMORY7, 2));
+ OUT_RING(chan, chan->vram->handle);
+ OUT_RING(chan, chan->vram->handle);
+
+ if(!screen->is_nv4x)
+ nv30_screen_init(screen);
+ else
+ nv40_screen_init(screen);
+
+ return pscreen;
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h
new file mode 100644
index 00000000000..5e1c3945aef
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_screen.h
@@ -0,0 +1,43 @@
+#ifndef __NVFX_SCREEN_H__
+#define __NVFX_SCREEN_H__
+
+#include "util/u_double_list.h"
+#include "nouveau/nouveau_screen.h"
+#include "nv04_surface_2d.h"
+
+struct nvfx_context;
+
+struct nvfx_screen {
+ struct nouveau_screen base;
+
+ struct nouveau_winsys *nvws;
+
+ struct nvfx_context *cur_ctx;
+
+ unsigned is_nv4x; /* either 0 or ~0 */
+ boolean force_swtnl;
+ unsigned vertex_buffer_reloc_flags;
+ unsigned index_buffer_reloc_flags;
+
+ /* HW graphics objects */
+ struct nv04_surface_2d *eng2d;
+ struct nouveau_grobj *eng3d;
+ struct nouveau_notifier *sync;
+
+ /* Query object resources */
+ struct nouveau_notifier *query;
+ struct nouveau_resource *query_heap;
+ struct list_head query_list;
+
+ /* Vtxprog resources */
+ struct nouveau_resource *vp_exec_heap;
+ struct nouveau_resource *vp_data_heap;
+};
+
+static INLINE struct nvfx_screen *
+nvfx_screen(struct pipe_screen *screen)
+{
+ return (struct nvfx_screen *)screen;
+}
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h
new file mode 100644
index 00000000000..50830b39164
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_shader.h
@@ -0,0 +1,429 @@
+#ifndef __NVFX_SHADER_H__
+#define __NVFX_SHADER_H__
+
+/* this will resolve to either the NV30 or the NV40 version
+ * depending on the current hardware */
+/* unusual, but very fast and compact method */
+#define NVFX_VP(c) ((NV30_VP_##c) + (nvfx->is_nv4x & ((NV40_VP_##c) - (NV30_VP_##c))))
+
+#define NVFX_VP_INST_SLOT_VEC 0
+#define NVFX_VP_INST_SLOT_SCA 1
+
+#define NVFX_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */
+#define NVFX_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */
+#define NVFX_VP_INST_IN_NORMAL 2
+#define NVFX_VP_INST_IN_COL0 3 /* Should probably confirm them all though */
+#define NVFX_VP_INST_IN_COL1 4
+#define NVFX_VP_INST_IN_FOGC 5
+#define NVFX_VP_INST_IN_TC0 8
+#define NVFX_VP_INST_IN_TC(n) (8+n)
+
+#define NVFX_VP_INST_SCA_OP_NOP 0x00
+#define NVFX_VP_INST_SCA_OP_MOV 0x01
+#define NVFX_VP_INST_SCA_OP_RCP 0x02
+#define NVFX_VP_INST_SCA_OP_RCC 0x03
+#define NVFX_VP_INST_SCA_OP_RSQ 0x04
+#define NVFX_VP_INST_SCA_OP_EXP 0x05
+#define NVFX_VP_INST_SCA_OP_LOG 0x06
+#define NVFX_VP_INST_SCA_OP_LIT 0x07
+#define NVFX_VP_INST_SCA_OP_BRA 0x09
+#define NVFX_VP_INST_SCA_OP_CAL 0x0B
+#define NVFX_VP_INST_SCA_OP_RET 0x0C
+#define NVFX_VP_INST_SCA_OP_LG2 0x0D
+#define NVFX_VP_INST_SCA_OP_EX2 0x0E
+#define NVFX_VP_INST_SCA_OP_SIN 0x0F
+#define NVFX_VP_INST_SCA_OP_COS 0x10
+
+#define NV40_VP_INST_SCA_OP_PUSHA 0x13
+#define NV40_VP_INST_SCA_OP_POPA 0x14
+
+#define NVFX_VP_INST_VEC_OP_NOP 0x00
+#define NVFX_VP_INST_VEC_OP_MOV 0x01
+#define NVFX_VP_INST_VEC_OP_MUL 0x02
+#define NVFX_VP_INST_VEC_OP_ADD 0x03
+#define NVFX_VP_INST_VEC_OP_MAD 0x04
+#define NVFX_VP_INST_VEC_OP_DP3 0x05
+#define NVFX_VP_INST_VEC_OP_DPH 0x06
+#define NVFX_VP_INST_VEC_OP_DP4 0x07
+#define NVFX_VP_INST_VEC_OP_DST 0x08
+#define NVFX_VP_INST_VEC_OP_MIN 0x09
+#define NVFX_VP_INST_VEC_OP_MAX 0x0A
+#define NVFX_VP_INST_VEC_OP_SLT 0x0B
+#define NVFX_VP_INST_VEC_OP_SGE 0x0C
+#define NVFX_VP_INST_VEC_OP_ARL 0x0D
+#define NVFX_VP_INST_VEC_OP_FRC 0x0E
+#define NVFX_VP_INST_VEC_OP_FLR 0x0F
+#define NVFX_VP_INST_VEC_OP_SEQ 0x10
+#define NVFX_VP_INST_VEC_OP_SFL 0x11
+#define NVFX_VP_INST_VEC_OP_SGT 0x12
+#define NVFX_VP_INST_VEC_OP_SLE 0x13
+#define NVFX_VP_INST_VEC_OP_SNE 0x14
+#define NVFX_VP_INST_VEC_OP_STR 0x15
+#define NVFX_VP_INST_VEC_OP_SSG 0x16
+#define NVFX_VP_INST_VEC_OP_ARR 0x17
+#define NVFX_VP_INST_VEC_OP_ARA 0x18
+
+#define NV40_VP_INST_VEC_OP_TXL 0x19
+
+/* DWORD 3 */
+#define NVFX_VP_INST_LAST (1 << 0)
+
+/*
+ * Each fragment program opcode appears to be comprised of 4 32-bit values.
+ *
+ * 0 - Opcode, output reg/mask, ATTRIB source
+ * 1 - Source 0
+ * 2 - Source 1
+ * 3 - Source 2
+ *
+ * There appears to be no special difference between result regs and temp regs.
+ * result.color == R0.xyzw
+ * result.depth == R1.z
+ * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0
+ * otherwise it is set to 1.
+ *
+ * Constants are inserted directly after the instruction that uses them.
+ *
+ * It appears that it's not possible to use two input registers in one
+ * instruction as the input sourcing is done in the instruction dword
+ * and not the source selection dwords. As such instructions such as:
+ *
+ * ADD result.color, fragment.color, fragment.texcoord[0];
+ *
+ * must be split into two MOV's and then an ADD (nvidia does this) but
+ * I'm not sure why it's not just one MOV and then source the second input
+ * in the ADD instruction..
+ *
+ * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
+ * negation requires multiplication with a const.
+ *
+ * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE
+ * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO
+ * is implemented simply by not writing to the relevant components of the destination.
+ *
+ * Conditional execution
+ * TODO
+ *
+ * Non-native instructions:
+ * LIT
+ * LRP - MAD+MAD
+ * SUB - ADD, negate second source
+ * RSQ - LG2 + EX2
+ * POW - LG2 + MUL + EX2
+ * SCS - COS + SIN
+ * XPD
+ *
+ * NV40 Looping
+ * Loops appear to be fairly expensive on NV40 at least, the proprietary
+ * driver goes to a lot of effort to avoid using the native looping
+ * instructions. If the total number of *executed* instructions between
+ * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
+ * The maximum loop count is 255.
+ *
+ */
+
+//== Opcode / Destination selection ==
+#define NVFX_FP_OP_PROGRAM_END (1 << 0)
+#define NVFX_FP_OP_OUT_REG_SHIFT 1
+#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */
+#define NV40_FP_OP_OUT_REG_MASK (63 << 1)
+/* Needs to be set when writing outputs to get expected result.. */
+#define NVFX_FP_OP_OUT_REG_HALF (1 << 7)
+#define NVFX_FP_OP_COND_WRITE_ENABLE (1 << 8)
+#define NVFX_FP_OP_OUTMASK_SHIFT 9
+#define NVFX_FP_OP_OUTMASK_MASK (0xF << 9)
+# define NVFX_FP_OP_OUT_X (1<<9)
+# define NVFX_FP_OP_OUT_Y (1<<10)
+# define NVFX_FP_OP_OUT_Z (1<<11)
+# define NVFX_FP_OP_OUT_W (1<<12)
+/* Uncertain about these, especially the input_src values.. it's possible that
+ * they can be dynamically changed.
+ */
+#define NVFX_FP_OP_INPUT_SRC_SHIFT 13
+#define NVFX_FP_OP_INPUT_SRC_MASK (15 << 13)
+# define NVFX_FP_OP_INPUT_SRC_POSITION 0x0
+# define NVFX_FP_OP_INPUT_SRC_COL0 0x1
+# define NVFX_FP_OP_INPUT_SRC_COL1 0x2
+# define NVFX_FP_OP_INPUT_SRC_FOGC 0x3
+# define NVFX_FP_OP_INPUT_SRC_TC0 0x4
+# define NVFX_FP_OP_INPUT_SRC_TC(n) (0x4 + n)
+# define NV40_FP_OP_INPUT_SRC_FACING 0xE
+#define NVFX_FP_OP_TEX_UNIT_SHIFT 17
+#define NVFX_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */
+#define NVFX_FP_OP_PRECISION_SHIFT 22
+#define NVFX_FP_OP_PRECISION_MASK (3 << 22)
+# define NVFX_FP_PRECISION_FP32 0
+# define NVFX_FP_PRECISION_FP16 1
+# define NVFX_FP_PRECISION_FX12 2
+#define NVFX_FP_OP_OPCODE_SHIFT 24
+#define NVFX_FP_OP_OPCODE_MASK (0x3F << 24)
+/* NV30/NV40 fragment program opcodes */
+#define NVFX_FP_OP_OPCODE_NOP 0x00
+#define NVFX_FP_OP_OPCODE_MOV 0x01
+#define NVFX_FP_OP_OPCODE_MUL 0x02
+#define NVFX_FP_OP_OPCODE_ADD 0x03
+#define NVFX_FP_OP_OPCODE_MAD 0x04
+#define NVFX_FP_OP_OPCODE_DP3 0x05
+#define NVFX_FP_OP_OPCODE_DP4 0x06
+#define NVFX_FP_OP_OPCODE_DST 0x07
+#define NVFX_FP_OP_OPCODE_MIN 0x08
+#define NVFX_FP_OP_OPCODE_MAX 0x09
+#define NVFX_FP_OP_OPCODE_SLT 0x0A
+#define NVFX_FP_OP_OPCODE_SGE 0x0B
+#define NVFX_FP_OP_OPCODE_SLE 0x0C
+#define NVFX_FP_OP_OPCODE_SGT 0x0D
+#define NVFX_FP_OP_OPCODE_SNE 0x0E
+#define NVFX_FP_OP_OPCODE_SEQ 0x0F
+#define NVFX_FP_OP_OPCODE_FRC 0x10
+#define NVFX_FP_OP_OPCODE_FLR 0x11
+#define NVFX_FP_OP_OPCODE_KIL 0x12
+#define NVFX_FP_OP_OPCODE_PK4B 0x13
+#define NVFX_FP_OP_OPCODE_UP4B 0x14
+#define NVFX_FP_OP_OPCODE_DDX 0x15 /* can only write XY */
+#define NVFX_FP_OP_OPCODE_DDY 0x16 /* can only write XY */
+#define NVFX_FP_OP_OPCODE_TEX 0x17
+#define NVFX_FP_OP_OPCODE_TXP 0x18
+#define NVFX_FP_OP_OPCODE_TXD 0x19
+#define NVFX_FP_OP_OPCODE_RCP 0x1A
+#define NVFX_FP_OP_OPCODE_EX2 0x1C
+#define NVFX_FP_OP_OPCODE_LG2 0x1D
+#define NVFX_FP_OP_OPCODE_STR 0x20
+#define NVFX_FP_OP_OPCODE_SFL 0x21
+#define NVFX_FP_OP_OPCODE_COS 0x22
+#define NVFX_FP_OP_OPCODE_SIN 0x23
+#define NVFX_FP_OP_OPCODE_PK2H 0x24
+#define NVFX_FP_OP_OPCODE_UP2H 0x25
+#define NVFX_FP_OP_OPCODE_PK4UB 0x27
+#define NVFX_FP_OP_OPCODE_UP4UB 0x28
+#define NVFX_FP_OP_OPCODE_PK2US 0x29
+#define NVFX_FP_OP_OPCODE_UP2US 0x2A
+#define NVFX_FP_OP_OPCODE_DP2A 0x2E
+#define NVFX_FP_OP_OPCODE_TXB 0x31
+#define NVFX_FP_OP_OPCODE_DIV 0x3A
+
+/* NV30 only fragment program opcodes */
+#define NVFX_FP_OP_OPCODE_RSQ_NV30 0x1B
+#define NVFX_FP_OP_OPCODE_LIT_NV30 0x1E
+#define NVFX_FP_OP_OPCODE_LRP_NV30 0x1F
+#define NVFX_FP_OP_OPCODE_POW_NV30 0x26
+#define NVFX_FP_OP_OPCODE_RFL_NV30 0x36
+
+/* NV40 only fragment program opcodes */
+#define NVFX_FP_OP_OPCODE_TXL_NV40 0x2F
+/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
+#define NV40_FP_OP_BRA_OPCODE_BRK 0x0
+#define NV40_FP_OP_BRA_OPCODE_CAL 0x1
+#define NV40_FP_OP_BRA_OPCODE_IF 0x2
+#define NV40_FP_OP_BRA_OPCODE_LOOP 0x3
+#define NV40_FP_OP_BRA_OPCODE_REP 0x4
+#define NV40_FP_OP_BRA_OPCODE_RET 0x5
+
+#define NVFX_FP_OP_OUT_SAT (1 << 31)
+
+/* high order bits of SRC0 */
+#define NVFX_FP_OP_OUT_ABS (1 << 29)
+#define NVFX_FP_OP_COND_SWZ_W_SHIFT 27
+#define NVFX_FP_OP_COND_SWZ_W_MASK (3 << 27)
+#define NVFX_FP_OP_COND_SWZ_Z_SHIFT 25
+#define NVFX_FP_OP_COND_SWZ_Z_MASK (3 << 25)
+#define NVFX_FP_OP_COND_SWZ_Y_SHIFT 23
+#define NVFX_FP_OP_COND_SWZ_Y_MASK (3 << 23)
+#define NVFX_FP_OP_COND_SWZ_X_SHIFT 21
+#define NVFX_FP_OP_COND_SWZ_X_MASK (3 << 21)
+#define NVFX_FP_OP_COND_SWZ_ALL_SHIFT 21
+#define NVFX_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
+#define NVFX_FP_OP_COND_SHIFT 18
+#define NVFX_FP_OP_COND_MASK (0x07 << 18)
+# define NVFX_FP_OP_COND_FL 0
+# define NVFX_FP_OP_COND_LT 1
+# define NVFX_FP_OP_COND_EQ 2
+# define NVFX_FP_OP_COND_LE 3
+# define NVFX_FP_OP_COND_GT 4
+# define NVFX_FP_OP_COND_NE 5
+# define NVFX_FP_OP_COND_GE 6
+# define NVFX_FP_OP_COND_TR 7
+
+/* high order bits of SRC1 */
+#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31)
+#define NVFX_FP_OP_DST_SCALE_SHIFT 28
+#define NVFX_FP_OP_DST_SCALE_MASK (3 << 28)
+#define NVFX_FP_OP_DST_SCALE_1X 0
+#define NVFX_FP_OP_DST_SCALE_2X 1
+#define NVFX_FP_OP_DST_SCALE_4X 2
+#define NVFX_FP_OP_DST_SCALE_8X 3
+#define NVFX_FP_OP_DST_SCALE_INV_2X 5
+#define NVFX_FP_OP_DST_SCALE_INV_4X 6
+#define NVFX_FP_OP_DST_SCALE_INV_8X 7
+
+/* SRC1 LOOP */
+#define NV40_FP_OP_LOOP_INCR_SHIFT 19
+#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19)
+#define NV40_FP_OP_LOOP_INDEX_SHIFT 10
+#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10)
+#define NV40_FP_OP_LOOP_COUNT_SHIFT 2
+#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2)
+
+/* SRC1 IF */
+#define NV40_FP_OP_ELSE_ID_SHIFT 2
+#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2)
+
+/* SRC1 CAL */
+#define NV40_FP_OP_IADDR_SHIFT 2
+#define NV40_FP_OP_IADDR_MASK (0xFF << 2)
+
+/* SRC1 REP
+ * I have no idea why there are 3 count values here.. but they
+ * have always been filled with the same value in my tests so
+ * far..
+ */
+#define NV40_FP_OP_REP_COUNT1_SHIFT 2
+#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2)
+#define NV40_FP_OP_REP_COUNT2_SHIFT 10
+#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10)
+#define NV40_FP_OP_REP_COUNT3_SHIFT 19
+#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19)
+
+/* SRC2 REP/IF */
+#define NV40_FP_OP_END_ID_SHIFT 2
+#define NV40_FP_OP_END_ID_MASK (0xFF << 2)
+
+/* high order bits of SRC2 */
+#define NVFX_FP_OP_INDEX_INPUT (1 << 30)
+#define NV40_FP_OP_ADDR_INDEX_SHIFT 19
+#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19)
+
+//== Register selection ==
+#define NVFX_FP_REG_TYPE_SHIFT 0
+#define NVFX_FP_REG_TYPE_MASK (3 << 0)
+# define NVFX_FP_REG_TYPE_TEMP 0
+# define NVFX_FP_REG_TYPE_INPUT 1
+# define NVFX_FP_REG_TYPE_CONST 2
+#define NVFX_FP_REG_SRC_SHIFT 2
+#define NV30_FP_REG_SRC_MASK (31 << 2)
+#define NV40_FP_REG_SRC_MASK (63 << 2)
+#define NVFX_FP_REG_SRC_HALF (1 << 8)
+#define NVFX_FP_REG_SWZ_ALL_SHIFT 9
+#define NVFX_FP_REG_SWZ_ALL_MASK (255 << 9)
+#define NVFX_FP_REG_SWZ_X_SHIFT 9
+#define NVFX_FP_REG_SWZ_X_MASK (3 << 9)
+#define NVFX_FP_REG_SWZ_Y_SHIFT 11
+#define NVFX_FP_REG_SWZ_Y_MASK (3 << 11)
+#define NVFX_FP_REG_SWZ_Z_SHIFT 13
+#define NVFX_FP_REG_SWZ_Z_MASK (3 << 13)
+#define NVFX_FP_REG_SWZ_W_SHIFT 15
+#define NVFX_FP_REG_SWZ_W_MASK (3 << 15)
+# define NVFX_FP_SWIZZLE_X 0
+# define NVFX_FP_SWIZZLE_Y 1
+# define NVFX_FP_SWIZZLE_Z 2
+# define NVFX_FP_SWIZZLE_W 3
+#define NVFX_FP_REG_NEGATE (1 << 17)
+
+#define NVFXSR_NONE 0
+#define NVFXSR_OUTPUT 1
+#define NVFXSR_INPUT 2
+#define NVFXSR_TEMP 3
+#define NVFXSR_CONST 4
+
+#define NVFX_COND_FL 0
+#define NVFX_COND_LT 1
+#define NVFX_COND_EQ 2
+#define NVFX_COND_LE 3
+#define NVFX_COND_GT 4
+#define NVFX_COND_NE 5
+#define NVFX_COND_GE 6
+#define NVFX_COND_TR 7
+
+/* Yes, this are ordered differently... */
+
+#define NVFX_VP_MASK_X 8
+#define NVFX_VP_MASK_Y 4
+#define NVFX_VP_MASK_Z 2
+#define NVFX_VP_MASK_W 1
+#define NVFX_VP_MASK_ALL 0xf
+
+#define NVFX_FP_MASK_X 1
+#define NVFX_FP_MASK_Y 2
+#define NVFX_FP_MASK_Z 4
+#define NVFX_FP_MASK_W 8
+#define NVFX_FP_MASK_ALL 0xf
+
+#define NVFX_SWZ_X 0
+#define NVFX_SWZ_Y 1
+#define NVFX_SWZ_Z 2
+#define NVFX_SWZ_W 3
+
+#define swz(s,x,y,z,w) nvfx_sr_swz((s), NVFX_SWZ_##x, NVFX_SWZ_##y, NVFX_SWZ_##z, NVFX_SWZ_##w)
+#define neg(s) nvfx_sr_neg((s))
+#define abs(s) nvfx_sr_abs((s))
+#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v)
+
+struct nvfx_sreg {
+ int type;
+ int index;
+
+ int dst_scale;
+
+ int negate;
+ int abs;
+ int swz[4];
+
+ int cc_update;
+ int cc_update_reg;
+ int cc_test;
+ int cc_test_reg;
+ int cc_swz[4];
+};
+
+static INLINE struct nvfx_sreg
+nvfx_sr(int type, int index)
+{
+ struct nvfx_sreg temp = {
+ .type = type,
+ .index = index,
+ .dst_scale = 0,
+ .abs = 0,
+ .negate = 0,
+ .swz = { 0, 1, 2, 3 },
+ .cc_update = 0,
+ .cc_update_reg = 0,
+ .cc_test = NVFX_COND_TR,
+ .cc_test_reg = 0,
+ .cc_swz = { 0, 1, 2, 3 },
+ };
+ return temp;
+}
+
+static INLINE struct nvfx_sreg
+nvfx_sr_swz(struct nvfx_sreg src, int x, int y, int z, int w)
+{
+ struct nvfx_sreg dst = src;
+
+ dst.swz[NVFX_SWZ_X] = src.swz[x];
+ dst.swz[NVFX_SWZ_Y] = src.swz[y];
+ dst.swz[NVFX_SWZ_Z] = src.swz[z];
+ dst.swz[NVFX_SWZ_W] = src.swz[w];
+ return dst;
+}
+
+static INLINE struct nvfx_sreg
+nvfx_sr_neg(struct nvfx_sreg src)
+{
+ src.negate = !src.negate;
+ return src;
+}
+
+static INLINE struct nvfx_sreg
+nvfx_sr_abs(struct nvfx_sreg src)
+{
+ src.abs = 1;
+ return src;
+}
+
+static INLINE struct nvfx_sreg
+nvfx_sr_scale(struct nvfx_sreg src, int scale)
+{
+ src.dst_scale = scale;
+ return src;
+}
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c
new file mode 100644
index 00000000000..315de492dab
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_state.c
@@ -0,0 +1,658 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+
+#include "draw/draw_context.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nvfx_context.h"
+#include "nvfx_state.h"
+#include "nvfx_tex.h"
+
+static void *
+nvfx_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_blend_state *bso = CALLOC(1, sizeof(*bso));
+ struct nouveau_statebuf_builder sb = sb_init(bso->sb);
+
+ if (cso->rt[0].blend_enable) {
+ sb_method(sb, NV34TCL_BLEND_FUNC_ENABLE, 3);
+ sb_data(sb, 1);
+ sb_data(sb, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) |
+ nvgl_blend_func(cso->rt[0].rgb_src_factor));
+ sb_data(sb, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 |
+ nvgl_blend_func(cso->rt[0].rgb_dst_factor));
+ if(nvfx->screen->base.device->chipset < 0x40) {
+ sb_method(sb, NV34TCL_BLEND_EQUATION, 1);
+ sb_data(sb, nvgl_blend_eqn(cso->rt[0].rgb_func));
+ } else {
+ sb_method(sb, NV40TCL_BLEND_EQUATION, 1);
+ sb_data(sb, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 |
+ nvgl_blend_eqn(cso->rt[0].rgb_func));
+ }
+ } else {
+ sb_method(sb, NV34TCL_BLEND_FUNC_ENABLE, 1);
+ sb_data(sb, 0);
+ }
+
+ sb_method(sb, NV34TCL_COLOR_MASK, 1);
+ sb_data(sb, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) |
+ ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) |
+ ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) |
+ ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0)));
+
+ /* TODO: add NV40 MRT color mask */
+
+ if (cso->logicop_enable) {
+ sb_method(sb, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2);
+ sb_data(sb, 1);
+ sb_data(sb, nvgl_logicop_func(cso->logicop_func));
+ } else {
+ sb_method(sb, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1);
+ sb_data(sb, 0);
+ }
+
+ sb_method(sb, NV34TCL_DITHER_ENABLE, 1);
+ sb_data(sb, cso->dither ? 1 : 0);
+
+ bso->sb_len = sb_len(sb, bso->sb);
+ bso->pipe = *cso;
+ return (void *)bso;
+}
+
+static void
+nvfx_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->blend = hwcso;
+ nvfx->dirty |= NVFX_NEW_BLEND;
+}
+
+static void
+nvfx_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_blend_state *bso = hwcso;
+
+ FREE(bso);
+}
+
+static void *
+nvfx_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_sampler_state *ps;
+
+ ps = MALLOC(sizeof(struct nvfx_sampler_state));
+
+ /* on nv30, we use this as an internal flag */
+ ps->fmt = cso->normalized_coords ? 0 : NV40TCL_TEX_FORMAT_RECT;
+ ps->en = 0;
+ ps->filt = nvfx_tex_filter(cso);
+ ps->wrap = (nvfx_tex_wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) |
+ (nvfx_tex_wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) |
+ (nvfx_tex_wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT) |
+ nvfx_tex_wrap_compare_mode(cso);
+ ps->bcol = nvfx_tex_border_color(cso->border_color);
+
+ if(nvfx->is_nv4x)
+ nv40_sampler_state_init(pipe, ps, cso);
+ else
+ nv30_sampler_state_init(pipe, ps, cso);
+
+ return (void *)ps;
+}
+
+static void
+nvfx_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ nvfx->tex_sampler[unit] = sampler[unit];
+ nvfx->dirty_samplers |= (1 << unit);
+ }
+
+ for (unit = nr; unit < nvfx->nr_samplers; unit++) {
+ nvfx->tex_sampler[unit] = NULL;
+ nvfx->dirty_samplers |= (1 << unit);
+ }
+
+ nvfx->nr_samplers = nr;
+ nvfx->dirty |= NVFX_NEW_SAMPLER;
+}
+
+static void
+nvfx_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nvfx_set_fragment_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ unsigned unit;
+
+ for (unit = 0; unit < nr; unit++) {
+ pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit],
+ views[unit]);
+ nvfx->dirty_samplers |= (1 << unit);
+ }
+
+ for (unit = nr; unit < nvfx->nr_textures; unit++) {
+ pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit],
+ NULL);
+ nvfx->dirty_samplers |= (1 << unit);
+ }
+
+ nvfx->nr_textures = nr;
+ nvfx->dirty |= NVFX_NEW_SAMPLER;
+}
+
+
+static struct pipe_sampler_view *
+nvfx_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ)
+{
+ struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
+
+ if (view) {
+ *view = *templ;
+ view->reference.count = 1;
+ view->texture = NULL;
+ pipe_resource_reference(&view->texture, texture);
+ view->context = pipe;
+ }
+
+ return view;
+}
+
+
+static void
+nvfx_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
+{
+ pipe_resource_reference(&view->texture, NULL);
+ FREE(view);
+}
+
+static void *
+nvfx_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nvfx_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
+ struct nouveau_statebuf_builder sb = sb_init(rsso->sb);
+
+ /*XXX: ignored:
+ * point_smooth -nohw
+ * multisample
+ */
+
+ sb_method(sb, NV34TCL_SHADE_MODEL, 1);
+ sb_data(sb, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT :
+ NV34TCL_SHADE_MODEL_SMOOTH);
+
+ sb_method(sb, NV34TCL_VERTEX_TWO_SIDE_ENABLE, 1);
+ sb_data(sb, cso->light_twoside);
+
+ sb_method(sb, NV34TCL_LINE_WIDTH, 2);
+ sb_data(sb, (unsigned char)(cso->line_width * 8.0) & 0xff);
+ sb_data(sb, cso->line_smooth ? 1 : 0);
+ sb_method(sb, NV34TCL_LINE_STIPPLE_ENABLE, 2);
+ sb_data(sb, cso->line_stipple_enable ? 1 : 0);
+ sb_data(sb, (cso->line_stipple_pattern << 16) |
+ cso->line_stipple_factor);
+
+ sb_method(sb, NV34TCL_POINT_SIZE, 1);
+ sb_data(sb, fui(cso->point_size));
+
+ sb_method(sb, NV34TCL_POLYGON_MODE_FRONT, 6);
+ if (cso->front_winding == PIPE_WINDING_CCW) {
+ sb_data(sb, nvgl_polygon_mode(cso->fill_ccw));
+ sb_data(sb, nvgl_polygon_mode(cso->fill_cw));
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ sb_data(sb, NV34TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_CW:
+ sb_data(sb, NV34TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_BOTH:
+ sb_data(sb, NV34TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ sb_data(sb, NV34TCL_CULL_FACE_BACK);
+ break;
+ }
+ sb_data(sb, NV34TCL_FRONT_FACE_CCW);
+ } else {
+ sb_data(sb, nvgl_polygon_mode(cso->fill_cw));
+ sb_data(sb, nvgl_polygon_mode(cso->fill_ccw));
+ switch (cso->cull_mode) {
+ case PIPE_WINDING_CCW:
+ sb_data(sb, NV34TCL_CULL_FACE_BACK);
+ break;
+ case PIPE_WINDING_CW:
+ sb_data(sb, NV34TCL_CULL_FACE_FRONT);
+ break;
+ case PIPE_WINDING_BOTH:
+ sb_data(sb, NV34TCL_CULL_FACE_FRONT_AND_BACK);
+ break;
+ default:
+ sb_data(sb, NV34TCL_CULL_FACE_BACK);
+ break;
+ }
+ sb_data(sb, NV34TCL_FRONT_FACE_CW);
+ }
+ sb_data(sb, cso->poly_smooth ? 1 : 0);
+ sb_data(sb, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0);
+
+ sb_method(sb, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
+ sb_data(sb, cso->poly_stipple_enable ? 1 : 0);
+
+ sb_method(sb, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT))
+ sb_data(sb, 1);
+ else
+ sb_data(sb, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE))
+ sb_data(sb, 1);
+ else
+ sb_data(sb, 0);
+ if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) ||
+ (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL))
+ sb_data(sb, 1);
+ else
+ sb_data(sb, 0);
+ if (cso->offset_cw || cso->offset_ccw) {
+ sb_method(sb, NV34TCL_POLYGON_OFFSET_FACTOR, 2);
+ sb_data(sb, fui(cso->offset_scale));
+ sb_data(sb, fui(cso->offset_units * 2));
+ }
+
+ sb_method(sb, NV34TCL_POINT_SPRITE, 1);
+ if (cso->point_quad_rasterization) {
+ unsigned psctl = (1 << 0), i;
+
+ for (i = 0; i < 8; i++) {
+ if ((cso->sprite_coord_enable >> i) & 1)
+ psctl |= (1 << (8 + i));
+ }
+
+ sb_data(sb, psctl);
+ } else {
+ sb_data(sb, 0);
+ }
+
+ rsso->pipe = *cso;
+ rsso->sb_len = sb_len(sb, rsso->sb);
+ return (void *)rsso;
+}
+
+static void
+nvfx_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ if(nvfx->rasterizer && hwcso)
+ {
+ if(!nvfx->rasterizer || ((struct nvfx_rasterizer_state*)hwcso)->pipe.scissor
+ != nvfx->rasterizer->pipe.scissor)
+ {
+ nvfx->dirty |= NVFX_NEW_SCISSOR;
+ nvfx->draw_dirty |= NVFX_NEW_SCISSOR;
+ }
+
+ if(((struct nvfx_rasterizer_state*)hwcso)->pipe.poly_stipple_enable
+ != nvfx->rasterizer->pipe.poly_stipple_enable)
+ {
+ nvfx->dirty |= NVFX_NEW_STIPPLE;
+ nvfx->draw_dirty |= NVFX_NEW_STIPPLE;
+ }
+ }
+
+ nvfx->rasterizer = hwcso;
+ nvfx->dirty |= NVFX_NEW_RAST;
+ nvfx->draw_dirty |= NVFX_NEW_RAST;
+}
+
+static void
+nvfx_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_rasterizer_state *rsso = hwcso;
+
+ FREE(rsso);
+}
+
+static void *
+nvfx_depth_stencil_alpha_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nvfx_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
+ struct nouveau_statebuf_builder sb = sb_init(zsaso->sb);
+
+ sb_method(sb, NV34TCL_DEPTH_FUNC, 3);
+ sb_data (sb, nvgl_comparison_op(cso->depth.func));
+ sb_data (sb, cso->depth.writemask ? 1 : 0);
+ sb_data (sb, cso->depth.enabled ? 1 : 0);
+
+ sb_method(sb, NV34TCL_ALPHA_FUNC_ENABLE, 3);
+ sb_data (sb, cso->alpha.enabled ? 1 : 0);
+ sb_data (sb, nvgl_comparison_op(cso->alpha.func));
+ sb_data (sb, float_to_ubyte(cso->alpha.ref_value));
+
+ if (cso->stencil[0].enabled) {
+ sb_method(sb, NV34TCL_STENCIL_FRONT_ENABLE, 3);
+ sb_data (sb, cso->stencil[0].enabled ? 1 : 0);
+ sb_data (sb, cso->stencil[0].writemask);
+ sb_data (sb, nvgl_comparison_op(cso->stencil[0].func));
+ sb_method(sb, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4);
+ sb_data (sb, cso->stencil[0].valuemask);
+ sb_data (sb, nvgl_stencil_op(cso->stencil[0].fail_op));
+ sb_data (sb, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ sb_data (sb, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ } else {
+ sb_method(sb, NV34TCL_STENCIL_FRONT_ENABLE, 1);
+ sb_data (sb, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ sb_method(sb, NV34TCL_STENCIL_BACK_ENABLE, 3);
+ sb_data (sb, cso->stencil[1].enabled ? 1 : 0);
+ sb_data (sb, cso->stencil[1].writemask);
+ sb_data (sb, nvgl_comparison_op(cso->stencil[1].func));
+ sb_method(sb, NV34TCL_STENCIL_BACK_FUNC_MASK, 4);
+ sb_data (sb, cso->stencil[1].valuemask);
+ sb_data (sb, nvgl_stencil_op(cso->stencil[1].fail_op));
+ sb_data (sb, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ sb_data (sb, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ } else {
+ sb_method(sb, NV34TCL_STENCIL_BACK_ENABLE, 1);
+ sb_data (sb, 0);
+ }
+
+ zsaso->pipe = *cso;
+ zsaso->sb_len = sb_len(sb, zsaso->sb);
+ return (void *)zsaso;
+}
+
+static void
+nvfx_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->zsa = hwcso;
+ nvfx->dirty |= NVFX_NEW_ZSA;
+}
+
+static void
+nvfx_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_zsa_state *zsaso = hwcso;
+
+ FREE(zsaso);
+}
+
+static void *
+nvfx_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_vertex_program *vp;
+
+ vp = CALLOC(1, sizeof(struct nvfx_vertex_program));
+ vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe);
+
+ return (void *)vp;
+}
+
+static void
+nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->vertprog = hwcso;
+ nvfx->dirty |= NVFX_NEW_VERTPROG;
+ nvfx->draw_dirty |= NVFX_NEW_VERTPROG;
+}
+
+static void
+nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_vertex_program *vp = hwcso;
+
+ draw_delete_vertex_shader(nvfx->draw, vp->draw);
+ nvfx_vertprog_destroy(nvfx, vp);
+ FREE((void*)vp->pipe.tokens);
+ FREE(vp);
+}
+
+static void *
+nvfx_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ struct nvfx_fragment_program *fp;
+
+ fp = CALLOC(1, sizeof(struct nvfx_fragment_program));
+ fp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ tgsi_scan_shader(fp->pipe.tokens, &fp->info);
+
+ return (void *)fp;
+}
+
+static void
+nvfx_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->fragprog = hwcso;
+ nvfx->dirty |= NVFX_NEW_FRAGPROG;
+}
+
+static void
+nvfx_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_fragment_program *fp = hwcso;
+
+ nvfx_fragprog_destroy(nvfx, fp);
+ FREE((void*)fp->pipe.tokens);
+ FREE(fp);
+}
+
+static void
+nvfx_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->blend_colour = *bcol;
+ nvfx->dirty |= NVFX_NEW_BCOL;
+}
+
+static void
+nvfx_set_stencil_ref(struct pipe_context *pipe,
+ const struct pipe_stencil_ref *sr)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->stencil_ref = *sr;
+ nvfx->dirty |= NVFX_NEW_SR;
+}
+
+static void
+nvfx_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->clip = *clip;
+ nvfx->dirty |= NVFX_NEW_UCP;
+ nvfx->draw_dirty |= NVFX_NEW_UCP;
+}
+
+static void
+nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ struct pipe_resource *buf )
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->constbuf[shader] = buf;
+ nvfx->constbuf_nr[shader] = buf->width0 / (4 * sizeof(float));
+
+ if (shader == PIPE_SHADER_VERTEX) {
+ nvfx->dirty |= NVFX_NEW_VERTCONST;
+ } else
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ nvfx->dirty |= NVFX_NEW_FRAGCONST;
+ }
+}
+
+static void
+nvfx_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->framebuffer = *fb;
+ nvfx->dirty |= NVFX_NEW_FB;
+}
+
+static void
+nvfx_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ memcpy(nvfx->stipple, stipple->stipple, 4 * 32);
+ nvfx->dirty |= NVFX_NEW_STIPPLE;
+}
+
+static void
+nvfx_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->scissor = *s;
+ nvfx->dirty |= NVFX_NEW_SCISSOR;
+}
+
+static void
+nvfx_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->viewport = *vpt;
+ nvfx->dirty |= NVFX_NEW_VIEWPORT;
+ nvfx->draw_dirty |= NVFX_NEW_VIEWPORT;
+}
+
+static void
+nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ memcpy(nvfx->vtxbuf, vb, sizeof(*vb) * count);
+ nvfx->vtxbuf_nr = count;
+
+ nvfx->dirty |= NVFX_NEW_ARRAYS;
+ nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
+}
+
+static void *
+nvfx_vtxelts_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
+
+ assert(num_elements < 16); /* not doing fallbacks yet */
+ cso->num_elements = num_elements;
+ memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+
+/* nvfx_vtxelt_construct(cso);*/
+
+ return (void *)cso;
+}
+
+static void
+nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+
+ nvfx->vtxelt = hwcso;
+ nvfx->dirty |= NVFX_NEW_ARRAYS;
+ /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/
+}
+
+void
+nvfx_init_state_functions(struct nvfx_context *nvfx)
+{
+ nvfx->pipe.create_blend_state = nvfx_blend_state_create;
+ nvfx->pipe.bind_blend_state = nvfx_blend_state_bind;
+ nvfx->pipe.delete_blend_state = nvfx_blend_state_delete;
+
+ nvfx->pipe.create_sampler_state = nvfx_sampler_state_create;
+ nvfx->pipe.bind_fragment_sampler_states = nvfx_sampler_state_bind;
+ nvfx->pipe.delete_sampler_state = nvfx_sampler_state_delete;
+ nvfx->pipe.set_fragment_sampler_views = nvfx_set_fragment_sampler_views;
+ nvfx->pipe.create_sampler_view = nvfx_create_sampler_view;
+ nvfx->pipe.sampler_view_destroy = nvfx_sampler_view_destroy;
+
+ nvfx->pipe.create_rasterizer_state = nvfx_rasterizer_state_create;
+ nvfx->pipe.bind_rasterizer_state = nvfx_rasterizer_state_bind;
+ nvfx->pipe.delete_rasterizer_state = nvfx_rasterizer_state_delete;
+
+ nvfx->pipe.create_depth_stencil_alpha_state =
+ nvfx_depth_stencil_alpha_state_create;
+ nvfx->pipe.bind_depth_stencil_alpha_state =
+ nvfx_depth_stencil_alpha_state_bind;
+ nvfx->pipe.delete_depth_stencil_alpha_state =
+ nvfx_depth_stencil_alpha_state_delete;
+
+ nvfx->pipe.create_vs_state = nvfx_vp_state_create;
+ nvfx->pipe.bind_vs_state = nvfx_vp_state_bind;
+ nvfx->pipe.delete_vs_state = nvfx_vp_state_delete;
+
+ nvfx->pipe.create_fs_state = nvfx_fp_state_create;
+ nvfx->pipe.bind_fs_state = nvfx_fp_state_bind;
+ nvfx->pipe.delete_fs_state = nvfx_fp_state_delete;
+
+ nvfx->pipe.set_blend_color = nvfx_set_blend_color;
+ nvfx->pipe.set_stencil_ref = nvfx_set_stencil_ref;
+ nvfx->pipe.set_clip_state = nvfx_set_clip_state;
+ nvfx->pipe.set_constant_buffer = nvfx_set_constant_buffer;
+ nvfx->pipe.set_framebuffer_state = nvfx_set_framebuffer_state;
+ nvfx->pipe.set_polygon_stipple = nvfx_set_polygon_stipple;
+ nvfx->pipe.set_scissor_state = nvfx_set_scissor_state;
+ nvfx->pipe.set_viewport_state = nvfx_set_viewport_state;
+
+ nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
+ nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
+ nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
+
+ nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h
new file mode 100644
index 00000000000..676d4fd34bb
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_state.h
@@ -0,0 +1,78 @@
+#ifndef __NVFX_STATE_H__
+#define __NVFX_STATE_H__
+
+#include "pipe/p_state.h"
+#include "pipe/p_video_state.h"
+#include "tgsi/tgsi_scan.h"
+#include "nouveau/nouveau_statebuf.h"
+
+struct nvfx_vertex_program_exec {
+ uint32_t data[4];
+ boolean has_branch_offset;
+ int const_index;
+};
+
+struct nvfx_vertex_program_data {
+ int index; /* immediates == -1 */
+ float value[4];
+};
+
+struct nvfx_vertex_program {
+ struct pipe_shader_state pipe;
+
+ struct draw_vertex_shader *draw;
+
+ boolean translated;
+
+ struct pipe_clip_state ucp;
+
+ struct nvfx_vertex_program_exec *insns;
+ unsigned nr_insns;
+ struct nvfx_vertex_program_data *consts;
+ unsigned nr_consts;
+
+ struct nouveau_resource *exec;
+ unsigned exec_start;
+ struct nouveau_resource *data;
+ unsigned data_start;
+ unsigned data_start_min;
+
+ uint32_t ir;
+ uint32_t or;
+ uint32_t clip_ctrl;
+};
+
+struct nvfx_fragment_program_data {
+ unsigned offset;
+ unsigned index;
+};
+
+struct nvfx_fragment_program_bo {
+ struct nvfx_fragment_program_bo* next;
+ struct nouveau_bo* bo;
+ char insn[] __attribute__((aligned(16)));
+};
+
+struct nvfx_fragment_program {
+ struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
+
+ boolean translated;
+ unsigned samplers;
+
+ uint32_t *insn;
+ int insn_len;
+
+ struct nvfx_fragment_program_data *consts;
+ unsigned nr_consts;
+
+ uint32_t fp_control;
+
+ unsigned bo_prog_idx;
+ unsigned prog_size;
+ unsigned progs_per_bo;
+ struct nvfx_fragment_program_bo* fpbo;
+};
+
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_state_blend.c b/src/gallium/drivers/nvfx/nvfx_state_blend.c
new file mode 100644
index 00000000000..fe34e98364c
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_state_blend.c
@@ -0,0 +1,22 @@
+#include "nvfx_context.h"
+
+void
+nvfx_state_blend_validate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len);
+}
+
+void
+nvfx_state_blend_colour_validate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct pipe_blend_color *bcol = &nvfx->blend_colour;
+
+ WAIT_RING(chan, 2);
+ OUT_RING(chan, RING_3D(NV34TCL_BLEND_COLOR, 1));
+ OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) |
+ (float_to_ubyte(bcol->color[0]) << 16) |
+ (float_to_ubyte(bcol->color[1]) << 8) |
+ (float_to_ubyte(bcol->color[2]) << 0)));
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c
new file mode 100644
index 00000000000..f91ae19ecd3
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
@@ -0,0 +1,180 @@
+#include "nvfx_context.h"
+#include "nvfx_state.h"
+#include "draw/draw_context.h"
+
+static boolean
+nvfx_state_validate_common(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ unsigned dirty = nvfx->dirty;
+
+ if(nvfx != nvfx->screen->cur_ctx)
+ dirty = ~0;
+
+ if(nvfx->render_mode == HW)
+ {
+ if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_VERTCONST | NVFX_NEW_UCP))
+ {
+ if(!nvfx_vertprog_validate(nvfx))
+ return FALSE;
+ }
+
+ if(dirty & (NVFX_NEW_ARRAYS))
+ {
+ if(!nvfx_vbo_validate(nvfx))
+ return FALSE;
+ }
+ }
+ else
+ {
+ /* TODO: this looks a bit misdesigned */
+ if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
+ nvfx_vertprog_validate(nvfx);
+
+ if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG))
+ nvfx_vtxfmt_validate(nvfx);
+ }
+
+ if(dirty & NVFX_NEW_FB)
+ nvfx_state_framebuffer_validate(nvfx);
+
+ if(dirty & NVFX_NEW_RAST)
+ sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len);
+
+ if(dirty & NVFX_NEW_SCISSOR)
+ nvfx_state_scissor_validate(nvfx);
+
+ if(dirty & NVFX_NEW_STIPPLE)
+ nvfx_state_stipple_validate(nvfx);
+
+ if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST))
+ nvfx_fragprog_validate(nvfx);
+
+ if(dirty & NVFX_NEW_SAMPLER)
+ nvfx_fragtex_validate(nvfx);
+
+ if(dirty & NVFX_NEW_BLEND)
+ sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len);
+
+ if(dirty & NVFX_NEW_BCOL)
+ nvfx_state_blend_colour_validate(nvfx);
+
+ if(dirty & NVFX_NEW_ZSA)
+ sb_emit(chan, nvfx->zsa->sb, nvfx->zsa->sb_len);
+
+ if(dirty & NVFX_NEW_SR)
+ nvfx_state_sr_validate(nvfx);
+
+/* Having this depend on FB looks wrong, but it seems
+ necessary to make this work on nv3x
+ TODO: find the right fix
+*/
+ if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_FB))
+ nvfx_state_viewport_validate(nvfx);
+
+ /* TODO: could nv30 need this or something similar too? */
+ if((dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SAMPLER)) && nvfx->is_nv4x) {
+ WAIT_RING(chan, 4);
+ OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
+ OUT_RING(chan, 2);
+ OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
+ OUT_RING(chan, 1);
+ }
+ nvfx->dirty = 0;
+ return TRUE;
+}
+
+void
+nvfx_state_emit(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ /* we need to ensure there is enough space to output relocations in one go */
+ unsigned max_relocs = 0
+ + 16 /* vertex buffers, incl. dma flag */
+ + 2 /* index buffer plus format+dma flag */
+ + 2 * 5 /* 4 cbufs + zsbuf, plus dma objects */
+ + 2 * 16 /* fragment textures plus format+dma flag */
+ + 2 * 4 /* vertex textures plus format+dma flag */
+ + 1 /* fragprog incl dma flag */
+ ;
+ MARK_RING(chan, max_relocs * 2, max_relocs * 2);
+ nvfx_state_relocate(nvfx);
+}
+
+void
+nvfx_state_relocate(struct nvfx_context *nvfx)
+{
+ nvfx_framebuffer_relocate(nvfx);
+ nvfx_fragtex_relocate(nvfx);
+ nvfx_fragprog_relocate(nvfx);
+ if (nvfx->render_mode == HW)
+ nvfx_vbo_relocate(nvfx);
+}
+
+boolean
+nvfx_state_validate(struct nvfx_context *nvfx)
+{
+ boolean was_sw = nvfx->fallback_swtnl ? TRUE : FALSE;
+
+ if (nvfx->render_mode != HW) {
+ /* Don't even bother trying to go back to hw if none
+ * of the states that caused swtnl previously have changed.
+ */
+ if ((nvfx->fallback_swtnl & nvfx->dirty)
+ != nvfx->fallback_swtnl)
+ return FALSE;
+
+ /* Attempt to go to hwtnl again */
+ nvfx->dirty |= (NVFX_NEW_VIEWPORT |
+ NVFX_NEW_VERTPROG |
+ NVFX_NEW_ARRAYS);
+ nvfx->render_mode = HW;
+ }
+
+ if(!nvfx_state_validate_common(nvfx))
+ return FALSE;
+
+ if (was_sw)
+ NOUVEAU_ERR("swtnl->hw\n");
+
+ return TRUE;
+}
+
+boolean
+nvfx_state_validate_swtnl(struct nvfx_context *nvfx)
+{
+ struct draw_context *draw = nvfx->draw;
+
+ /* Setup for swtnl */
+ if (nvfx->render_mode == HW) {
+ NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl);
+ nvfx->pipe.flush(&nvfx->pipe, 0, NULL);
+ nvfx->dirty |= (NVFX_NEW_VIEWPORT |
+ NVFX_NEW_VERTPROG |
+ NVFX_NEW_ARRAYS);
+ nvfx->render_mode = SWTNL;
+ }
+
+ if (nvfx->draw_dirty & NVFX_NEW_VERTPROG)
+ draw_bind_vertex_shader(draw, nvfx->vertprog->draw);
+
+ if (nvfx->draw_dirty & NVFX_NEW_RAST)
+ draw_set_rasterizer_state(draw, &nvfx->rasterizer->pipe,
+ nvfx->rasterizer);
+
+ if (nvfx->draw_dirty & NVFX_NEW_UCP)
+ draw_set_clip_state(draw, &nvfx->clip);
+
+ if (nvfx->draw_dirty & NVFX_NEW_VIEWPORT)
+ draw_set_viewport_state(draw, &nvfx->viewport);
+
+ if (nvfx->draw_dirty & NVFX_NEW_ARRAYS) {
+ draw_set_vertex_buffers(draw, nvfx->vtxbuf_nr, nvfx->vtxbuf);
+ draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe);
+ }
+
+ nvfx_state_validate_common(nvfx);
+
+ nvfx->draw_dirty = 0;
+ return TRUE;
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c
new file mode 100644
index 00000000000..360e569f77c
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c
@@ -0,0 +1,250 @@
+#include "nvfx_context.h"
+#include "nvfx_resource.h"
+#include "nouveau/nouveau_util.h"
+
+
+
+void
+nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
+{
+ struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
+ struct nouveau_channel *chan = nvfx->screen->base.channel;
+ uint32_t rt_enable = 0, rt_format = 0;
+ int i, colour_format = 0, zeta_format = 0;
+ int depth_only = 0;
+ unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+ unsigned w = fb->width;
+ unsigned h = fb->height;
+ int colour_bits = 32, zeta_bits = 32;
+
+ if(!nvfx->is_nv4x)
+ assert(fb->nr_cbufs <= 2);
+ else
+ assert(fb->nr_cbufs <= 4);
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ if (colour_format)
+ assert(colour_format == fb->cbufs[i]->format);
+ else
+ colour_format = fb->cbufs[i]->format;
+
+ rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i);
+ nvfx->hw_rt[i].bo = nvfx_surface_buffer(fb->cbufs[i]);
+ nvfx->hw_rt[i].offset = fb->cbufs[i]->offset;
+ nvfx->hw_rt[i].pitch = ((struct nv04_surface *)fb->cbufs[i])->pitch;
+ }
+ for(; i < 4; ++i)
+ nvfx->hw_rt[i].bo = 0;
+
+ if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 |
+ NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3))
+ rt_enable |= NV34TCL_RT_ENABLE_MRT;
+
+ if (fb->zsbuf) {
+ zeta_format = fb->zsbuf->format;
+ nvfx->hw_zeta.bo = nvfx_surface_buffer(fb->zsbuf);
+ nvfx->hw_zeta.offset = fb->zsbuf->offset;
+ nvfx->hw_zeta.pitch = ((struct nv04_surface *)fb->zsbuf)->pitch;
+ }
+ else
+ nvfx->hw_zeta.bo = 0;
+
+ if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 |
+ NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) {
+ /* Render to at least a colour buffer */
+ if (!(fb->cbufs[0]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) {
+ assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+ for (i = 1; i < fb->nr_cbufs; i++)
+ assert(!(fb->cbufs[i]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR));
+
+ rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
+ (log2i(fb->cbufs[0]->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+ (log2i(fb->cbufs[0]->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+ }
+ else
+ rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+ } else if (fb->zsbuf) {
+ depth_only = 1;
+
+ /* Render to depth buffer only */
+ if (!(fb->zsbuf->texture->usage & NVFX_RESOURCE_FLAG_LINEAR)) {
+ assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+
+ rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
+ (log2i(fb->zsbuf->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+ (log2i(fb->zsbuf->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+ }
+ else
+ rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+ } else {
+ return;
+ }
+
+ switch (colour_format) {
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case 0:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
+ break;
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
+ colour_bits = 16;
+ break;
+ default:
+ assert(0);
+ }
+
+ switch (zeta_format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
+ zeta_bits = 16;
+ break;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case 0:
+ rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
+ break;
+ default:
+ assert(0);
+ }
+
+ if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) {
+ /* TODO: does this limitation really exist?
+ TODO: can it be worked around somehow? */
+ assert(0);
+ }
+
+ if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0)
+ || ((!nvfx->is_nv4x) && depth_only)) {
+ struct nvfx_render_target *rt0 = (depth_only ? &nvfx->hw_zeta : &nvfx->hw_rt[0]);
+ uint32_t pitch = rt0->pitch;
+
+ if(!nvfx->is_nv4x)
+ {
+ if (nvfx->hw_zeta.bo) {
+ pitch |= (nvfx->hw_zeta.pitch << 16);
+ } else {
+ pitch |= (pitch << 16);
+ }
+ }
+
+ OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 1));
+ OUT_RELOC(chan, rt0->bo, 0,
+ rt_flags | NOUVEAU_BO_OR,
+ chan->vram->handle, chan->gart->handle);
+ OUT_RING(chan, RING_3D(NV34TCL_COLOR0_PITCH, 2));
+ OUT_RING(chan, pitch);
+ OUT_RELOC(chan, rt0->bo,
+ rt0->offset, rt_flags | NOUVEAU_BO_LOW,
+ 0, 0);
+ }
+
+ if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) {
+ OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR1, 1));
+ OUT_RELOC(chan, nvfx->hw_rt[1].bo, 0,
+ rt_flags | NOUVEAU_BO_OR,
+ chan->vram->handle, chan->gart->handle);
+ OUT_RING(chan, RING_3D(NV34TCL_COLOR1_OFFSET, 2));
+ OUT_RELOC(chan, nvfx->hw_rt[1].bo,
+ nvfx->hw_rt[1].offset, rt_flags | NOUVEAU_BO_LOW,
+ 0, 0);
+ OUT_RING(chan, nvfx->hw_rt[1].pitch);
+ }
+
+ if(nvfx->is_nv4x)
+ {
+ if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
+ OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR2, 1));
+ OUT_RELOC(chan, nvfx->hw_rt[2].bo, 0,
+ rt_flags | NOUVEAU_BO_OR,
+ chan->vram->handle, chan->gart->handle);
+ OUT_RING(chan, RING_3D(NV40TCL_COLOR2_OFFSET, 1));
+ OUT_RELOC(chan, nvfx->hw_rt[2].bo,
+ nvfx->hw_rt[2].offset, rt_flags | NOUVEAU_BO_LOW,
+ 0, 0);
+ OUT_RING(chan, RING_3D(NV40TCL_COLOR2_PITCH, 1));
+ OUT_RING(chan, nvfx->hw_rt[2].pitch);
+ }
+
+ if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
+ OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR3, 1));
+ OUT_RELOC(chan, nvfx->hw_rt[3].bo, 0,
+ rt_flags | NOUVEAU_BO_OR,
+ chan->vram->handle, chan->gart->handle);
+ OUT_RING(chan, RING_3D(NV40TCL_COLOR3_OFFSET, 1));
+ OUT_RELOC(chan, nvfx->hw_rt[3].bo,
+ nvfx->hw_rt[3].offset, rt_flags | NOUVEAU_BO_LOW,
+ 0, 0);
+ OUT_RING(chan, RING_3D(NV40TCL_COLOR3_PITCH, 1));
+ OUT_RING(chan, nvfx->hw_rt[3].pitch);
+ }
+ }
+
+ if (zeta_format) {
+ OUT_RING(chan, RING_3D(NV34TCL_DMA_ZETA, 1));
+ OUT_RELOC(chan, nvfx->hw_zeta.bo, 0,
+ rt_flags | NOUVEAU_BO_OR,
+ chan->vram->handle, chan->gart->handle);
+ OUT_RING(chan, RING_3D(NV34TCL_ZETA_OFFSET, 1));
+ /* TODO: reverse engineer LMA */
+ OUT_RELOC(chan, nvfx->hw_zeta.bo,
+ nvfx->hw_zeta.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0);
+ if(nvfx->is_nv4x) {
+ OUT_RING(chan, RING_3D(NV40TCL_ZETA_PITCH, 1));
+ OUT_RING(chan, nvfx->hw_zeta.pitch);
+ }
+ }
+
+ OUT_RING(chan, RING_3D(NV34TCL_RT_ENABLE, 1));
+ OUT_RING(chan, rt_enable);
+ OUT_RING(chan, RING_3D(NV34TCL_RT_HORIZ, 3));
+ OUT_RING(chan, (w << 16) | 0);
+ OUT_RING(chan, (h << 16) | 0);
+ OUT_RING(chan, rt_format);
+ OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_HORIZ, 2));
+ OUT_RING(chan, (w << 16) | 0);
+ OUT_RING(chan, (h << 16) | 0);
+ OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2));
+ OUT_RING(chan, ((w - 1) << 16) | 0);
+ OUT_RING(chan, ((h - 1) << 16) | 0);
+ OUT_RING(chan, RING_3D(0x1d88, 1));
+ OUT_RING(chan, (1 << 12) | h);
+
+ if(!nvfx->is_nv4x) {
+ /* Wonder why this is needed, context should all be set to zero on init */
+ /* TODO: we can most likely remove this, after putting it in context init */
+ OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TX_ORIGIN, 1));
+ OUT_RING(chan, 0);
+ }
+}
+
+void
+nvfx_framebuffer_relocate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel *chan = nvfx->screen->base.channel;
+ unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+ rt_flags |= NOUVEAU_BO_DUMMY;
+ MARK_RING(chan, 20, 20);
+
+#define DO_(var, pfx, name) \
+ if(var.bo) { \
+ OUT_RELOC(chan, var.bo, RING_3D(pfx##TCL_DMA_##name, 1), rt_flags, 0, 0); \
+ OUT_RELOC(chan, var.bo, 0, \
+ rt_flags | NOUVEAU_BO_OR, \
+ chan->vram->handle, chan->gart->handle); \
+ OUT_RELOC(chan, var.bo, RING_3D(pfx##TCL_##name##_OFFSET, 1), rt_flags, 0, 0); \
+ OUT_RELOC(chan, var.bo, \
+ var.offset, rt_flags | NOUVEAU_BO_LOW, \
+ 0, 0); \
+ }
+
+#define DO(pfx, num) DO_(nvfx->hw_rt[num], pfx, COLOR##num)
+ DO(NV34, 0);
+ DO(NV34, 1);
+ DO(NV40, 2);
+ DO(NV40, 3);
+
+ DO_(nvfx->hw_zeta, NV34, ZETA);
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c b/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c
new file mode 100644
index 00000000000..7f14ae85d5a
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c
@@ -0,0 +1,9 @@
+#include "nvfx_context.h"
+
+void
+nvfx_state_rasterizer_validate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len);
+}
+
diff --git a/src/gallium/drivers/nvfx/nvfx_state_scissor.c b/src/gallium/drivers/nvfx/nvfx_state_scissor.c
new file mode 100644
index 00000000000..9077266120a
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_state_scissor.c
@@ -0,0 +1,23 @@
+#include "nvfx_context.h"
+
+void
+nvfx_state_scissor_validate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe;
+ struct pipe_scissor_state *s = &nvfx->scissor;
+
+ if ((rast->scissor == 0 && nvfx->state.scissor_enabled == 0))
+ return;
+ nvfx->state.scissor_enabled = rast->scissor;
+
+ WAIT_RING(chan, 3);
+ OUT_RING(chan, RING_3D(NV34TCL_SCISSOR_HORIZ, 2));
+ if (nvfx->state.scissor_enabled) {
+ OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx);
+ OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny);
+ } else {
+ OUT_RING(chan, 4096 << 16);
+ OUT_RING(chan, 4096 << 16);
+ }
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_state_stipple.c b/src/gallium/drivers/nvfx/nvfx_state_stipple.c
new file mode 100644
index 00000000000..4da968f093f
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_state_stipple.c
@@ -0,0 +1,26 @@
+#include "nvfx_context.h"
+
+void
+nvfx_state_stipple_validate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe;
+
+ if ((rast->poly_stipple_enable == 0 && nvfx->state.stipple_enabled == 0))
+ return;
+
+ if (rast->poly_stipple_enable) {
+ unsigned i;
+
+ WAIT_RING(chan, 35);
+ OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_ENABLE, 1));
+ OUT_RING(chan, 1);
+ OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32));
+ for (i = 0; i < 32; i++)
+ OUT_RING(chan, nvfx->stipple[i]);
+ } else {
+ WAIT_RING(chan, 2);
+ OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_ENABLE, 1));
+ OUT_RING(chan, 0);
+ }
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_state_viewport.c b/src/gallium/drivers/nvfx/nvfx_state_viewport.c
new file mode 100644
index 00000000000..e983b16f321
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_state_viewport.c
@@ -0,0 +1,35 @@
+#include "nvfx_context.h"
+
+void
+nvfx_state_viewport_validate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct pipe_viewport_state *vpt = &nvfx->viewport;
+
+ WAIT_RING(chan, 11);
+ if(nvfx->render_mode == HW) {
+ OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TRANSLATE_X, 8));
+ OUT_RINGf(chan, vpt->translate[0]);
+ OUT_RINGf(chan, vpt->translate[1]);
+ OUT_RINGf(chan, vpt->translate[2]);
+ OUT_RINGf(chan, vpt->translate[3]);
+ OUT_RINGf(chan, vpt->scale[0]);
+ OUT_RINGf(chan, vpt->scale[1]);
+ OUT_RINGf(chan, vpt->scale[2]);
+ OUT_RINGf(chan, vpt->scale[3]);
+ OUT_RING(chan, RING_3D(0x1d78, 1));
+ OUT_RING(chan, 1);
+ } else {
+ OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TRANSLATE_X, 8));
+ OUT_RINGf(chan, 0.0f);
+ OUT_RINGf(chan, 0.0f);
+ OUT_RINGf(chan, 0.0f);
+ OUT_RINGf(chan, 0.0f);
+ OUT_RINGf(chan, 1.0f);
+ OUT_RINGf(chan, 1.0f);
+ OUT_RINGf(chan, 1.0f);
+ OUT_RINGf(chan, 1.0f);
+ OUT_RING(chan, RING_3D(0x1d78, 1));
+ OUT_RING(chan, nvfx->is_nv4x ? 0x110 : 1);
+ }
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_state_zsa.c b/src/gallium/drivers/nvfx/nvfx_state_zsa.c
new file mode 100644
index 00000000000..608605d32bd
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_state_zsa.c
@@ -0,0 +1,21 @@
+#include "nvfx_context.h"
+
+void
+nvfx_state_zsa_validate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ sb_emit(chan, nvfx->zsa->sb, nvfx->zsa->sb_len);
+}
+
+void
+nvfx_state_sr_validate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct pipe_stencil_ref *sr = &nvfx->stencil_ref;
+
+ WAIT_RING(chan, 4);
+ OUT_RING(chan, RING_3D(NV34TCL_STENCIL_FRONT_FUNC_REF, 1));
+ OUT_RING(chan, sr->ref_value[0]);
+ OUT_RING(chan, RING_3D(NV34TCL_STENCIL_BACK_FUNC_REF, 1));
+ OUT_RING(chan, sr->ref_value[1]);
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c
new file mode 100644
index 00000000000..2e115650aeb
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_surface.c
@@ -0,0 +1,61 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nvfx_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+
+static void
+nvfx_surface_copy(struct pipe_context *pipe,
+ struct pipe_surface *dest, unsigned destx, unsigned desty,
+ struct pipe_surface *src, unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nv04_surface_2d *eng2d = nvfx->screen->eng2d;
+
+ eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height);
+}
+
+static void
+nvfx_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
+ unsigned destx, unsigned desty, unsigned width,
+ unsigned height, unsigned value)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nv04_surface_2d *eng2d = nvfx->screen->eng2d;
+
+ eng2d->fill(eng2d, dest, destx, desty, width, height, value);
+}
+
+void
+nvfx_init_surface_functions(struct nvfx_context *nvfx)
+{
+ nvfx->pipe.surface_copy = nvfx_surface_copy;
+ nvfx->pipe.surface_fill = nvfx_surface_fill;
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_tex.h b/src/gallium/drivers/nvfx/nvfx_tex.h
new file mode 100644
index 00000000000..69187a79e79
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_tex.h
@@ -0,0 +1,133 @@
+#ifndef NVFX_TEX_H_
+#define NVFX_TEX_H_
+
+static inline unsigned
+nvfx_tex_wrap_mode(unsigned wrap) {
+ unsigned ret;
+
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ ret = NV34TCL_TX_WRAP_S_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_CLAMP:
+ ret = NV34TCL_TX_WRAP_S_CLAMP;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER;
+ break;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP;
+ break;
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ ret = NV34TCL_TX_WRAP_S_REPEAT;
+ break;
+ }
+
+ return ret >> NV34TCL_TX_WRAP_S_SHIFT;
+}
+
+static inline unsigned
+nvfx_tex_wrap_compare_mode(const struct pipe_sampler_state* cso)
+{
+ if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ switch (cso->compare_func) {
+ case PIPE_FUNC_NEVER:
+ return NV34TCL_TX_WRAP_RCOMP_NEVER;
+ case PIPE_FUNC_GREATER:
+ return NV34TCL_TX_WRAP_RCOMP_GREATER;
+ case PIPE_FUNC_EQUAL:
+ return NV34TCL_TX_WRAP_RCOMP_EQUAL;
+ case PIPE_FUNC_GEQUAL:
+ return NV34TCL_TX_WRAP_RCOMP_GEQUAL;
+ case PIPE_FUNC_LESS:
+ return NV34TCL_TX_WRAP_RCOMP_LESS;
+ case PIPE_FUNC_NOTEQUAL:
+ return NV34TCL_TX_WRAP_RCOMP_NOTEQUAL;
+ case PIPE_FUNC_LEQUAL:
+ return NV34TCL_TX_WRAP_RCOMP_LEQUAL;
+ case PIPE_FUNC_ALWAYS:
+ return NV34TCL_TX_WRAP_RCOMP_ALWAYS;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+static inline unsigned nvfx_tex_filter(const struct pipe_sampler_state* cso)
+{
+ unsigned filter = 0;
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR;
+ break;
+ }
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST;
+ break;
+ }
+ break;
+ }
+ return filter;
+}
+
+static inline unsigned nvfx_tex_border_color(const float* border_color)
+{
+ return ((float_to_ubyte(border_color[3]) << 24) |
+ (float_to_ubyte(border_color[0]) << 16) |
+ (float_to_ubyte(border_color[1]) << 8) |
+ (float_to_ubyte(border_color[2]) << 0));
+}
+
+struct nvfx_sampler_state {
+ uint32_t fmt;
+ uint32_t wrap;
+ uint32_t en;
+ uint32_t filt;
+ uint32_t bcol;
+};
+
+#endif /* NVFX_TEX_H_ */
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c
new file mode 100644
index 00000000000..b2ef27cf579
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_transfer.c
@@ -0,0 +1,205 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "nouveau/nouveau_winsys.h"
+#include "nvfx_context.h"
+#include "nvfx_screen.h"
+#include "nvfx_state.h"
+#include "nvfx_resource.h"
+#include "nvfx_transfer.h"
+
+struct nvfx_transfer {
+ struct pipe_transfer base;
+ struct pipe_surface *surface;
+ boolean direct;
+};
+
+static void
+nvfx_compatible_transfer_tex(struct pipe_resource *pt, unsigned width, unsigned height,
+ unsigned bind,
+ struct pipe_resource *template)
+{
+ memset(template, 0, sizeof(struct pipe_resource));
+ template->target = pt->target;
+ template->format = pt->format;
+ template->width0 = width;
+ template->height0 = height;
+ template->depth0 = 1;
+ template->last_level = 0;
+ template->nr_samples = pt->nr_samples;
+ template->bind = bind;
+ template->usage = PIPE_USAGE_DYNAMIC;
+ template->flags = NVFX_RESOURCE_FLAG_LINEAR;
+}
+
+
+static unsigned nvfx_transfer_bind_flags( unsigned transfer_usage )
+{
+ unsigned bind = 0;
+
+ if (transfer_usage & PIPE_TRANSFER_WRITE)
+ bind |= PIPE_BIND_BLIT_SOURCE;
+
+ if (transfer_usage & PIPE_TRANSFER_READ)
+ bind |= PIPE_BIND_BLIT_DESTINATION;
+
+ return bind;
+}
+
+struct pipe_transfer *
+nvfx_miptree_transfer_new(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box)
+{
+ struct pipe_screen *pscreen = pipe->screen;
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
+ struct nvfx_transfer *tx;
+ struct pipe_resource tx_tex_template, *tx_tex;
+ static int no_transfer = -1;
+ unsigned bind = nvfx_transfer_bind_flags(usage);
+ if(no_transfer < 0)
+ no_transfer = debug_get_bool_option("NOUVEAU_NO_TRANSFER", FALSE);
+
+
+ tx = CALLOC_STRUCT(nvfx_transfer);
+ if (!tx)
+ return NULL;
+
+ /* Don't handle 3D transfers yet.
+ */
+ assert(box->depth == 1);
+
+ pipe_resource_reference(&tx->base.resource, pt);
+ tx->base.sr = sr;
+ tx->base.usage = usage;
+ tx->base.box = *box;
+ tx->base.stride = mt->level[sr.level].pitch;
+
+ /* Direct access to texture */
+ if ((pt->usage == PIPE_USAGE_DYNAMIC ||
+ no_transfer) &&
+ pt->flags & NVFX_RESOURCE_FLAG_LINEAR)
+ {
+ tx->direct = true;
+
+ /* XXX: just call the internal nvfx function.
+ */
+ tx->surface = pscreen->get_tex_surface(pscreen, pt,
+ sr.face, sr.level,
+ box->z,
+ bind);
+ return &tx->base;
+ }
+
+ tx->direct = false;
+
+ nvfx_compatible_transfer_tex(pt, box->width, box->height, bind, &tx_tex_template);
+
+ tx_tex = pscreen->resource_create(pscreen, &tx_tex_template);
+ if (!tx_tex)
+ {
+ FREE(tx);
+ return NULL;
+ }
+
+ tx->base.stride = ((struct nvfx_miptree*)tx_tex)->level[0].pitch;
+
+ tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
+ 0, 0, 0,
+ bind);
+
+ pipe_resource_reference(&tx_tex, NULL);
+
+ if (!tx->surface)
+ {
+ pipe_surface_reference(&tx->surface, NULL);
+ FREE(tx);
+ return NULL;
+ }
+
+ if (usage & PIPE_TRANSFER_READ) {
+ struct nvfx_screen *nvscreen = nvfx_screen(pscreen);
+ struct pipe_surface *src;
+
+ src = pscreen->get_tex_surface(pscreen, pt,
+ sr.face, sr.level, box->z,
+ PIPE_BIND_BLIT_SOURCE);
+
+ /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
+ /* TODO: Check if SIFM can un-swizzle */
+ nvscreen->eng2d->copy(nvscreen->eng2d,
+ tx->surface, 0, 0,
+ src,
+ box->x, box->y,
+ box->width, box->height);
+
+ pipe_surface_reference(&src, NULL);
+ }
+
+ return &tx->base;
+}
+
+void
+nvfx_miptree_transfer_del(struct pipe_context *pipe,
+ struct pipe_transfer *ptx)
+{
+ struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx;
+
+ if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) {
+ struct pipe_screen *pscreen = pipe->screen;
+ struct nvfx_screen *nvscreen = nvfx_screen(pscreen);
+ struct pipe_surface *dst;
+
+ dst = pscreen->get_tex_surface(pscreen,
+ ptx->resource,
+ ptx->sr.face,
+ ptx->sr.level,
+ ptx->box.z,
+ PIPE_BIND_BLIT_DESTINATION);
+
+ /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
+ nvscreen->eng2d->copy(nvscreen->eng2d,
+ dst, ptx->box.x, ptx->box.y,
+ tx->surface, 0, 0,
+ ptx->box.width, ptx->box.height);
+
+ pipe_surface_reference(&dst, NULL);
+ }
+
+ pipe_surface_reference(&tx->surface, NULL);
+ pipe_resource_reference(&ptx->resource, NULL);
+ FREE(ptx);
+}
+
+void *
+nvfx_miptree_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx)
+{
+ struct pipe_screen *pscreen = pipe->screen;
+ struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx;
+ struct nv04_surface *ns = (struct nv04_surface *)tx->surface;
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture;
+ uint8_t *map = nouveau_screen_bo_map(pscreen, mt->base.bo,
+ nouveau_screen_transfer_flags(ptx->usage));
+
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return (map + ns->base.offset +
+ ptx->box.y * ns->pitch +
+ ptx->box.x * util_format_get_blocksize(ptx->resource->format));
+}
+
+void
+nvfx_miptree_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx)
+{
+ struct pipe_screen *pscreen = pipe->screen;
+ struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx;
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture;
+
+ nouveau_screen_bo_unmap(pscreen, mt->base.bo);
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.h b/src/gallium/drivers/nvfx/nvfx_transfer.h
new file mode 100644
index 00000000000..3e3317b2c7b
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_transfer.h
@@ -0,0 +1,26 @@
+
+#ifndef NVFX_TRANSFER_H
+#define NVFX_TRANSFER_H
+
+#include "util/u_transfer.h"
+#include "pipe/p_state.h"
+
+
+struct pipe_transfer *
+nvfx_miptree_transfer_new(struct pipe_context *pcontext,
+ struct pipe_resource *pt,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box);
+void
+nvfx_miptree_transfer_del(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+void *
+nvfx_miptree_transfer_map(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+void
+nvfx_miptree_transfer_unmap(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c
new file mode 100644
index 00000000000..520bae5aed2
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
@@ -0,0 +1,628 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nvfx_context.h"
+#include "nvfx_state.h"
+#include "nvfx_resource.h"
+
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_class.h"
+#include "nouveau/nouveau_pushbuf.h"
+#include "nouveau/nouveau_util.h"
+
+static INLINE int
+nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
+{
+ switch (pipe) {
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ *fmt = NV34TCL_VTXFMT_TYPE_FLOAT;
+ break;
+ case PIPE_FORMAT_R16_FLOAT:
+ case PIPE_FORMAT_R16G16_FLOAT:
+ case PIPE_FORMAT_R16G16B16_FLOAT:
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ *fmt = NV34TCL_VTXFMT_TYPE_HALF;
+ break;
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ *fmt = NV34TCL_VTXFMT_TYPE_UBYTE;
+ break;
+ case PIPE_FORMAT_R16_SSCALED:
+ case PIPE_FORMAT_R16G16_SSCALED:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ *fmt = NV34TCL_VTXFMT_TYPE_USHORT;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
+ return 1;
+ }
+
+ switch (pipe) {
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R16_FLOAT:
+ case PIPE_FORMAT_R16_SSCALED:
+ *ncomp = 1;
+ break;
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R16G16_FLOAT:
+ case PIPE_FORMAT_R16G16_SSCALED:
+ *ncomp = 2;
+ break;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ case PIPE_FORMAT_R16G16B16_FLOAT:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ *ncomp = 3;
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ *ncomp = 4;
+ break;
+ default:
+ NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
+ return 1;
+ }
+
+ return 0;
+}
+
+static boolean
+nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib,
+ unsigned ib_size)
+{
+ unsigned type;
+
+ if (!ib) {
+ nvfx->idxbuf = NULL;
+ nvfx->idxbuf_format = 0xdeadbeef;
+ return FALSE;
+ }
+
+ if (!nvfx->screen->index_buffer_reloc_flags || ib_size == 1)
+ return FALSE;
+
+ switch (ib_size) {
+ case 2:
+ type = NV34TCL_IDXBUF_FORMAT_TYPE_U16;
+ break;
+ case 4:
+ type = NV34TCL_IDXBUF_FORMAT_TYPE_U32;
+ break;
+ default:
+ return FALSE;
+ }
+
+ if (ib != nvfx->idxbuf ||
+ type != nvfx->idxbuf_format) {
+ nvfx->dirty |= NVFX_NEW_ARRAYS;
+ nvfx->idxbuf = ib;
+ nvfx->idxbuf_format = type;
+ }
+
+ return TRUE;
+}
+
+// type must be floating point
+static inline void
+nvfx_vbo_static_attrib(struct nvfx_context *nvfx,
+ int attrib, struct pipe_vertex_element *ve,
+ struct pipe_vertex_buffer *vb, unsigned ncomp)
+{
+ struct pipe_transfer *transfer;
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ void *map;
+ float *v;
+
+ map = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer);
+ map = (uint8_t *) map + vb->buffer_offset + ve->src_offset;
+
+ v = map;
+
+ switch (ncomp) {
+ case 4:
+ OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4));
+ OUT_RING(chan, fui(v[0]));
+ OUT_RING(chan, fui(v[1]));
+ OUT_RING(chan, fui(v[2]));
+ OUT_RING(chan, fui(v[3]));
+ break;
+ case 3:
+ OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3));
+ OUT_RING(chan, fui(v[0]));
+ OUT_RING(chan, fui(v[1]));
+ OUT_RING(chan, fui(v[2]));
+ break;
+ case 2:
+ OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2));
+ OUT_RING(chan, fui(v[0]));
+ OUT_RING(chan, fui(v[1]));
+ break;
+ case 1:
+ OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1));
+ OUT_RING(chan, fui(v[0]));
+ break;
+ }
+
+ pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer);
+}
+
+void
+nvfx_draw_arrays(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_screen *screen = nvfx->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ unsigned restart = 0;
+
+ nvfx_vbo_set_idxbuf(nvfx, NULL, 0);
+ if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
+ nvfx_draw_elements_swtnl(pipe, NULL, 0, 0,
+ mode, start, count);
+ return;
+ }
+
+ while (count) {
+ unsigned vc, nr, avail;
+
+ nvfx_state_emit(nvfx);
+
+ avail = AVAIL_RING(chan);
+ avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+
+ vc = nouveau_vbuf_split(avail, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ FIRE_RING(chan);
+ continue;
+ }
+
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING (chan, nvgl_primitive(mode));
+
+ nr = (vc & 0xff);
+ if (nr) {
+ OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1));
+ OUT_RING (chan, ((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push));
+ while (push--) {
+ OUT_RING(chan, ((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING (chan, 0);
+
+ count -= vc;
+ start = restart;
+ }
+
+ pipe->flush(pipe, 0, NULL);
+}
+
+static INLINE void
+nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nvfx_screen *screen = nvfx->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+
+ while (count) {
+ uint8_t *elts = (uint8_t *)ib + start;
+ unsigned vc, push, restart = 0, avail;
+
+ nvfx_state_emit(nvfx);
+
+ avail = AVAIL_RING(chan);
+ avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+
+ vc = nouveau_vbuf_split(avail, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(chan);
+ continue;
+ }
+ count -= vc;
+
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING (chan, nvgl_primitive(mode));
+
+ if (vc & 1) {
+ OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
+ OUT_RING (chan, elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ push = MIN2(vc, 2047 * 2);
+
+ OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
+ for (i = 0; i < push; i+=2)
+ OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING (chan, 0);
+
+ start = restart;
+ }
+}
+
+static INLINE void
+nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nvfx_screen *screen = nvfx->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+
+ while (count) {
+ uint16_t *elts = (uint16_t *)ib + start;
+ unsigned vc, push, restart = 0, avail;
+
+ nvfx_state_emit(nvfx);
+
+ avail = AVAIL_RING(chan);
+ avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+
+ vc = nouveau_vbuf_split(avail, 6, 2,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(chan);
+ continue;
+ }
+ count -= vc;
+
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING (chan, nvgl_primitive(mode));
+
+ if (vc & 1) {
+ OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
+ OUT_RING (chan, elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+
+ push = MIN2(vc, 2047 * 2);
+
+ OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
+ for (i = 0; i < push; i+=2)
+ OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
+
+ vc -= push;
+ elts += push;
+ }
+
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING (chan, 0);
+
+ start = restart;
+ }
+}
+
+static INLINE void
+nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nvfx_screen *screen = nvfx->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+
+ while (count) {
+ uint32_t *elts = (uint32_t *)ib + start;
+ unsigned vc, push, restart = 0, avail;
+
+ nvfx_state_emit(nvfx);
+
+ avail = AVAIL_RING(chan);
+ avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+
+ vc = nouveau_vbuf_split(avail, 5, 1,
+ mode, start, count, &restart);
+ if (vc == 0) {
+ FIRE_RING(chan);
+ continue;
+ }
+ count -= vc;
+
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING (chan, nvgl_primitive(mode));
+
+ while (vc) {
+ push = MIN2(vc, 2047);
+
+ OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push));
+ OUT_RINGp (chan, elts, push);
+
+ vc -= push;
+ elts += push;
+ }
+
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING (chan, 0);
+
+ start = restart;
+ }
+}
+
+static void
+nvfx_draw_elements_inline(struct pipe_context *pipe,
+ struct pipe_resource *ib,
+ unsigned ib_size, int ib_bias,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct pipe_transfer *transfer;
+ void *map;
+
+ map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer);
+ if (!ib) {
+ NOUVEAU_ERR("failed mapping ib\n");
+ return;
+ }
+
+ assert(ib_bias == 0);
+
+ switch (ib_size) {
+ case 1:
+ nvfx_draw_elements_u08(nvfx, map, mode, start, count);
+ break;
+ case 2:
+ nvfx_draw_elements_u16(nvfx, map, mode, start, count);
+ break;
+ case 4:
+ nvfx_draw_elements_u32(nvfx, map, mode, start, count);
+ break;
+ default:
+ NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
+ break;
+ }
+
+ pipe_buffer_unmap(pipe, ib, transfer);
+}
+
+static void
+nvfx_draw_elements_vbo(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nvfx_screen *screen = nvfx->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ unsigned restart = 0;
+
+ while (count) {
+ unsigned nr, vc, avail;
+
+ nvfx_state_emit(nvfx);
+
+ avail = AVAIL_RING(chan);
+ avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+
+ vc = nouveau_vbuf_split(avail, 6, 256,
+ mode, start, count, &restart);
+ if (!vc) {
+ FIRE_RING(chan);
+ continue;
+ }
+
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING (chan, nvgl_primitive(mode));
+
+ nr = (vc & 0xff);
+ if (nr) {
+ OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1));
+ OUT_RING (chan, ((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push));
+ while (push--) {
+ OUT_RING(chan, ((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING (chan, 0);
+
+ count -= vc;
+ start = restart;
+ }
+}
+
+void
+nvfx_draw_elements(struct pipe_context *pipe,
+ struct pipe_resource *indexBuffer,
+ unsigned indexSize, int indexBias,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ boolean idxbuf;
+
+ idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize);
+ if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
+ nvfx_draw_elements_swtnl(pipe,
+ indexBuffer, indexSize, indexBias,
+ mode, start, count);
+ return;
+ }
+
+ if (idxbuf) {
+ nvfx_draw_elements_vbo(pipe, mode, start, count);
+ } else {
+ nvfx_draw_elements_inline(pipe,
+ indexBuffer, indexSize, indexBias,
+ mode, start, count);
+ }
+
+ pipe->flush(pipe, 0, NULL);
+}
+
+boolean
+nvfx_vbo_validate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ struct pipe_resource *ib = nvfx->idxbuf;
+ unsigned ib_format = nvfx->idxbuf_format;
+ int i;
+ int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
+ uint32_t vtxfmt[16];
+ unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
+
+ if (!elements)
+ return TRUE;
+
+ nvfx->vbo_bo = 0;
+
+ MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
+ for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
+ struct pipe_vertex_element *ve;
+ struct pipe_vertex_buffer *vb;
+ unsigned type, ncomp;
+
+ ve = &nvfx->vtxelt->pipe[i];
+ vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+
+ if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
+ MARK_UNDO(chan);
+ nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS;
+ return FALSE;
+ }
+
+ if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) {
+ nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp);
+ vtxfmt[i] = type;
+ } else {
+ vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) |
+ (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type);
+ nvfx->vbo_bo |= (1 << i);
+ }
+ }
+
+ for(; i < elements; ++i)
+ vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT;
+
+ OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
+ OUT_RINGp(chan, vtxfmt, elements);
+
+ if(nvfx->is_nv4x) {
+ unsigned i;
+ /* seems to be some kind of cache flushing */
+ for(i = 0; i < 3; ++i) {
+ OUT_RING(chan, RING_3D(0x1718, 1));
+ OUT_RING(chan, 0);
+ }
+ }
+
+ OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
+ for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
+ struct pipe_vertex_element *ve;
+ struct pipe_vertex_buffer *vb;
+
+ ve = &nvfx->vtxelt->pipe[i];
+ vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+
+ if (!(nvfx->vbo_bo & (1 << i)))
+ OUT_RING(chan, 0);
+ else
+ {
+ struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
+ OUT_RELOC(chan, bo,
+ vb->buffer_offset + ve->src_offset,
+ vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
+ }
+ }
+
+ for (; i < elements; i++)
+ OUT_RING(chan, 0);
+
+ OUT_RING(chan, RING_3D(0x1710, 1));
+ OUT_RING(chan, 0);
+
+ if (ib) {
+ unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;
+ struct nouveau_bo* bo = nvfx_resource(ib)->bo;
+
+ assert(nvfx->screen->index_buffer_reloc_flags);
+
+ OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
+ OUT_RELOC(chan, bo, 0, ib_flags | NOUVEAU_BO_LOW, 0, 0);
+ OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
+ 0, NV34TCL_IDXBUF_FORMAT_DMA1);
+ }
+
+ nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
+ return TRUE;
+}
+
+void
+nvfx_vbo_relocate(struct nvfx_context *nvfx)
+{
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
+ int i;
+
+ MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
+ for(i = 0; i < nvfx->vtxelt->num_elements; ++i) {
+ if(nvfx->vbo_bo & (1 << i)) {
+ struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+ struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
+ OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1),
+ vb_flags, 0, 0);
+ OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset,
+ vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
+ }
+ }
+
+ if(nvfx->idxbuf)
+ {
+ unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
+ struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf)->bo;
+
+ assert(nvfx->screen->index_buffer_reloc_flags);
+
+ OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2),
+ ib_flags, 0, 0);
+ OUT_RELOC(chan, bo, 0,
+ ib_flags | NOUVEAU_BO_LOW, 0, 0);
+ OUT_RELOC(chan, bo, nvfx->idxbuf_format,
+ ib_flags | NOUVEAU_BO_OR,
+ 0, NV34TCL_IDXBUF_FORMAT_DMA1);
+ }
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c
new file mode 100644
index 00000000000..80b98b62d34
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -0,0 +1,1066 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_util.h"
+
+#include "nvfx_context.h"
+#include "nvfx_state.h"
+
+/* TODO (at least...):
+ * 1. Indexed consts + ARL
+ * 3. NV_vp11, NV_vp2, NV_vp3 features
+ * - extra arith opcodes
+ * - branching
+ * - texture sampling
+ * - indexed attribs
+ * - indexed results
+ * 4. bugs
+ */
+
+#include "nv30_vertprog.h"
+#include "nv40_vertprog.h"
+
+#define NVFX_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))
+
+struct nvfx_vpc {
+ struct nvfx_vertex_program *vp;
+
+ struct nvfx_vertex_program_exec *vpi;
+
+ unsigned r_temps;
+ unsigned r_temps_discard;
+ struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+ struct nvfx_sreg *r_address;
+ struct nvfx_sreg *r_temp;
+
+ struct nvfx_sreg *imm;
+ unsigned nr_imm;
+
+ unsigned hpos_idx;
+};
+
+static struct nvfx_sreg
+temp(struct nvfx_vpc *vpc)
+{
+ int idx = ffs(~vpc->r_temps) - 1;
+
+ if (idx < 0) {
+ NOUVEAU_ERR("out of temps!!\n");
+ assert(0);
+ return nvfx_sr(NVFXSR_TEMP, 0);
+ }
+
+ vpc->r_temps |= (1 << idx);
+ vpc->r_temps_discard |= (1 << idx);
+ return nvfx_sr(NVFXSR_TEMP, idx);
+}
+
+static INLINE void
+release_temps(struct nvfx_vpc *vpc)
+{
+ vpc->r_temps &= ~vpc->r_temps_discard;
+ vpc->r_temps_discard = 0;
+}
+
+static struct nvfx_sreg
+constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w)
+{
+ struct nvfx_vertex_program *vp = vpc->vp;
+ struct nvfx_vertex_program_data *vpd;
+ int idx;
+
+ if (pipe >= 0) {
+ for (idx = 0; idx < vp->nr_consts; idx++) {
+ if (vp->consts[idx].index == pipe)
+ return nvfx_sr(NVFXSR_CONST, idx);
+ }
+ }
+
+ idx = vp->nr_consts++;
+ vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
+ vpd = &vp->consts[idx];
+
+ vpd->index = pipe;
+ vpd->value[0] = x;
+ vpd->value[1] = y;
+ vpd->value[2] = z;
+ vpd->value[3] = w;
+ return nvfx_sr(NVFXSR_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nvfx_vp_arith(nvfx, (cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2))
+
+static void
+emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src)
+{
+ struct nvfx_vertex_program *vp = vpc->vp;
+ uint32_t sr = 0;
+
+ switch (src.type) {
+ case NVFXSR_TEMP:
+ sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT));
+ sr |= (src.index << NVFX_VP(SRC_TEMP_SRC_SHIFT));
+ break;
+ case NVFXSR_INPUT:
+ sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<
+ NVFX_VP(SRC_REG_TYPE_SHIFT));
+ vp->ir |= (1 << src.index);
+ hw[1] |= (src.index << NVFX_VP(INST_INPUT_SRC_SHIFT));
+ break;
+ case NVFXSR_CONST:
+ sr |= (NVFX_VP(SRC_REG_TYPE_CONST) <<
+ NVFX_VP(SRC_REG_TYPE_SHIFT));
+ assert(vpc->vpi->const_index == -1 ||
+ vpc->vpi->const_index == src.index);
+ vpc->vpi->const_index = src.index;
+ break;
+ case NVFXSR_NONE:
+ sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<
+ NVFX_VP(SRC_REG_TYPE_SHIFT));
+ break;
+ default:
+ assert(0);
+ }
+
+ if (src.negate)
+ sr |= NVFX_VP(SRC_NEGATE);
+
+ if (src.abs)
+ hw[0] |= (1 << (21 + pos));
+
+ sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) |
+ (src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT)) |
+ (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) |
+ (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT)));
+
+ switch (pos) {
+ case 0:
+ hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >>
+ NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT);
+ hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) <<
+ NVFX_VP(INST_SRC0L_SHIFT);
+ break;
+ case 1:
+ hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT);
+ break;
+ case 2:
+ hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >>
+ NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT);
+ hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) <<
+ NVFX_VP(INST_SRC2L_SHIFT);
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst)
+{
+ struct nvfx_vertex_program *vp = vpc->vp;
+
+ switch (dst.type) {
+ case NVFXSR_TEMP:
+ if(!nvfx->is_nv4x)
+ hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
+ else {
+ hw[3] |= NV40_VP_INST_DEST_MASK;
+ if (slot == 0) {
+ hw[0] |= (dst.index <<
+ NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
+ } else {
+ hw[3] |= (dst.index <<
+ NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
+ }
+ }
+ break;
+ case NVFXSR_OUTPUT:
+ /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */
+ switch (dst.index) {
+ case NVFX_VP_INST_DEST_CLIP(0):
+ vp->or |= (1 << 6);
+ vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0;
+ dst.index = NVFX_VP(INST_DEST_FOGC);
+ break;
+ case NVFX_VP_INST_DEST_CLIP(1):
+ vp->or |= (1 << 7);
+ vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1;
+ dst.index = NVFX_VP(INST_DEST_FOGC);
+ break;
+ case NVFX_VP_INST_DEST_CLIP(2):
+ vp->or |= (1 << 8);
+ vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2;
+ dst.index = NVFX_VP(INST_DEST_FOGC);
+ break;
+ case NVFX_VP_INST_DEST_CLIP(3):
+ vp->or |= (1 << 9);
+ vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3;
+ dst.index = NVFX_VP(INST_DEST_PSZ);
+ break;
+ case NVFX_VP_INST_DEST_CLIP(4):
+ vp->or |= (1 << 10);
+ vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4;
+ dst.index = NVFX_VP(INST_DEST_PSZ);
+ break;
+ case NVFX_VP_INST_DEST_CLIP(5):
+ vp->or |= (1 << 11);
+ vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5;
+ dst.index = NVFX_VP(INST_DEST_PSZ);
+ break;
+ default:
+ if(!nvfx->is_nv4x) {
+ switch (dst.index) {
+ case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+ case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+ case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+ case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+ case NV30_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
+ case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
+ case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
+ case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
+ case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
+ case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
+ case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
+ case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
+ case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
+ case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ }
+ } else {
+ switch (dst.index) {
+ case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
+ case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
+ case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break;
+ case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break;
+ case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
+ case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
+ case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break;
+ case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break;
+ case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break;
+ case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break;
+ case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break;
+ case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
+ case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
+ case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ }
+ }
+ break;
+ }
+
+ if(!nvfx->is_nv4x) {
+ hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
+ hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+
+ /*XXX: no way this is entirely correct, someone needs to
+ * figure out what exactly it is.
+ */
+ hw[3] |= 0x800;
+ } else {
+ hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);
+ if (slot == 0) {
+ hw[0] |= NV40_VP_INST_VEC_RESULT;
+ hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+ } else {
+ hw[3] |= NV40_VP_INST_SCA_RESULT;
+ hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static void
+nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op,
+ struct nvfx_sreg dst, int mask,
+ struct nvfx_sreg s0, struct nvfx_sreg s1,
+ struct nvfx_sreg s2)
+{
+ struct nvfx_vertex_program *vp = vpc->vp;
+ uint32_t *hw;
+
+ vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
+ vpc->vpi = &vp->insns[vp->nr_insns - 1];
+ memset(vpc->vpi, 0, sizeof(*vpc->vpi));
+ vpc->vpi->const_index = -1;
+
+ hw = vpc->vpi->data;
+
+ hw[0] |= (NVFX_COND_TR << NVFX_VP(INST_COND_SHIFT));
+ hw[0] |= ((0 << NVFX_VP(INST_COND_SWZ_X_SHIFT)) |
+ (1 << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) |
+ (2 << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) |
+ (3 << NVFX_VP(INST_COND_SWZ_W_SHIFT)));
+
+ if(!nvfx->is_nv4x) {
+ hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
+// hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK);
+// hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT));
+
+ if (dst.type == NVFXSR_OUTPUT) {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
+ } else {
+ if (slot)
+ hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
+ else
+ hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
+ }
+ } else {
+ if (slot == 0) {
+ hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT);
+ hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
+ hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
+ } else {
+ hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT);
+ hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20));
+ hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
+ }
+ }
+
+ emit_dst(nvfx, vpc, hw, slot, dst);
+ emit_src(nvfx, vpc, hw, 0, s0);
+ emit_src(nvfx, vpc, hw, 1, s1);
+ emit_src(nvfx, vpc, hw, 2, s2);
+}
+
+static INLINE struct nvfx_sreg
+tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
+ struct nvfx_sreg src = { 0 };
+
+ switch (fsrc->Register.File) {
+ case TGSI_FILE_INPUT:
+ src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index);
+ break;
+ case TGSI_FILE_CONSTANT:
+ src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ src = vpc->imm[fsrc->Register.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ src = vpc->r_temp[fsrc->Register.Index];
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ break;
+ }
+
+ src.abs = fsrc->Register.Absolute;
+ src.negate = fsrc->Register.Negate;
+ src.swz[0] = fsrc->Register.SwizzleX;
+ src.swz[1] = fsrc->Register.SwizzleY;
+ src.swz[2] = fsrc->Register.SwizzleZ;
+ src.swz[3] = fsrc->Register.SwizzleW;
+ return src;
+}
+
+static INLINE struct nvfx_sreg
+tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
+ struct nvfx_sreg dst = { 0 };
+
+ switch (fdst->Register.File) {
+ case TGSI_FILE_OUTPUT:
+ dst = vpc->r_result[fdst->Register.Index];
+ break;
+ case TGSI_FILE_TEMPORARY:
+ dst = vpc->r_temp[fdst->Register.Index];
+ break;
+ case TGSI_FILE_ADDRESS:
+ dst = vpc->r_address[fdst->Register.Index];
+ break;
+ default:
+ NOUVEAU_ERR("bad dst file\n");
+ break;
+ }
+
+ return dst;
+}
+
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+ int mask = 0;
+
+ if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X;
+ if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y;
+ if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z;
+ if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W;
+ return mask;
+}
+
+static boolean
+nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
+ const struct tgsi_full_instruction *finst)
+{
+ struct nvfx_sreg src[3], dst, tmp;
+ struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+ int mask;
+ int ai = -1, ci = -1, ii = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->Src[i];
+ if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(vpc, fsrc);
+ }
+ }
+
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->Src[i];
+
+ switch (fsrc->Register.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->Register.Index) {
+ ai = fsrc->Register.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ if ((ci == -1 && ii == -1) ||
+ ci == fsrc->Register.Index) {
+ ci = fsrc->Register.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ if ((ci == -1 && ii == -1) ||
+ ii == fsrc->Register.Index) {
+ ii = fsrc->Register.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ dst = tgsi_dst(vpc, &finst->Dst[0]);
+ mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]);
+ break;
+ case TGSI_OPCODE_ARL:
+ arith(vpc, VEC, ARL, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_COS:
+ arith(vpc, SCA, COS, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(vpc, VEC, DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ arith(vpc, SCA, EX2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_EXP:
+ arith(vpc, SCA, EXP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(vpc, VEC, FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(vpc, VEC, FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(vpc, SCA, LG2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LIT:
+ arith(vpc, SCA, LIT, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ arith(vpc, SCA, LOG, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LRP:
+ tmp = temp(vpc);
+ arith(vpc, VEC, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
+ arith(vpc, VEC, MAD, dst, mask, src[0], src[1], tmp);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(vpc, VEC, MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_POW:
+ tmp = temp(vpc);
+ arith(vpc, SCA, LG2, tmp, NVFX_VP_MASK_X, none, none,
+ swz(src[0], X, X, X, X));
+ arith(vpc, VEC, MUL, tmp, NVFX_VP_MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(vpc, SCA, EX2, dst, mask, none, none,
+ swz(tmp, X, X, X, X));
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(vpc, SCA, RCP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_RET:
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0]));
+ break;
+ case TGSI_OPCODE_SEQ:
+ arith(vpc, VEC, SEQ, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SFL:
+ arith(vpc, VEC, SFL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SGT:
+ arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SIN:
+ arith(vpc, SCA, SIN, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_SLE:
+ arith(vpc, VEC, SLE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SNE:
+ arith(vpc, VEC, SNE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SSG:
+ arith(vpc, VEC, SSG, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_STR:
+ arith(vpc, VEC, STR, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1]));
+ break;
+ case TGSI_OPCODE_XPD:
+ tmp = temp(vpc);
+ arith(vpc, VEC, MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(vpc, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ release_temps(vpc);
+ return TRUE;
+}
+
+static boolean
+nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
+ const struct tgsi_full_declaration *fdec)
+{
+ unsigned idx = fdec->Range.First;
+ int hw;
+
+ switch (fdec->Semantic.Name) {
+ case TGSI_SEMANTIC_POSITION:
+ hw = NVFX_VP(INST_DEST_POS);
+ vpc->hpos_idx = idx;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (fdec->Semantic.Index == 0) {
+ hw = NVFX_VP(INST_DEST_COL0);
+ } else
+ if (fdec->Semantic.Index == 1) {
+ hw = NVFX_VP(INST_DEST_COL1);
+ } else {
+ NOUVEAU_ERR("bad colour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ if (fdec->Semantic.Index == 0) {
+ hw = NVFX_VP(INST_DEST_BFC0);
+ } else
+ if (fdec->Semantic.Index == 1) {
+ hw = NVFX_VP(INST_DEST_BFC1);
+ } else {
+ NOUVEAU_ERR("bad bcolour semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ hw = NVFX_VP(INST_DEST_FOGC);
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ hw = NVFX_VP(INST_DEST_PSZ);
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (fdec->Semantic.Index <= 7) {
+ hw = NVFX_VP(INST_DEST_TC(fdec->Semantic.Index));
+ } else {
+ NOUVEAU_ERR("bad generic semantic index\n");
+ return FALSE;
+ }
+ break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ /* not really an error just a fallback */
+ NOUVEAU_ERR("cannot handle edgeflag output\n");
+ return FALSE;
+ default:
+ NOUVEAU_ERR("bad output semantic\n");
+ return FALSE;
+ }
+
+ vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);
+ return TRUE;
+}
+
+static boolean
+nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)
+{
+ struct tgsi_parse_context p;
+ int high_temp = -1, high_addr = -1, nr_imm = 0, i;
+
+ tgsi_parse_init(&p, vpc->vp->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&p)) {
+ const union tgsi_full_token *tok = &p.FullToken;
+
+ tgsi_parse_token(&p);
+ switch(tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ nr_imm++;
+ break;
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+
+ fdec = &p.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_TEMPORARY:
+ if (fdec->Range.Last > high_temp) {
+ high_temp =
+ fdec->Range.Last;
+ }
+ break;
+#if 0 /* this would be nice.. except gallium doesn't track it */
+ case TGSI_FILE_ADDRESS:
+ if (fdec->Range.Last > high_addr) {
+ high_addr =
+ fdec->Range.Last;
+ }
+ break;
+#endif
+ case TGSI_FILE_OUTPUT:
+ if (!nvfx_vertprog_parse_decl_output(nvfx, vpc, fdec))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+#if 1 /* yay, parse instructions looking for address regs instead */
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+ const struct tgsi_full_dst_register *fdst;
+
+ finst = &p.FullToken.FullInstruction;
+ fdst = &finst->Dst[0];
+
+ if (fdst->Register.File == TGSI_FILE_ADDRESS) {
+ if (fdst->Register.Index > high_addr)
+ high_addr = fdst->Register.Index;
+ }
+
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ tgsi_parse_free(&p);
+
+ if (nr_imm) {
+ vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg));
+ assert(vpc->imm);
+ }
+
+ if (++high_temp) {
+ vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
+ for (i = 0; i < high_temp; i++)
+ vpc->r_temp[i] = temp(vpc);
+ }
+
+ if (++high_addr) {
+ vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg));
+ for (i = 0; i < high_addr; i++)
+ vpc->r_address[i] = temp(vpc);
+ }
+
+ vpc->r_temps_discard = 0;
+ return TRUE;
+}
+
+static void
+nvfx_vertprog_translate(struct nvfx_context *nvfx,
+ struct nvfx_vertex_program *vp)
+{
+ struct tgsi_parse_context parse;
+ struct nvfx_vpc *vpc = NULL;
+ struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+ int i;
+
+ vpc = CALLOC(1, sizeof(struct nvfx_vpc));
+ if (!vpc)
+ return;
+ vpc->vp = vp;
+
+ if (!nvfx_vertprog_prepare(nvfx, vpc)) {
+ FREE(vpc);
+ return;
+ }
+
+ /* Redirect post-transform vertex position to a temp if user clip
+ * planes are enabled. We need to append code to the vtxprog
+ * to handle clip planes later.
+ */
+ if (vp->ucp.nr) {
+ vpc->r_result[vpc->hpos_idx] = temp(vpc);
+ vpc->r_temps_discard = 0;
+ }
+
+ tgsi_parse_init(&parse, vp->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const struct tgsi_full_immediate *imm;
+
+ imm = &parse.FullToken.FullImmediate;
+ assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
+ assert(imm->Immediate.NrTokens == 4 + 1);
+ vpc->imm[vpc->nr_imm++] =
+ constant(vpc, -1,
+ imm->u[0].Float,
+ imm->u[1].Float,
+ imm->u[2].Float,
+ imm->u[3].Float);
+ }
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+ finst = &parse.FullToken.FullInstruction;
+ if (!nvfx_vertprog_parse_instruction(nvfx, vpc, finst))
+ goto out_err;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Write out HPOS if it was redirected to a temp earlier */
+ if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) {
+ struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT,
+ NVFX_VP(INST_DEST_POS));
+ struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx];
+
+ arith(vpc, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none);
+ }
+
+ /* Insert code to handle user clip planes */
+ for (i = 0; i < vp->ucp.nr; i++) {
+ struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT,
+ NVFX_VP_INST_DEST_CLIP(i));
+ struct nvfx_sreg ceqn = constant(vpc, -1,
+ nvfx->clip.ucp[i][0],
+ nvfx->clip.ucp[i][1],
+ nvfx->clip.ucp[i][2],
+ nvfx->clip.ucp[i][3]);
+ struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx];
+ unsigned mask;
+
+ switch (i) {
+ case 0: case 3: mask = NVFX_VP_MASK_Y; break;
+ case 1: case 4: mask = NVFX_VP_MASK_Z; break;
+ case 2: case 5: mask = NVFX_VP_MASK_W; break;
+ default:
+ NOUVEAU_ERR("invalid clip dist #%d\n", i);
+ goto out_err;
+ }
+
+ arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none);
+ }
+
+ vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
+ vp->translated = TRUE;
+out_err:
+ tgsi_parse_free(&parse);
+ if (vpc->r_temp)
+ FREE(vpc->r_temp);
+ if (vpc->r_address)
+ FREE(vpc->r_address);
+ if (vpc->imm)
+ FREE(vpc->imm);
+ FREE(vpc);
+}
+
+boolean
+nvfx_vertprog_validate(struct nvfx_context *nvfx)
+{
+ struct pipe_context *pipe = &nvfx->pipe;
+ struct nvfx_screen *screen = nvfx->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *eng3d = screen->eng3d;
+ struct nvfx_vertex_program *vp;
+ struct pipe_resource *constbuf;
+ struct pipe_transfer *transfer = NULL;
+ boolean upload_code = FALSE, upload_data = FALSE;
+ int i;
+
+ if (nvfx->render_mode == HW) {
+ vp = nvfx->vertprog;
+ constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX];
+
+ // TODO: ouch! can't we just use constant slots for these?!
+ if ((nvfx->dirty & NVFX_NEW_UCP) ||
+ memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) {
+ nvfx_vertprog_destroy(nvfx, vp);
+ memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp));
+ }
+ } else {
+ vp = nvfx->swtnl.vertprog;
+ constbuf = NULL;
+ }
+
+ /* Translate TGSI shader into hw bytecode */
+ if (!vp->translated)
+ {
+ nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG;
+ nvfx_vertprog_translate(nvfx, vp);
+ if (!vp->translated) {
+ nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
+ return FALSE;
+ }
+ }
+
+ /* Allocate hw vtxprog exec slots */
+ if (!vp->exec) {
+ struct nouveau_resource *heap = nvfx->screen->vp_exec_heap;
+ uint vplen = vp->nr_insns;
+
+ if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) {
+ while (heap->next && heap->size < vplen) {
+ struct nvfx_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nouveau_resource_free(&evict->exec);
+ }
+
+ if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec))
+ assert(0);
+ }
+
+ upload_code = TRUE;
+ }
+
+ /* Allocate hw vtxprog const slots */
+ if (vp->nr_consts && !vp->data) {
+ struct nouveau_resource *heap = nvfx->screen->vp_data_heap;
+
+ if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+ while (heap->next && heap->size < vp->nr_consts) {
+ struct nvfx_vertex_program *evict;
+
+ evict = heap->next->priv;
+ nouveau_resource_free(&evict->data);
+ }
+
+ if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data))
+ assert(0);
+ }
+
+ /*XXX: handle this some day */
+ assert(vp->data->start >= vp->data_start_min);
+
+ upload_data = TRUE;
+ if (vp->data_start != vp->data->start)
+ upload_code = TRUE;
+ }
+
+ /* If exec or data segments moved we need to patch the program to
+ * fixup offsets and register IDs.
+ */
+ if (vp->exec_start != vp->exec->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nvfx_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->has_branch_offset) {
+ assert(0);
+ }
+ }
+
+ vp->exec_start = vp->exec->start;
+ }
+
+ if (vp->nr_consts && vp->data_start != vp->data->start) {
+ for (i = 0; i < vp->nr_insns; i++) {
+ struct nvfx_vertex_program_exec *vpi = &vp->insns[i];
+
+ if (vpi->const_index >= 0) {
+ vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK);
+ vpi->data[1] |=
+ (vpi->const_index + vp->data->start) <<
+ NVFX_VP(INST_CONST_SRC_SHIFT);
+
+ }
+ }
+
+ vp->data_start = vp->data->start;
+ }
+
+ /* Update + Upload constant values */
+ if (vp->nr_consts) {
+ float *map = NULL;
+
+ if (constbuf) {
+ map = pipe_buffer_map(pipe, constbuf,
+ PIPE_TRANSFER_READ,
+ &transfer);
+ }
+
+ for (i = 0; i < vp->nr_consts; i++) {
+ struct nvfx_vertex_program_data *vpd = &vp->consts[i];
+
+ if (vpd->index >= 0) {
+ if (!upload_data &&
+ !memcmp(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float)))
+ continue;
+ memcpy(vpd->value, &map[vpd->index * 4],
+ 4 * sizeof(float));
+ }
+
+ BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (chan, i + vp->data->start);
+ OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
+ }
+
+ if (constbuf)
+ pipe_buffer_unmap(pipe, constbuf, transfer);
+ }
+
+ /* Upload vtxprog */
+ if (upload_code) {
+#if 0
+ for (i = 0; i < vp->nr_insns; i++) {
+ NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]);
+ NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]);
+ NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]);
+ NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]);
+ }
+#endif
+ BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (chan, vp->exec->start);
+ for (i = 0; i < vp->nr_insns; i++) {
+ BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (chan, vp->insns[i].data, 4);
+ }
+ }
+
+ if(nvfx->dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
+ {
+ WAIT_RING(chan, 7);
+ OUT_RING(chan, RING_3D(NV34TCL_VP_START_FROM_ID, 1));
+ OUT_RING(chan, vp->exec->start);
+ if(nvfx->is_nv4x) {
+ OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 2));
+ OUT_RING(chan, vp->ir);
+ OUT_RING(chan, vp->or);
+ }
+ OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
+ OUT_RING(chan, vp->clip_ctrl);
+ }
+
+ return TRUE;
+}
+
+void
+nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp)
+{
+ vp->translated = FALSE;
+
+ if (vp->nr_insns) {
+ FREE(vp->insns);
+ vp->insns = NULL;
+ vp->nr_insns = 0;
+ }
+
+ if (vp->nr_consts) {
+ FREE(vp->consts);
+ vp->consts = NULL;
+ vp->nr_consts = 0;
+ }
+
+ nouveau_resource_free(&vp->exec);
+ vp->exec_start = 0;
+ nouveau_resource_free(&vp->data);
+ vp->data_start = 0;
+ vp->data_start_min = 0;
+
+ vp->ir = vp->or = vp->clip_ctrl = 0;
+}