diff options
author | Christian König <[email protected]> | 2010-10-12 23:05:25 +0200 |
---|---|---|
committer | Christian König <[email protected]> | 2010-10-12 23:07:29 +0200 |
commit | 695cc370a280a637f411f5ff3877b3fd1c05e424 (patch) | |
tree | 69ae2a8fbecfa553faba59274688ffe11ee1a612 /src/gallium/drivers/nvfx | |
parent | f3e34ba6fba76870b1c91a27adb706d1b87aeec8 (diff) | |
parent | 48156b87bc9d3e09ec34372d69504a787332ea0b (diff) |
Merge branch 'master' of ssh://git.freedesktop.org/git/mesa/mesa into pipe-video
Conflicts:
configure.ac
src/gallium/drivers/nvfx/Makefile
src/gallium/include/pipe/p_defines.h
src/gallium/include/pipe/p_screen.h
src/gallium/include/state_tracker/dri1_api.h
src/gallium/include/state_tracker/drm_api.h
src/gallium/winsys/nouveau/drm/nouveau_drm_api.c
Diffstat (limited to 'src/gallium/drivers/nvfx')
44 files changed, 10527 insertions, 3805 deletions
diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index e7ca6e6cb57..95da7782256 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -4,7 +4,7 @@ include $(TOP)/configs/current LIBNAME = nvfx C_SOURCES = \ - nv04_surface_2d.c \ + nv04_2d.c \ nvfx_buffer.c \ nvfx_context.c \ nvfx_clear.c \ @@ -14,18 +14,13 @@ C_SOURCES = \ nv30_fragtex.c \ nv40_fragtex.c \ nvfx_miptree.c \ + nvfx_push.c \ nvfx_query.c \ nvfx_resource.c \ nvfx_screen.c \ nvfx_state.c \ - nvfx_state_blend.c \ nvfx_state_emit.c \ nvfx_state_fb.c \ - nvfx_state_rasterizer.c \ - nvfx_state_scissor.c \ - nvfx_state_stipple.c \ - nvfx_state_viewport.c \ - nvfx_state_zsa.c \ nvfx_surface.c \ nvfx_transfer.c \ nvfx_vbo.c \ diff --git a/src/gallium/drivers/nvfx/SConscript b/src/gallium/drivers/nvfx/SConscript index 02d931b10e8..f1041e77633 100644 --- a/src/gallium/drivers/nvfx/SConscript +++ b/src/gallium/drivers/nvfx/SConscript @@ -9,7 +9,7 @@ env.PrependUnique(delete_existing=1, CPPPATH = [ nvfx = env.ConvenienceLibrary( target = 'nvfx', source = [ - 'nv04_surface_2d.c', + 'nv04_2d.c', 'nvfx_buffer.c', 'nvfx_context.c', 'nvfx_clear.c', @@ -19,18 +19,13 @@ nvfx = env.ConvenienceLibrary( 'nv30_fragtex.c', 'nv40_fragtex.c', 'nvfx_miptree.c', + 'nvfx_push.c', 'nvfx_query.c', 'nvfx_resource.c', 'nvfx_screen.c', 'nvfx_state.c', - 'nvfx_state_blend.c', 'nvfx_state_emit.c', 'nvfx_state_fb.c', - 'nvfx_state_rasterizer.c', - 'nvfx_state_scissor.c', - 'nvfx_state_stipple.c', - 'nvfx_state_viewport.c', - 'nvfx_state_zsa.c', 'nvfx_surface.c', 'nvfx_transfer.c', 'nvfx_vbo.c', diff --git a/src/gallium/drivers/nvfx/nv01_2d.xml.h b/src/gallium/drivers/nvfx/nv01_2d.xml.h new file mode 100644 index 00000000000..b963eb7edce --- /dev/null +++ b/src/gallium/drivers/nvfx/nv01_2d.xml.h @@ -0,0 +1,1343 @@ +#ifndef NV01_2D_XML +#define NV01_2D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv01_2d.xml ( 33462 bytes, from 2010-08-05 19:38:53) +- copyright.xml ( 6503 bytes, from 2010-04-10 23:15:50) +- nv_defs.xml ( 4437 bytes, from 2010-08-05 19:38:53) +- nv_object.xml ( 10424 bytes, from 2010-08-05 19:38:53) +- nvchipsets.xml ( 2824 bytes, from 2010-08-05 19:38:53) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NV01_CONTEXT_BETA1_DMA_NOTIFY 0x00000180 + +#define NV01_CONTEXT_BETA1_BETA_1D31 0x00000300 + + +#define NV04_BETA_SOLID_DMA_NOTIFY 0x00000180 + +#define NV04_BETA_SOLID_BETA_FACTOR 0x00000300 + + +#define NV01_CONTEXT_COLOR_KEY_DMA_NOTIFY 0x00000180 + +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT 0x00000300 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A8R8G8B8 0x00000003 + +#define NV01_CONTEXT_COLOR_KEY_COLOR 0x00000304 + + +#define NV01_CONTEXT_PATTERN_DMA_NOTIFY 0x00000180 + +#define NV01_CONTEXT_PATTERN_COLOR_FORMAT 0x00000300 + +#define NV01_CONTEXT_PATTERN_MONOCHROME_FORMAT 0x00000304 + +#define NV01_CONTEXT_PATTERN_SHAPE 0x00000308 + +#define NV01_CONTEXT_PATTERN_COLOR(i0) (0x00000310 + 0x4*(i0)) +#define NV01_CONTEXT_PATTERN_COLOR__ESIZE 0x00000004 +#define NV01_CONTEXT_PATTERN_COLOR__LEN 0x00000002 + +#define NV01_CONTEXT_PATTERN_PATTERN(i0) (0x00000318 + 0x4*(i0)) +#define NV01_CONTEXT_PATTERN_PATTERN__ESIZE 0x00000004 +#define NV01_CONTEXT_PATTERN_PATTERN__LEN 0x00000002 + + +#define NV01_CONTEXT_CLIP_RECTANGLE_DMA_NOTIFY 0x00000180 + +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT 0x00000300 +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X__MASK 0x0000ffff +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X__SHIFT 0 +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y__MASK 0xffff0000 +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y__SHIFT 16 + +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE 0x00000304 +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W__MASK 0x0000ffff +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W__SHIFT 0 +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H__MASK 0xffff0000 +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H__SHIFT 16 + + +#define NV04_CONTEXT_SURFACES_2D_DMA_NOTIFY 0x00000180 + +#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE 0x00000184 + +#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_DESTIN 0x00000188 + + +#define NV50_CONTEXT_SURFACES_2D_SRC_LINEAR 0x00000200 + +#define NV50_CONTEXT_SURFACES_2D_SRC_TILE_MODE 0x00000204 + +#define NV50_CONTEXT_SURFACES_2D_SRC_WIDTH 0x00000208 + +#define NV50_CONTEXT_SURFACES_2D_SRC_HEIGHT 0x0000020c + +#define NV50_CONTEXT_SURFACES_2D_UNK0210 0x00000210 + +#define NV50_CONTEXT_SURFACES_2D_UNK0214 0x00000214 + +#define NV50_CONTEXT_SURFACES_2D_DST_LINEAR 0x00000218 + +#define NV50_CONTEXT_SURFACES_2D_DST_TILE_MODE 0x0000021c + +#define NV50_CONTEXT_SURFACES_2D_DST_WIDTH 0x00000220 + +#define NV50_CONTEXT_SURFACES_2D_DST_HEIGHT 0x00000224 + +#define NV50_CONTEXT_SURFACES_2D_UNK0228 0x00000228 + +#define NV50_CONTEXT_SURFACES_2D_UNK022C 0x0000022c + +#define NV50_CONTEXT_SURFACES_2D_OFFSET_SOURCE_HIGH 0x00000230 + +#define NV50_CONTEXT_SURFACES_2D_OFFSET_DESTIN_HIGH 0x00000234 + +#define NV04_CONTEXT_SURFACES_2D_FORMAT 0x00000300 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y8 0x00000001 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_Z1R5G5B5 0x00000002 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_X1R5G5B5 0x00000003 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5 0x00000004 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y16 0x00000005 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_Z8R8G8B8 0x00000006 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_X8R8G8B8 0x00000007 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_Z1A7R8G8B8 0x00000008 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_X1A7R8G8B8 0x00000009 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8 0x0000000a +#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y32 0x0000000b + +#define NV04_CONTEXT_SURFACES_2D_PITCH 0x00000304 +#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE__MASK 0x0000ffff +#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE__SHIFT 0 +#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN__MASK 0xffff0000 +#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN__SHIFT 16 + +#define NV04_CONTEXT_SURFACES_2D_OFFSET_SOURCE 0x00000308 + +#define NV04_CONTEXT_SURFACES_2D_OFFSET_DESTIN 0x0000030c + + +#define NV04_SWIZZLED_SURFACE_DMA_NOTIFY 0x00000180 + +#define NV04_SWIZZLED_SURFACE_DMA_IMAGE 0x00000184 + +#define NV04_SWIZZLED_SURFACE_FORMAT 0x00000300 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR__MASK 0x000000ff +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR__SHIFT 0 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y8 0x00000001 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000002 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000003 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_R5G6B5 0x00000004 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y16 0x00000005 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000006 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000007 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000008 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000009 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_A8R8G8B8 0x0000000a +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y32 0x0000000b +#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U__MASK 0x00ff0000 +#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U__SHIFT 16 +#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V__MASK 0xff000000 +#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V__SHIFT 24 + +#define NV04_SWIZZLED_SURFACE_OFFSET 0x00000304 + + +#define NV03_CONTEXT_ROP_DMA_NOTIFY 0x00000180 + +#define NV03_CONTEXT_ROP_ROP 0x00000300 + + +#define NV04_IMAGE_PATTERN_DMA_NOTIFY 0x00000180 + +#define NV04_IMAGE_PATTERN_COLOR_FORMAT 0x00000300 +#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV04_IMAGE_PATTERN_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A8R8G8B8 0x00000003 + +#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT 0x00000304 +#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_CGA6 0x00000001 +#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_LE 0x00000002 + +#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE 0x00000308 +#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_8X8 0x00000000 +#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_64X1 0x00000001 +#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_1X64 0x00000002 + +#define NV04_IMAGE_PATTERN_PATTERN_SELECT 0x0000030c +#define NV04_IMAGE_PATTERN_PATTERN_SELECT_MONO 0x00000001 +#define NV04_IMAGE_PATTERN_PATTERN_SELECT_COLOR 0x00000002 + +#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR0 0x00000310 + +#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR1 0x00000314 + +#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN0 0x00000318 + +#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN1 0x0000031c + +#define NV04_IMAGE_PATTERN_PATTERN_Y8(i0) (0x00000400 + 0x4*(i0)) +#define NV04_IMAGE_PATTERN_PATTERN_Y8__ESIZE 0x00000004 +#define NV04_IMAGE_PATTERN_PATTERN_Y8__LEN 0x00000010 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0__MASK 0x000000ff +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0__SHIFT 0 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1__MASK 0x0000ff00 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1__SHIFT 8 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2__MASK 0x00ff0000 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2__SHIFT 16 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3__MASK 0xff000000 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3__SHIFT 24 + +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5(i0) (0x00000500 + 0x4*(i0)) +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5__ESIZE 0x00000004 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5__LEN 0x00000020 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0__MASK 0x0000001f +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0__SHIFT 0 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0__MASK 0x000007e0 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0__SHIFT 5 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0__MASK 0x0000f800 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0__SHIFT 11 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1__MASK 0x001f0000 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1__SHIFT 16 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1__MASK 0x07e00000 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1__SHIFT 21 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1__MASK 0xf8000000 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1__SHIFT 27 + +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5(i0) (0x00000600 + 0x4*(i0)) +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5__ESIZE 0x00000004 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5__LEN 0x00000020 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0__MASK 0x0000001f +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0__SHIFT 0 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0__MASK 0x000003e0 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0__SHIFT 5 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0__MASK 0x00007c00 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0__SHIFT 10 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1__MASK 0x001f0000 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1__SHIFT 16 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1__MASK 0x03e00000 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1__SHIFT 21 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1__MASK 0x7c000000 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1__SHIFT 26 + +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8(i0) (0x00000700 + 0x4*(i0)) +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8__ESIZE 0x00000004 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8__LEN 0x00000040 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B__MASK 0x000000ff +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B__SHIFT 0 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G__MASK 0x0000ff00 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G__SHIFT 8 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R__MASK 0x00ff0000 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R__SHIFT 16 + + +#define NV01_RENDER_SOLID_LINE_PATCH 0x0000010c + +#define NV01_RENDER_SOLID_LINE_DMA_NOTIFY 0x00000180 + +#define NV01_RENDER_SOLID_LINE_CLIP_RECTANGLE 0x00000184 + +#define NV01_RENDER_SOLID_LINE_PATTERN 0x00000188 + +#define NV04_RENDER_SOLID_LINE_PATTERN 0x00000188 + +#define NV01_RENDER_SOLID_LINE_ROP 0x0000018c + +#define NV01_RENDER_SOLID_LINE_BETA1 0x00000190 + +#define NV01_RENDER_SOLID_LINE_SURFACE_DST 0x00000194 + + +#define NV04_RENDER_SOLID_LINE_BETA4 0x00000194 + +#define NV04_RENDER_SOLID_LINE_SURFACE 0x00000198 + +#define NV01_RENDER_SOLID_LINE_OPERATION 0x000002fc +#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_RENDER_SOLID_LINE_OPERATION_ROP_AND 0x00000001 +#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_AND 0x00000002 +#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY 0x00000003 +#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT 0x00000300 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A8R8G8B8 0x00000003 + +#define NV01_RENDER_SOLID_LINE_COLOR 0x00000304 + +#define NV01_RENDER_SOLID_LINE_LINE_POINT0(i0) (0x00000400 + 0x8*(i0)) +#define NV01_RENDER_SOLID_LINE_LINE_POINT0__ESIZE 0x00000008 +#define NV01_RENDER_SOLID_LINE_LINE_POINT0__LEN 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X__SHIFT 0 +#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y__SHIFT 16 + +#define NV01_RENDER_SOLID_LINE_LINE_POINT1(i0) (0x00000404 + 0x8*(i0)) +#define NV01_RENDER_SOLID_LINE_LINE_POINT1__ESIZE 0x00000008 +#define NV01_RENDER_SOLID_LINE_LINE_POINT1__LEN 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X__SHIFT 0 +#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y__SHIFT 16 + +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X(i0) (0x00000480 + 0x10*(i0)) +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X__ESIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X__LEN 0x00000010 + +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y(i0) (0x00000484 + 0x10*(i0)) +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y__ESIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y__LEN 0x00000010 + +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X(i0) (0x00000488 + 0x10*(i0)) +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X__ESIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X__LEN 0x00000010 + +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y(i0) (0x0000048c + 0x10*(i0)) +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y__ESIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y__LEN 0x00000010 + +#define NV01_RENDER_SOLID_LINE_POLYLINE(i0) (0x00000500 + 0x4*(i0)) +#define NV01_RENDER_SOLID_LINE_POLYLINE__ESIZE 0x00000004 +#define NV01_RENDER_SOLID_LINE_POLYLINE__LEN 0x00000020 +#define NV01_RENDER_SOLID_LINE_POLYLINE_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_LINE_POLYLINE_X__SHIFT 0 +#define NV01_RENDER_SOLID_LINE_POLYLINE_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_LINE_POLYLINE_Y__SHIFT 16 + +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X(i0) (0x00000580 + 0x8*(i0)) +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X__ESIZE 0x00000008 +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X__LEN 0x00000010 + +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y(i0) (0x00000584 + 0x8*(i0)) +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y__ESIZE 0x00000008 +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y__LEN 0x00000010 + +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR(i0) (0x00000600 + 0x8*(i0)) +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR__ESIZE 0x00000008 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR__LEN 0x00000010 + +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT(i0) (0x00000604 + 0x8*(i0)) +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT__ESIZE 0x00000008 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT__LEN 0x00000010 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X__SHIFT 0 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y__SHIFT 16 + + +#define NV01_RENDER_SOLID_TRIANGLE_PATCH 0x0000010c + +#define NV01_RENDER_SOLID_TRIANGLE_DMA_NOTIFY 0x00000180 + +#define NV01_RENDER_SOLID_TRIANGLE_CLIP_RECTANGLE 0x00000184 + +#define NV01_RENDER_SOLID_TRIANGLE_PATTERN 0x00000188 + +#define NV04_RENDER_SOLID_TRIANGLE_PATTERN 0x00000188 + +#define NV01_RENDER_SOLID_TRIANGLE_ROP 0x0000018c + +#define NV01_RENDER_SOLID_TRIANGLE_BETA1 0x00000190 + +#define NV01_RENDER_SOLID_TRIANGLE_SURFACE_DST 0x00000194 + + +#define NV04_RENDER_SOLID_TRIANGLE_BETA4 0x00000194 + +#define NV04_RENDER_SOLID_TRIANGLE_SURFACE 0x00000198 + +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION 0x000002fc +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_ROP_AND 0x00000001 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_AND 0x00000002 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY 0x00000003 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT 0x00000300 +#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT_A8R8G8B8 0x00000003 + +#define NV01_RENDER_SOLID_TRIANGLE_COLOR 0x00000304 + +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0 0x00000310 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X__SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y__SHIFT 16 + +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1 0x00000314 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X__SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y__SHIFT 16 + +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2 0x00000318 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X__SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y__SHIFT 16 + +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_X 0x00000320 + +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_Y 0x00000324 + +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_X 0x00000328 + +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_Y 0x0000032c + +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_X 0x00000330 + +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_Y 0x00000334 + +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH(i0) (0x00000400 + 0x4*(i0)) +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH__ESIZE 0x00000004 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH__LEN 0x00000020 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X__SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y__SHIFT 16 + +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X(i0) (0x00000480 + 0x8*(i0)) +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X__ESIZE 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X__LEN 0x00000010 + +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y(i0) (0x00000484 + 0x8*(i0)) +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y__ESIZE 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y__LEN 0x00000010 + +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR(i0) (0x00000500 + 0x10*(i0)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR__ESIZE 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR__LEN 0x00000008 + +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0(i0) (0x00000504 + 0x10*(i0)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0__ESIZE 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0__LEN 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X__SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y__SHIFT 16 + +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1(i0) (0x00000508 + 0x10*(i0)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1__ESIZE 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1__LEN 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X__SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y__SHIFT 16 + +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2(i0) (0x0000050c + 0x10*(i0)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2__ESIZE 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2__LEN 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X__SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y__SHIFT 16 + +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR(i0) (0x00000580 + 0x8*(i0)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR__ESIZE 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR__LEN 0x00000010 + +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT(i0) (0x00000584 + 0x8*(i0)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT__ESIZE 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT__LEN 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X__SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y__SHIFT 16 + + +#define NV01_RENDER_SOLID_RECTANGLE_PATCH 0x0000010c + +#define NV01_RENDER_SOLID_RECTANGLE_DMA_NOTIFY 0x00000180 + +#define NV01_RENDER_SOLID_RECTANGLE_CLIP_RECTANGLE 0x00000184 + +#define NV01_RENDER_SOLID_RECTANGLE_PATTERN 0x00000188 + +#define NV04_RENDER_SOLID_RECTANGLE_PATTERN 0x00000188 + +#define NV01_RENDER_SOLID_RECTANGLE_ROP 0x0000018c + +#define NV01_RENDER_SOLID_RECTANGLE_BETA1 0x00000190 + +#define NV01_RENDER_SOLID_RECTANGLE_SURFACE_DST 0x00000194 + + +#define NV04_RENDER_SOLID_RECTANGLE_BETA4 0x00000194 + +#define NV04_RENDER_SOLID_RECTANGLE_SURFACE 0x00000198 + +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION 0x000002fc +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_ROP_AND 0x00000001 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_AND 0x00000002 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY 0x00000003 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT 0x00000300 +#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT_A8R8G8B8 0x00000003 + +#define NV01_RENDER_SOLID_RECTANGLE_COLOR 0x00000304 + +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT(i0) (0x00000400 + 0x8*(i0)) +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT__ESIZE 0x00000008 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT__LEN 0x00000010 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X__MASK 0x0000ffff +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X__SHIFT 0 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y__MASK 0xffff0000 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y__SHIFT 16 + +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE(i0) (0x00000404 + 0x8*(i0)) +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE__ESIZE 0x00000008 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE__LEN 0x00000010 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W__MASK 0x0000ffff +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W__SHIFT 0 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H__MASK 0xffff0000 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H__SHIFT 16 + + +#define NV01_IMAGE_BLIT_PATCH 0x0000010c + + +#define NV11_IMAGE_BLIT_WAIT_FOR_IDLE 0x00000108 + +#define NV11_IMAGE_BLIT_UNK120 0x00000120 + +#define NV11_IMAGE_BLIT_UNK124 0x00000124 + +#define NV11_IMAGE_BLIT_UNK128 0x00000128 + +#define NV11_IMAGE_BLIT_UNK12C 0x0000012c + +#define NV11_IMAGE_BLIT_UNK130 0x00000130 + +#define NV11_IMAGE_BLIT_UNK134 0x00000134 + +#define NV01_IMAGE_BLIT_DMA_NOTIFY 0x00000180 + +#define NV01_IMAGE_BLIT_COLOR_KEY 0x00000184 + +#define NV04_IMAGE_BLIT_COLOR_KEY 0x00000184 + +#define NV01_IMAGE_BLIT_CLIP_RECTANGLE 0x00000188 + +#define NV01_IMAGE_BLIT_PATTERN 0x0000018c + +#define NV04_IMAGE_BLIT_PATTERN 0x0000018c + +#define NV01_IMAGE_BLIT_ROP 0x00000190 + +#define NV01_IMAGE_BLIT_BETA1 0x00000194 + + +#define NV01_IMAGE_BLIT_SURFACE_SRC 0x00000198 + +#define NV01_IMAGE_BLIT_SURFACE_DST 0x0000019c + + +#define NV04_IMAGE_BLIT_BETA4 0x00000198 + +#define NV04_IMAGE_BLIT_SURFACES 0x0000019c + +#define NV01_IMAGE_BLIT_OPERATION 0x000002fc +#define NV01_IMAGE_BLIT_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_IMAGE_BLIT_OPERATION_ROP_AND 0x00000001 +#define NV01_IMAGE_BLIT_OPERATION_BLEND_AND 0x00000002 +#define NV01_IMAGE_BLIT_OPERATION_SRCCOPY 0x00000003 +#define NV01_IMAGE_BLIT_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_IMAGE_BLIT_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV01_IMAGE_BLIT_POINT_IN 0x00000300 +#define NV01_IMAGE_BLIT_POINT_IN_X__MASK 0x0000ffff +#define NV01_IMAGE_BLIT_POINT_IN_X__SHIFT 0 +#define NV01_IMAGE_BLIT_POINT_IN_Y__MASK 0xffff0000 +#define NV01_IMAGE_BLIT_POINT_IN_Y__SHIFT 16 + +#define NV01_IMAGE_BLIT_POINT_OUT 0x00000304 +#define NV01_IMAGE_BLIT_POINT_OUT_X__MASK 0x0000ffff +#define NV01_IMAGE_BLIT_POINT_OUT_X__SHIFT 0 +#define NV01_IMAGE_BLIT_POINT_OUT_Y__MASK 0xffff0000 +#define NV01_IMAGE_BLIT_POINT_OUT_Y__SHIFT 16 + +#define NV01_IMAGE_BLIT_SIZE 0x00000308 +#define NV01_IMAGE_BLIT_SIZE_W__MASK 0x0000ffff +#define NV01_IMAGE_BLIT_SIZE_W__SHIFT 0 +#define NV01_IMAGE_BLIT_SIZE_H__MASK 0xffff0000 +#define NV01_IMAGE_BLIT_SIZE_H__SHIFT 16 + + +#define NV04_INDEXED_IMAGE_FROM_CPU_PATCH 0x0000010c + +#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180 + +#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_LUT 0x00000184 + +#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR_KEY 0x00000188 + +#define NV04_INDEXED_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x0000018c + +#define NV04_INDEXED_IMAGE_FROM_CPU_PATTERN 0x00000190 + +#define NV04_INDEXED_IMAGE_FROM_CPU_ROP 0x00000194 + +#define NV04_INDEXED_IMAGE_FROM_CPU_BETA1 0x00000198 + +#define NV04_INDEXED_IMAGE_FROM_CPU_BETA4 0x0000019c + +#define NV04_INDEXED_IMAGE_FROM_CPU_SURFACE 0x000001a0 + +#define NV05_INDEXED_IMAGE_FROM_CPU_SURFACE 0x000001a0 + +#define NV05_INDEXED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000003e0 + +#define NV04_INDEXED_IMAGE_FROM_CPU_OPERATION 0x000003e4 + +#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR_FORMAT 0x000003e8 + +#define NV04_INDEXED_IMAGE_FROM_CPU_INDEX_FORMAT 0x000003ec + +#define NV04_INDEXED_IMAGE_FROM_CPU_LUT_OFFSET 0x000003f0 + +#define NV04_INDEXED_IMAGE_FROM_CPU_POINT 0x000003f4 + +#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_OUT 0x000003f8 + +#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_IN 0x000003fc + +#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR(i0) (0x00000400 + 0x4*(i0)) +#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR__ESIZE 0x00000004 +#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR__LEN 0x00000700 + + +#define NV10_IMAGE_FROM_CPU_WAIT_FOR_IDLE 0x00000108 + +#define NV01_IMAGE_FROM_CPU_PATCH 0x0000010c + +#define NV01_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180 + +#define NV01_IMAGE_FROM_CPU_COLOR_KEY 0x00000184 + +#define NV04_IMAGE_FROM_CPU_COLOR_KEY 0x00000184 + +#define NV01_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x00000188 + +#define NV01_IMAGE_FROM_CPU_PATTERN 0x0000018c + +#define NV04_IMAGE_FROM_CPU_PATTERN 0x0000018c + +#define NV01_IMAGE_FROM_CPU_ROP 0x00000190 + +#define NV01_IMAGE_FROM_CPU_BETA1 0x00000194 + + +#define NV01_IMAGE_FROM_CPU_SURFACE_DST 0x00000198 + + +#define NV04_IMAGE_FROM_CPU_BETA4 0x00000198 + +#define NV04_IMAGE_FROM_CPU_SURFACE 0x0000019c + +#define NV05_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8 + +#define NV01_IMAGE_FROM_CPU_OPERATION 0x000002fc +#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_IMAGE_FROM_CPU_OPERATION_ROP_AND 0x00000001 +#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_AND 0x00000002 +#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY 0x00000003 +#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_R5G6G5 0x00000001 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A1R5G5B5 0x00000002 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X1R5G5B5 0x00000003 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A8R8G8B8 0x00000004 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X8R8G8B8 0x00000005 + +#define NV01_IMAGE_FROM_CPU_POINT 0x00000304 +#define NV01_IMAGE_FROM_CPU_POINT_X__MASK 0x0000ffff +#define NV01_IMAGE_FROM_CPU_POINT_X__SHIFT 0 +#define NV01_IMAGE_FROM_CPU_POINT_Y__MASK 0xffff0000 +#define NV01_IMAGE_FROM_CPU_POINT_Y__SHIFT 16 + +#define NV01_IMAGE_FROM_CPU_SIZE_OUT 0x00000308 +#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W__MASK 0x0000ffff +#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W__SHIFT 0 +#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H__MASK 0xffff0000 +#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H__SHIFT 16 + +#define NV01_IMAGE_FROM_CPU_SIZE_IN 0x0000030c +#define NV01_IMAGE_FROM_CPU_SIZE_IN_W__MASK 0x0000ffff +#define NV01_IMAGE_FROM_CPU_SIZE_IN_W__SHIFT 0 +#define NV01_IMAGE_FROM_CPU_SIZE_IN_H__MASK 0xffff0000 +#define NV01_IMAGE_FROM_CPU_SIZE_IN_H__SHIFT 16 + +#define NV01_IMAGE_FROM_CPU_COLOR(i0) (0x00000400 + 0x4*(i0)) +#define NV01_IMAGE_FROM_CPU_COLOR__ESIZE 0x00000004 +#define NV01_IMAGE_FROM_CPU_COLOR__LEN 0x00000020 + +#define NV04_IMAGE_FROM_CPU_COLOR(i0) (0x00000400 + 0x4*(i0)) +#define NV04_IMAGE_FROM_CPU_COLOR__ESIZE 0x00000004 +#define NV04_IMAGE_FROM_CPU_COLOR__LEN 0x00000700 + + +#define NV03_STRETCHED_IMAGE_FROM_CPU_PATCH 0x0000010c + +#define NV03_STRETCHED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180 + +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_KEY 0x00000184 + +#define NV04_STRETCHED_IMAGE_FROM_CPU_COLOR_KEY 0x00000184 + +#define NV03_STRETCHED_IMAGE_FROM_CPU_PATTERN 0x00000188 + +#define NV04_STRETCHED_IMAGE_FROM_CPU_PATTERN 0x00000188 + +#define NV03_STRETCHED_IMAGE_FROM_CPU_ROP 0x0000018c + +#define NV03_STRETCHED_IMAGE_FROM_CPU_BETA1 0x00000190 + + +#define NV03_STRETCHED_IMAGE_FROM_CPU_SURFACE_DST 0x00000194 + + +#define NV04_STRETCHED_IMAGE_FROM_CPU_BETA4 0x00000194 + +#define NV04_STRETCHED_IMAGE_FROM_CPU_SURFACE 0x00000198 + +#define NV05_STRETCHED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8 + +#define NV03_STRETCHED_IMAGE_FROM_CPU_OPERATION 0x000002fc + +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300 + +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN 0x00000304 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W__MASK 0x0000ffff +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W__SHIFT 0 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H__MASK 0xffff0000 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H__SHIFT 16 + +#define NV03_STRETCHED_IMAGE_FROM_CPU_DX_DU 0x00000308 + +#define NV03_STRETCHED_IMAGE_FROM_CPU_DY_DV 0x0000030c + +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT 0x00000310 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X__MASK 0x0000ffff +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X__SHIFT 0 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y__MASK 0xffff0000 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y__SHIFT 16 + +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE 0x00000314 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W__MASK 0x0000ffff +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W__SHIFT 0 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H__MASK 0xffff0000 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H__SHIFT 16 + +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4 0x00000318 +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X__MASK 0x0000ffff +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X__SHIFT 0 +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y__MASK 0xffff0000 +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y__SHIFT 16 + +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR(i0) (0x00000400 + 0x4*(i0)) +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR__ESIZE 0x00000004 +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR__LEN 0x00000700 + + +#define NV10_SCALED_IMAGE_FROM_MEMORY_WAIT_FOR_IDLE 0x00000108 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_NOTIFY 0x00000180 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE 0x00000184 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188 + +#define NV04_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_ROP 0x0000018c + +#define NV03_SCALED_IMAGE_FROM_MEMORY_BETA1 0x00000190 + + +#define NV03_SCALED_IMAGE_FROM_MEMORY_SURFACE_DST 0x00000194 + + +#define NV04_SCALED_IMAGE_FROM_MEMORY_BETA4 0x00000194 + +#define NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000198 + +#define NV05_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000198 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION 0x000002fc +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_DITHER 0x00000000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE 0x00000001 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT 0x00000300 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5 0x00000001 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5 0x00000002 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8 0x00000003 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8 0x00000004 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_V8YB8U8YA8 0x00000005 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_YB8V8YA8U8 0x00000006 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5 0x00000007 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8 0x00000008 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8 0x00000009 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION 0x00000304 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_AND 0x00000000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_ROP_AND 0x00000001 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_AND 0x00000002 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY 0x00000003 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT 0x00000308 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X__MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X__SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y__MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y__SHIFT 16 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE 0x0000030c +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W__MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W__SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H__MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H__SHIFT 16 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT 0x00000310 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X__MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X__SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y__MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y__SHIFT 16 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE 0x00000314 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W__MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W__SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H__MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H__SHIFT 16 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_DU_DX 0x00000318 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_DV_DY 0x0000031c + +#define NV03_SCALED_IMAGE_FROM_MEMORY_SIZE 0x00000400 +#define NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_W__MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_W__SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H__MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H__SHIFT 16 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT 0x00000404 +#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH__MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH__SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN__MASK 0x00ff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN__SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER 0x00010000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CORNER 0x00020000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER__MASK 0xff000000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER__SHIFT 24 +#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE 0x00000000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_BILINEAR 0x01000000 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_OFFSET 0x00000408 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_POINT 0x0000040c +#define NV03_SCALED_IMAGE_FROM_MEMORY_POINT_U__MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_POINT_U__SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_POINT_V__MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_POINT_V__SHIFT 16 + + +#define NV50_SCALED_IMAGE_FROM_MEMORY_OFFSET_HIGH 0x00000410 + +#define NV50_SCALED_IMAGE_FROM_MEMORY_SRC_LINEAR 0x00000414 + +#define NV50_SCALED_IMAGE_FROM_MEMORY_SRC_TILE_MODE 0x00000418 + + +#define NV03_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180 + +#define NV03_GDI_RECTANGLE_TEXT_PATTERN 0x00000184 + +#define NV03_GDI_RECTANGLE_TEXT_ROP 0x00000188 + +#define NV03_GDI_RECTANGLE_TEXT_BETA1 0x0000019c + +#define NV03_GDI_RECTANGLE_TEXT_SURFACE_DST 0x00000190 + +#define NV03_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc + +#define NV03_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300 + +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304 + +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc + +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT 0x00000400 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE 0x00000404 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B 0x000007f4 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B 0x000007f8 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_B 0x000007fc + +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0 0x00000800 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1 0x00000804 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x00000bec +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x00000bf0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_C 0x00000bf4 + +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C 0x00000bf8 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_POINT_C 0x00000bfc +#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(i0) (0x00000c00 + 0x4*(i0)) +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__ESIZE 0x00000004 +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__LEN 0x00000020 + +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0 0x00000fe8 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1 0x00000fec +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_D 0x00000ff0 + +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D 0x00000ff4 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D 0x00000ff8 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_POINT_D 0x00000ffc +#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D(i0) (0x00001000 + 0x4*(i0)) +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D__ESIZE 0x00000004 +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D__LEN 0x00000020 + +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x000013e4 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x000013e8 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_COLOR0_E 0x000013ec + +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_E 0x000013f0 + +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x000013f4 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x000013f8 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_POINT_E 0x000013fc +#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X__MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X__SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y__MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y__SHIFT 16 + +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(i0) (0x00001400 + 0x4*(i0)) +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__ESIZE 0x00000004 +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__LEN 0x00000020 + + +#define NV04_GDI_RECTANGLE_TEXT_PATCH 0x0000010c + +#define NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180 + +#define NV04_GDI_RECTANGLE_TEXT_DMA_FONTS 0x00000184 + +#define NV04_GDI_RECTANGLE_TEXT_PATTERN 0x00000188 + +#define NV04_GDI_RECTANGLE_TEXT_ROP 0x0000018c + +#define NV04_GDI_RECTANGLE_TEXT_BETA1 0x00000190 + +#define NV04_GDI_RECTANGLE_TEXT_BETA4 0x00000194 + +#define NV04_GDI_RECTANGLE_TEXT_SURFACE 0x00000198 + +#define NV04_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_AND 0x00000000 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_ROP_AND 0x00000001 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_AND 0x00000002 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY 0x00000003 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_PREMULT 0x00000005 + +#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300 +#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8 0x00000003 + +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_CGA6 0x00000001 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE 0x00000002 + +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc + +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(i0) (0x00000400 + 0x8*(i0)) +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT__ESIZE 0x00000008 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT__LEN 0x00000020 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE(i0) (0x00000404 + 0x8*(i0)) +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE__ESIZE 0x00000008 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE__LEN 0x00000020 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0 0x000005f4 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1 0x000005f8 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_B 0x000005fc + +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0(i0) (0x00000600 + 0x8*(i0)) +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0__ESIZE 0x00000008 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0__LEN 0x00000020 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1(i0) (0x00000604 + 0x8*(i0)) +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1__ESIZE 0x00000008 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1__LEN 0x00000020 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x000007ec +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x000007f0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_C 0x000007f4 + +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C 0x000007f8 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_POINT_C 0x000007fc +#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(i0) (0x00000800 + 0x4*(i0)) +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__ESIZE 0x00000004 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__LEN 0x00000080 + +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x00000be4 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x00000be8 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_COLOR0_E 0x00000bec + +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_E 0x00000bf0 + +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x00000bf4 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x00000bf8 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_POINT_E 0x00000bfc +#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(i0) (0x00000c00 + 0x4*(i0)) +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__ESIZE 0x00000004 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__LEN 0x00000080 + +#define NV04_GDI_RECTANGLE_TEXT_FONT_F 0x00000ff0 +#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET__MASK 0x0fffffff +#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH__MASK 0xf0000000 +#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH__SHIFT 28 + +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0 0x00000ff4 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1 0x00000ff8 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_F 0x00000ffc + +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F(i0) (0x00001000 + 0x4*(i0)) +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F__ESIZE 0x00000004 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F__LEN 0x00000100 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX__MASK 0x000000ff +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X__MASK 0x000fff00 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X__SHIFT 8 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y__MASK 0xfff00000 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y__SHIFT 20 + +#define NV04_GDI_RECTANGLE_TEXT_FONT_G 0x000017f0 +#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET__MASK 0x0fffffff +#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH__MASK 0xf0000000 +#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH__SHIFT 28 + +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0 0x000017f4 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1 0x000017f8 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_G 0x000017fc + +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT(i0) (0x00001800 + 0x8*(i0)) +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT__ESIZE 0x00000008 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT__LEN 0x00000100 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X__MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X__SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y__MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y__SHIFT 16 + +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX(i0) (0x00001804 + 0x8*(i0)) +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX__ESIZE 0x00000008 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX__LEN 0x00000100 + + +#define NV10_TEXTURE_FROM_CPU_WAIT_FOR_IDLE 0x00000108 + +#define NV10_TEXTURE_FROM_CPU_DMA_NOTIFY 0x00000180 + +#define NV10_TEXTURE_FROM_CPU_SURFACE 0x00000184 + +#define NV10_TEXTURE_FROM_CPU_COLOR_FORMAT 0x00000300 + +#define NV10_TEXTURE_FROM_CPU_POINT 0x00000304 +#define NV10_TEXTURE_FROM_CPU_POINT_X__MASK 0x0000ffff +#define NV10_TEXTURE_FROM_CPU_POINT_X__SHIFT 0 +#define NV10_TEXTURE_FROM_CPU_POINT_Y__MASK 0xffff0000 +#define NV10_TEXTURE_FROM_CPU_POINT_Y__SHIFT 16 + +#define NV10_TEXTURE_FROM_CPU_SIZE 0x00000308 +#define NV10_TEXTURE_FROM_CPU_SIZE_W__MASK 0x0000ffff +#define NV10_TEXTURE_FROM_CPU_SIZE_W__SHIFT 0 +#define NV10_TEXTURE_FROM_CPU_SIZE_H__MASK 0xffff0000 +#define NV10_TEXTURE_FROM_CPU_SIZE_H__SHIFT 16 + +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL 0x0000030c +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X__MASK 0x0000ffff +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X__SHIFT 0 +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W__MASK 0xffff0000 +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W__SHIFT 16 + +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL 0x00000310 +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y__MASK 0x0000ffff +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y__SHIFT 0 +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H__MASK 0xffff0000 +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H__SHIFT 16 + +#define NV10_TEXTURE_FROM_CPU_COLOR(i0) (0x00000400 + 0x4*(i0)) +#define NV10_TEXTURE_FROM_CPU_COLOR__ESIZE 0x00000004 +#define NV10_TEXTURE_FROM_CPU_COLOR__LEN 0x00000700 + + +#endif /* NV01_2D_XML */ diff --git a/src/gallium/drivers/nvfx/nv04_2d.c b/src/gallium/drivers/nvfx/nv04_2d.c new file mode 100644 index 00000000000..e0e65e7a87f --- /dev/null +++ b/src/gallium/drivers/nvfx/nv04_2d.c @@ -0,0 +1,1393 @@ +/************************************************************************** + * + * Copyright 2009 Ben Skeggs + * Copyright 2009 Younes Manton + * Copyright 2010 Luca Barbieri + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +/* this code has no Mesa or Gallium dependency and can be reused in the classic Mesa driver or DDX */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <nouveau/nouveau_device.h> +#include <nouveau/nouveau_pushbuf.h> +#include <nouveau/nouveau_channel.h> +#include <nouveau/nouveau_bo.h> +#include <nouveau/nouveau_notifier.h> +#include <nouveau/nouveau_grobj.h> +#include "nv04_2d.h" + +#include "nouveau/nv_object.xml.h" +#include "nouveau/nv_m2mf.xml.h" +#include "nv01_2d.xml.h" + +/* avoid depending on Mesa/Gallium */ +#ifdef __GNUC__ +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define likely(x) !!(x) +#define unlikely(x) !!(x) +#endif + +#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) +#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) + +struct nv04_2d_context +{ + struct nouveau_notifier *ntfy; + struct nouveau_grobj *surf2d; + struct nouveau_grobj *swzsurf; + struct nouveau_grobj *m2mf; + struct nouveau_grobj *rect; + struct nouveau_grobj *sifm; + struct nouveau_grobj *blit; +}; + +static inline int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + +static inline int +util_is_pot(unsigned x) +{ + return (x & (x - 1)) == 0; +} + +/* Integer base-2 logarithm, rounded towards zero. */ +static inline unsigned log2i(unsigned i) +{ + unsigned r = 0; + + if (i & 0xffff0000) { + i >>= 16; + r += 16; + } + if (i & 0x0000ff00) { + i >>= 8; + r += 8; + } + if (i & 0x000000f0) { + i >>= 4; + r += 4; + } + if (i & 0x0000000c) { + i >>= 2; + r += 2; + } + if (i & 0x00000002) { + r += 1; + } + return r; +} + +//#define NV04_REGION_DEBUG + +// Yes, we really want to inline everything, since all the functions are used only once +#if defined(__GNUC__) && !defined(DEBUG) +#define inline __attribute__((always_inline)) inline +#endif + +static inline unsigned +nv04_swizzle_bits_square(unsigned x, unsigned y) +{ + unsigned u = (x & 0x001) << 0 | + (x & 0x002) << 1 | + (x & 0x004) << 2 | + (x & 0x008) << 3 | + (x & 0x010) << 4 | + (x & 0x020) << 5 | + (x & 0x040) << 6 | + (x & 0x080) << 7 | + (x & 0x100) << 8 | + (x & 0x200) << 9 | + (x & 0x400) << 10 | + (x & 0x800) << 11; + + unsigned v = (y & 0x001) << 1 | + (y & 0x002) << 2 | + (y & 0x004) << 3 | + (y & 0x008) << 4 | + (y & 0x010) << 5 | + (y & 0x020) << 6 | + (y & 0x040) << 7 | + (y & 0x080) << 8 | + (y & 0x100) << 9 | + (y & 0x200) << 10 | + (y & 0x400) << 11 | + (y & 0x800) << 12; + return v | u; +} + +/* rectangular swizzled textures are linear concatenations of swizzled square tiles */ +static inline unsigned +nv04_swizzle_bits_2d(unsigned x, unsigned y, unsigned w, unsigned h) +{ + if(h <= 1) + return x; + else + { + unsigned s = MIN2(w, h); + unsigned m = s - 1; + return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m); + } +} + +// general 3D texture case +static inline unsigned +nv04_swizzle_bits(unsigned x, unsigned y, unsigned z, unsigned w, unsigned h, unsigned d) +{ + if(d <= 1) + return nv04_swizzle_bits_2d(x, y, w, h); + else + { + // TODO: autogenerate code for all possible texture sizes (13 * 13 * 13 with dims <= 4096) and do a single indirect call + unsigned v = 0; + w >>= 1; + h >>= 1; + d >>= 1; + for(int i = 0;;) + { + int oldi = i; + if(likely(w)) + { + v |= (x & 1) << i; + x >>= 1; + w >>= 1; + ++i; + } + + if(likely(h)) + { + v |= (y & 1) << i; + y >>= 1; + h >>= 1; + ++i; + } + + if(likely(d)) + { + v |= (z & 1) << i; + z >>= 1; + d >>= 1; + ++i; + } + + if(i == oldi) + break; + } + return v; + } +} + +unsigned +nv04_region_begin(struct nv04_region* rgn, unsigned w, unsigned h) +{ + if(rgn->pitch) + return rgn->pitch * rgn->y + (rgn->x << rgn->bpps); + else + return nv04_swizzle_bits(rgn->x, rgn->y, rgn->z, rgn->w, rgn->h, rgn->d) << rgn->bpps; +} + +unsigned +nv04_region_end(struct nv04_region* rgn, unsigned w, unsigned h) +{ + if(rgn->pitch) + return rgn->pitch * (rgn->y + h - 1) + ((rgn->x + w) << rgn->bpps); + else + return (nv04_swizzle_bits(rgn->x + w - 1, rgn->y + h - 1, rgn->z, rgn->w, rgn->h, rgn->d) + 1) << rgn->bpps; +} + +// *pitch = -1 -> use 3D swizzling for (x, y), *pitch = 0 -> use 2D swizzling, other *pitch -> use linear calculations +// returns 2 if pixel order is 3D-swizzled and 1 if subrect is 2D-swizzled +/* *pitch == -1 ret = 0 -> 3D swizzled subrect + * *pitch == 0 ret = 0 -> 2D swizzled subrect + * *pitch > 0 ret = 0 -> linear subrect + * *pitch > 0 ret = 1 -> linear subrect, but with swizzled 3D data inside + */ + +static inline void +nv04_region_print(struct nv04_region* rgn) +{ + fprintf(stderr, "<%i[%i]> ", rgn->bo->handle, rgn->offset); + if(rgn->pitch) + fprintf(stderr, "lin %i", rgn->pitch); + else + fprintf(stderr, "swz %ix%ix%i", rgn->w, rgn->h, rgn->d); + fprintf(stderr, " (%i, %i, %i)", rgn->x, rgn->y, rgn->z); +} + +static inline void +nv04_region_assert(struct nv04_region* rgn, unsigned w, unsigned h) +{ + unsigned end = rgn->offset + nv04_region_end(rgn, w, h); + + assert(rgn->offset <= (int)rgn->bo->size); + assert(end <= rgn->bo->size); + (void) end; + if(!rgn->pitch) { + assert(util_is_pot(rgn->w)); + assert(util_is_pot(rgn->h)); + } +} + +/* determine if region can be linearized or fake-linearized */ +static inline int +nv04_region_is_contiguous(struct nv04_region* rgn, int w, int h) +{ + int surf_min; + int rect_min; + + if(rgn->pitch) + return rgn->pitch == w << rgn->bpps; + + // redundant, but this is the fast path for the common case + if(w == rgn->w && h == rgn->h && rgn->d <= 1) + return 1; + + // must be POT + if((w & (w - 1)) || (h & (h - 1))) + return 0; + + // must be aligned + if((rgn->x & (w - 1)) || (rgn->y & (h - 1))) + return 0; + + if(rgn->d > 1) + return 0; + + surf_min = MIN2(rgn->w, rgn->h); + rect_min = MIN2(w, h); + + if((rect_min == surf_min) || (w == h) || (w == 2 * h)) + return 1; + + return 0; +} + +// double the pitch until it is larger than the alignment, or the height becomes odd or 1 +static inline void +nv04_region_contiguous_shape(struct nv04_region* rgn, int* w, int* h, int align) +{ + while(!(*h & 1) && (*w << rgn->bpps) < (1 << align)) + { + *w <<= 1; + *h >>= 1; + } + + while((*w << rgn->bpps) > 16384 && !(*w & 1)) + { + *w >>= 1; + *h <<= 1; + } + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tCONTIGUOUS %ix%i\n", *w, *h); +#endif +} + +static inline void +nv04_region_linearize_contiguous(struct nv04_region* rgn, unsigned w, unsigned h) +{ + int pos; + if(rgn->pitch) + { + rgn->offset += rgn->y * rgn->pitch + (rgn->x << rgn->bpps); + rgn->x = 0; + rgn->y = 0; + } + else + { + rgn->offset += (rgn->w * rgn->h * rgn->z) << rgn->bpps; + pos = nv04_swizzle_bits(rgn->x, rgn->y, rgn->z, rgn->w, rgn->h, rgn->d); + rgn->x = pos & (w - 1); + rgn->y = pos / w; + } + rgn->pitch = w << rgn->bpps; + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tLINEARIZE "); + nv04_region_print(rgn); + fprintf(stderr, "\n"); +#endif +} + + /* preserve the offset! */ + /* + rgn->pitch = util_format_get_stride(rgn->format, w); + int pos = nv04_swizzle_bits(rgn->x, rgn->y, rgn->z, rgn->w, rgn->h, rgn->d); + rgn->x = pos & (w - 1); + rgn->y = pos & ~(w - 1); + */ + + /* + rgn->offset += + rgn->pitch = util_format_get_stride(rgn->format, w); + rgn->x = 0; + rgn->y = 0; + */ + +/* This code will get used for, and always succeed on: + * - 4x2 1bpp swizzled texture mipmap levels + * - linear regions created by linearization + * + * This code will get used for, and MAY work for: + * - misaligned texture blanket + * - linear surfaces created without wide_pitch (in this case, it will only work if we are lucky) + * + * The general case requires splitting the region in 2. + */ +static inline int +nv04_region_do_align_offset(struct nv04_region* rgn, unsigned w, unsigned h, int shift) +{ + if(rgn->pitch > 0) + { + assert(!(rgn->offset & ((1 << rgn->bpps) - 1))); // fatal! + + if(h <= 1) + { + int delta; + rgn->offset += rgn->y * rgn->pitch + (rgn->x << rgn->bpps); + delta = rgn->offset & ((1 << shift) - 1); + rgn->y = 0; + rgn->x = delta >> rgn->bpps; + rgn->offset -= delta; + rgn->pitch = align((rgn->x + w) << rgn->bpps, 1 << shift); + } + else + { + int delta = rgn->offset & ((1 << shift) - 1); + int newxo = (rgn->x << rgn->bpps) + delta; + int dy = newxo / rgn->pitch; + newxo -= dy * rgn->pitch; + if((newxo + (w << rgn->bpps)) > rgn->pitch) + { + // TODO: split the region into two rectangles (!) if *really* necessary, unless the hardware actually supports "wrapping" rectangles + // this does not happen if the surface is pitch-aligned, which it should always be + assert(0); + return -1; + } + rgn->x = newxo >> rgn->bpps; + rgn->y += dy; + } + } + else + { + int size; + int min; + int v; + + // we don't care about the alignment of 3D surfaces since the 2D engine can't use them + if(rgn->d < 0) + return -1; + + min = MIN2(rgn->w, rgn->h); + size = min * min << rgn->bpps; + + // this is unfixable, and should not be happening + if(rgn->offset & (size - 1)) + return -1; + + v = (rgn->offset & ((1 << shift) - 1)) / size; + rgn->offset -= v * size; + + if(rgn->h == min) + { + unsigned w; + rgn->x += rgn->h * v; + w = rgn->w + rgn->h * v; + + while(rgn->w < w) + rgn->w += rgn->w; + } + else + { + unsigned h; + rgn->y += rgn->w * v; + h = rgn->h + rgn->w * v; + + while(rgn->h < h) + rgn->h += rgn->h; + } + } + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tALIGNED "); + nv04_region_print(rgn); + fprintf(stderr, "\n"); +#endif + return 0; +} + +// both pitch and shift +// will leave the region unchanged if it fails +static inline int +nv04_region_align(struct nv04_region* rgn, unsigned w, unsigned h, int shift) +{ + if(rgn->pitch & ((1 << shift) - 1)) + { + if(h == 1) + goto do_align; /* this will fix pitch too in this case */ + else + return -1; + } + + if(rgn->offset & ((1 << shift) - 1)) + { + do_align: + if(nv04_region_do_align_offset(rgn, w, h, shift)) + return -1; + } + return 0; +} + +/* this contains 22 different copy loops after preprocessing. unfortunately, it's necessary */ +void +nv04_region_copy_cpu(struct nv04_region* dst, struct nv04_region* src, int w, int h) +{ + uint8_t* mdst; + uint8_t* msrc; + int size; + + if(dst->bo != src->bo) + { + nouveau_bo_map(dst->bo, NOUVEAU_BO_WR); + nouveau_bo_map(src->bo, NOUVEAU_BO_RD); + } + else + nouveau_bo_map(dst->bo, NOUVEAU_BO_WR | NOUVEAU_BO_RD); + + mdst = (uint8_t*)dst->bo->map + dst->offset; + msrc = (uint8_t*)src->bo->map + src->offset; + + size = w << dst->bpps; + + nv04_region_assert(dst, w, h); + nv04_region_assert(src, w, h); + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tRGN_COPY_CPU [%i, %i: %i] ", w, h, dst->bpps); + for(int i = 0; i < 2; ++i) + { + nv04_region_print(i ? src : dst); + fprintf(stderr, i ? "\n" : " <- "); + } + +// for(int i = 0; i < 16; ++i) +// fprintf(stderr, "%02x ", msrc[i]); +// fprintf(stderr, "\n"); +#endif + + // TODO: support overlapping copies! + if(src->pitch && dst->pitch) + { + mdst += dst->y * dst->pitch + (dst->x << dst->bpps); + msrc += src->y * src->pitch + (src->x << src->bpps); + if(dst->bo != src->bo) + goto simple; + else if(mdst < msrc) + { + if(mdst + size <= msrc) + { +simple: + for(int iy = 0; iy < h; ++iy) + { + assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size); + assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size); + memcpy(mdst, msrc, size); + msrc += src->pitch; mdst += dst->pitch; + } + } + else + { + for(int iy = 0; iy < h; ++iy) + { + assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size); + assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size); + memmove(mdst, msrc, size); + msrc += src->pitch; mdst += dst->pitch; + } + } + } + else + { + /* copy backwards so we don't destroy data we have to read yet */ + if(msrc + size <= mdst) + { + for(int iy = h - 1; iy >= 0; --iy) + { + assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size); + assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size); + memcpy(mdst, msrc, size); + msrc += src->pitch; mdst += dst->pitch; + } + } + else + { + for(int iy = h - 1; iy >= 0; --iy) + { + assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size); + assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size); + memmove(mdst, msrc, size); + msrc += src->pitch; mdst += dst->pitch; + } + } + } + } + else + { + int* dswx = NULL; + int* dswy = NULL; + int* sswx = NULL; + int* sswy = NULL; + int dir; + + if(!dst->pitch) + { + dswx = alloca(w * sizeof(int)); + for(int ix = 0; ix < w; ++ix) // we are adding, so z cannot be contributed by both + dswx[ix] = nv04_swizzle_bits(dst->x + ix, 0, 0, dst->w, dst->h, dst->d); + dswy = alloca(h * sizeof(int)); + for(int iy = 0; iy < h; ++iy) + dswy[iy] = nv04_swizzle_bits(0, dst->y + iy, dst->z, dst->w, dst->h, dst->d); + } + + if(!src->pitch) + { + sswx = alloca(w * sizeof(int)); + for(int ix = 0; ix < w; ++ix) + sswx[ix] = nv04_swizzle_bits(src->x + ix, 0, 0, src->w, src->h, src->d); + sswy = alloca(h * sizeof(int)); + for(int iy = 0; iy < h; ++iy) + sswy[iy] = nv04_swizzle_bits(0, src->y + iy, src->z, src->w, src->h, src->d); + } + + dir = 1; + /* do backwards copies for overlapping swizzled surfaces */ + if(dst->pitch == src->pitch && dst->offset == src->offset) + { + if(dst->y > src->y || (dst->y == src->y && dst->x > src->x)) + dir = -1; + } + +#define SWIZZLED_COPY_LOOPS + if(dir == 1) + { + int dir = 1; +#define LOOP_Y for(int iy = 0; iy < h; ++iy) +#define LOOP_X for(int ix = 0; ix < w; ++ix) +#include "nv04_2d_loops.h" +#undef LOOP_X +#undef LOOP_Y + } + else + { + int dir = -1; +#define LOOP_Y for(int iy = h - 1; iy >= 0; --iy) +#define LOOP_X for(int ix = w - 1; ix >= 0; --ix) +#include "nv04_2d_loops.h" +#undef LOOP_X +#undef LOOP_Y + } +#undef SWIZZLED_COPY_LOOP + } + + if(src->bo != dst->bo) + nouveau_bo_unmap(src->bo); + nouveau_bo_unmap(dst->bo); +} + +/* TODO: if the destination is swizzled, we are doing random writes, which causes write combining to fail + * the alternative is to read, modify and copy back, which may or may not be faster + * loading 3D textures is a common case that hits this and could probably benefit from the temporary + */ +void +nv04_region_fill_cpu(struct nv04_region* dst, int w, int h, unsigned value) +{ + uint8_t* mdst = (nouveau_bo_map(dst->bo, NOUVEAU_BO_WR), (uint8_t*)dst->bo->map + dst->offset); + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tRGN_FILL_CPU "); + nv04_region_print(dst); + fprintf(stderr, "\n"); +#endif + + nv04_region_assert(dst, w, h); + + if(dst->pitch) + { + unsigned size = w << dst->bpps; + +#define FILL(T) do { \ + for(int iy = 0; iy < h; ++iy) \ + { \ + assert((char*)((T*)mdst + w) <= (char*)dst->bo->map + dst->bo->size); \ + for(int ix = 0; ix < w; ++ix) \ + ((T*)mdst)[ix] = (T)value; \ + mdst += dst->pitch; \ + } \ + } while(0) + + mdst += dst->y * dst->pitch + (dst->x << dst->bpps); + + if(dst->bpps == 0) + { +ms: + assert(mdst + size * h <= (uint8_t*)dst->bo->map + dst->bo->size); + if(size == dst->pitch) + memset(mdst, (uint8_t)value, size * h); + else + { + for(int iy = 0; iy < h; ++iy) + { + assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size); + memset(mdst, (uint8_t)value, size); + mdst += dst->pitch; + } + } + } + else if(dst->bpps == 1) + { + if(!((uint8_t)value ^ (uint8_t)(value >> 8))) + goto ms; + + FILL(uint16_t); + } + else if(dst->bpps == 2) + { + if(value == (uint8_t)value * 0x1010101) + goto ms; + FILL(uint32_t); + } + else + assert(0); +#undef FILL + } + else + { + int* dswx; + int* dswy; + + dswx = alloca(w * sizeof(int)); + for(int ix = 0; ix < w; ++ix) + dswx[ix] = nv04_swizzle_bits(dst->x + ix, 0, dst->z, dst->w, dst->h, dst->d); + dswy = alloca(h * sizeof(int)); + for(int iy = 0; iy < h; ++iy) + dswy[iy] = nv04_swizzle_bits(0, dst->y + iy, dst->z, dst->w, dst->h, dst->d); + +#define FILL(T) do { \ + T tvalue = (T)value; \ + for(int iy = 0; iy < h; ++iy) \ + { \ + T* pdst = (T*)mdst + dswy[iy]; \ + for(int ix = 0; ix < w; ++ix) \ + { \ + assert((uint8_t*)&pdst[dswx[ix] + 1] <= (uint8_t*)dst->bo->map + dst->bo->size); \ + pdst[dswx[ix]] = tvalue; \ + } \ + } \ + } while(0) + + if(dst->bpps == 0) + FILL(uint8_t); + else if(dst->bpps == 1) + FILL(uint16_t); + else if(dst->bpps == 2) + FILL(uint32_t); + else + assert(0 && "unhandled bpp"); +#undef FILL + } + + nouveau_bo_unmap(dst->bo); +} + +static inline int +nv04_region_cs2d_format(struct nv04_region* rgn) +{ + switch(rgn->bpps) { + case 0: + return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; + case 1: + if(rgn->one_bits >= 1) + return NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_X1R5G5B5; + else + return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; + case 2: + if(rgn->one_bits >= 8) + return NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_X8R8G8B8; + else + return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8; + default: + return -1; + } +} + +static inline int +nv04_region_sifm_format(struct nv04_region* rgn) +{ + switch(rgn->bpps) { + case 0: + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8; + case 1: + if(rgn->one_bits >= 1) + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5; + else + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5; + case 2: + if(rgn->one_bits >= 8) + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8; + else + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8; + default: + return -1; + } +} +static void +nv04_region_copy_swizzle(struct nv04_2d_context *ctx, + struct nv04_region* dst, + struct nv04_region* src, + int w, int h) +{ + struct nouveau_channel *chan = ctx->swzsurf->channel; + struct nouveau_grobj *swzsurf = ctx->swzsurf; + struct nouveau_grobj *sifm = ctx->sifm; + int cs2d_format = nv04_region_cs2d_format(dst); + int sifm_format = nv04_region_sifm_format(src); + /* Max width & height may not be the same on all HW, but must be POT */ + unsigned max_shift = 10; + unsigned cw = 1 << max_shift; + unsigned ch = 1 << max_shift; + unsigned sx = dst->x >> max_shift; + unsigned sy = dst->y >> max_shift; + unsigned ex = (dst->x + w - 1) >> max_shift; + unsigned ey = (dst->y + h - 1) >> max_shift; + unsigned chunks = (ex - sx + 1) * (ey - sy + 1); + unsigned chunk_size; + if(dst->w < cw) + cw = dst->w; + if(dst->h < ch) + ch = dst->h; + chunk_size = cw * ch << dst->bpps; + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tRGN_COPY_SWIZZLE [%i, %i: %i] ", w, h, dst->bpps); + for(int i = 0; i < 2; ++i) + { + nv04_region_print(i ? src : dst); + fprintf(stderr, i ? "\n" : " <- "); + } +#endif + + nv04_region_assert(dst, w, h); + nv04_region_assert(src, w, h); + + MARK_RING (chan, 8 + chunks * 17, 2 + chunks * 2); + + BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); + OUT_RELOCo(chan, dst->bo, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1); + OUT_RING (chan, cs2d_format | + log2i(cw) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U__SHIFT | + log2i(ch) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V__SHIFT); + + BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1); + OUT_RELOCo(chan, src->bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1); + OUT_RING (chan, swzsurf->handle); + + assert(!(dst->offset & 63)); + + for (int cy = sy; cy <= ey; ++cy) { + int ry = MAX2(0, (int)(dst->y - ch * cy)); + int rh = MIN2((int)ch, (int)(dst->y - ch * cy + h)) - ry; + for (int cx = sx; cx <= ex; ++cx) { + int rx = MAX2(0, (int)(dst->x - cw * cx)); + int rw = MIN2((int)cw, (int)(dst->x - cw * cx + w)) - rx; + unsigned dst_offset; + unsigned src_offset; + + BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); + + dst_offset = dst->offset + (nv04_swizzle_bits_2d(cx * cw, cy * ch, dst->w, dst->h) << dst->bpps); + assert(dst_offset <= dst->bo->size); + assert(dst_offset + chunk_size <= dst->bo->size); + OUT_RELOCl(chan, dst->bo, dst_offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); + OUT_RING (chan, NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); + OUT_RING (chan, sifm_format); + OUT_RING (chan, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); + OUT_RING (chan, rx | (ry << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y__SHIFT)); + OUT_RING (chan, rh << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H__SHIFT | rw); + OUT_RING (chan, rx | (ry << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y__SHIFT)); + OUT_RING (chan, rh << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H__SHIFT | rw); + OUT_RING (chan, 1 << 20); + OUT_RING (chan, 1 << 20); + + BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_SIZE, 4); + OUT_RING (chan, rh << NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H__SHIFT | align(rw, 8)); + OUT_RING (chan, src->pitch | + NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | + NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); + src_offset = src->offset + (cy * ch + ry + src->y - dst->y) * src->pitch + ((cx * cw + rx + src->x - dst->x) << src->bpps); + assert(src_offset <= src->bo->size); + assert(src_offset + (src->pitch * (rh - 1)) + (rw << src->bpps) <= src->bo->size); + OUT_RELOCl(chan, src->bo, src_offset, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, 0); + } + } +} + +static inline void +nv04_copy_m2mf_begin(struct nv04_2d_context *ctx, struct nouveau_bo* dstbo, struct nouveau_bo* srcbo, unsigned commands) +{ + struct nouveau_channel *chan = ctx->m2mf->channel; + struct nouveau_grobj *m2mf = ctx->m2mf; + MARK_RING (chan, 3 + commands * 9, 2 + commands * 2); + BEGIN_RING(chan, m2mf, NV04_M2MF_DMA_BUFFER_IN, 2); + OUT_RELOCo(chan, srcbo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(chan, dstbo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +} + +static inline void +nv04_copy_m2mf_body(struct nv04_2d_context *ctx, struct nouveau_bo* dstbo, int* pdstoff, unsigned dstpitch, struct nouveau_bo* srcbo, int* psrcoff, unsigned srcpitch, unsigned size, unsigned lines) +{ + struct nouveau_channel *chan = ctx->m2mf->channel; + struct nouveau_grobj *m2mf = ctx->m2mf; + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\t\t\tCOPY_M2MF_BODY [%i, %i] <%i[%u]> lin %u <- <%i[%u]> lin %u\n", size, lines, dstbo->handle, *pdstoff, dstpitch, srcbo->handle, *psrcoff, srcpitch); +#endif + + BEGIN_RING(chan, m2mf, NV04_M2MF_OFFSET_IN, 8); + OUT_RELOCl(chan, srcbo, *psrcoff, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, dstbo, *pdstoff, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RING (chan, srcpitch); + OUT_RING (chan, dstpitch); + OUT_RING (chan, size); + OUT_RING (chan, lines); + OUT_RING (chan, 0x0101); + OUT_RING (chan, 0); + + *psrcoff += srcpitch * lines; + *pdstoff += dstpitch * lines; +} + +static void +nv04_copy_m2mf(struct nv04_2d_context *ctx, + struct nouveau_bo* dstbo, int dstoff, unsigned dstpitch, + struct nouveau_bo* srcbo, int srcoff, unsigned srcpitch, + unsigned size, unsigned h) +{ + unsigned max_pitch = 32767; + unsigned max_lines = 2047; + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\t\tCOPY_M2MF [%i, %i] <%i[%i]> lin %u <- <%i[%i]> lin %u\n", size, h, dstbo->handle, dstoff, dstpitch, srcbo->handle, srcoff, srcpitch); +#endif + + if(srcpitch <= max_pitch && dstpitch <= max_pitch) + { + unsigned full_pages = h / max_lines; + unsigned leftover_lines = h - full_pages * max_lines; + + nv04_copy_m2mf_begin(ctx, dstbo, srcbo, full_pages + !!leftover_lines); + + for(unsigned i = 0; i < full_pages; ++i) + nv04_copy_m2mf_body(ctx, dstbo, &dstoff, dstpitch, srcbo, &srcoff, srcpitch, size, max_lines); + + if(leftover_lines) + nv04_copy_m2mf_body(ctx, dstbo, &dstoff, dstpitch, srcbo, &srcoff, srcpitch, size, leftover_lines); + } + else + { + unsigned lines = size / max_pitch; + unsigned leftover = size - lines * max_pitch; + unsigned full_pages = lines / max_lines; + unsigned leftover_lines = lines - full_pages * max_lines; + unsigned srcgap = srcpitch - size; + unsigned dstgap = dstpitch - size; + + nv04_copy_m2mf_begin(ctx, dstbo, srcbo, h * (full_pages + !!leftover_lines + !!leftover)); + + for(unsigned i = 0; i < h; ++i) + { + for(unsigned j = 0; j < full_pages; ++j) + nv04_copy_m2mf_body(ctx, dstbo, &dstoff, max_pitch, srcbo, &srcoff, max_pitch, max_pitch, max_lines); + + if(leftover_lines) + nv04_copy_m2mf_body(ctx, dstbo, &dstoff, max_pitch, srcbo, &srcoff, max_pitch, max_pitch, leftover_lines); + + if(leftover) + nv04_copy_m2mf_body(ctx, dstbo, &dstoff, leftover, srcbo, &srcoff, leftover, leftover, 1); + + srcoff += srcgap; + dstoff += dstgap; + } + } +} + +void +nv04_memcpy(struct nv04_2d_context *ctx, struct nouveau_bo* dstbo, int dstoff, struct nouveau_bo* srcbo, int srcoff, unsigned size) +{ +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tMEMCPY [%i] <%i[%i]> <- <%i[%i]>\n", size, dstbo->handle, dstoff, srcbo->handle, srcoff); +#endif + + nv04_copy_m2mf(ctx, dstbo, dstoff, size, srcbo, srcoff, size, size, 1); +} + +static void +nv04_region_copy_m2mf(struct nv04_2d_context *ctx, struct nv04_region *dst, struct nv04_region *src, int w, int h) +{ +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tRGN_COPY_M2MF [%i, %i: %i] ", w, h, dst->bpps); + for(int i = 0; i < 2; ++i) + { + nv04_region_print(i ? src : dst); + fprintf(stderr, i ? "\n" : " <- "); + } +#endif + + nv04_region_assert(dst, w, h); + nv04_region_assert(src, w, h); + assert(src->pitch); + assert(dst->pitch); + + nv04_copy_m2mf(ctx, + dst->bo, dst->offset + dst->y * dst->pitch + (dst->x << dst->bpps), dst->pitch, + src->bo, src->offset + src->y * src->pitch + (src->x << src->bpps), src->pitch, + w << src->bpps, h); +} + +static inline void +nv04_region_copy_blit(struct nv04_2d_context *ctx, struct nv04_region* dst, struct nv04_region* src, int w, int h) +{ + struct nouveau_channel *chan = ctx->surf2d->channel; + struct nouveau_grobj *surf2d = ctx->surf2d; + struct nouveau_grobj *blit = ctx->blit; + int cs2d_format = nv04_region_cs2d_format(dst); + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tRGN_COPY_BLIT [%i, %i: %i] ", w, h, dst->bpps); + for(int i = 0; i < 2; ++i) + { + nv04_region_print(i ? src : dst); + fprintf(stderr, i ? "\n" : " <- "); + } +#endif + + assert(!(src->pitch & 63) && src->pitch); + assert(!(dst->pitch & 63) && dst->pitch); + nv04_region_assert(dst, w, h); + nv04_region_assert(src, w, h); + + MARK_RING (chan, 12, 4); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RELOCo(chan, src->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(chan, dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (chan, cs2d_format); + OUT_RING (chan, (dst->pitch << 16) | src->pitch); + OUT_RELOCl(chan, src->bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, blit, 0x0300, 3); + OUT_RING (chan, (src->y << 16) | src->x); + OUT_RING (chan, (dst->y << 16) | dst->x); + OUT_RING (chan, ( h << 16) | w); +} + +/* THEOREM: a non-linearizable swizzled destination is always 64 byte aligned, except for 4x2 mipmap levels of swizzled 1bpp surfaces + * HYPOTESIS: + * 1. The first mipmap level is 64-byte-aligned + * PROOF: + * 1. Thus, all mipmaps level with a parent which is 64-byte or more in size are. + * 2. At 1bpp, the smallest levels with a <= 32-byte parent are either Nx1 or 1xN or size <=8, thus 4x2, 2x2 or 2x4 + * 3. Nx1, 1xN, 2x4, 2x2 have all subrects linearizable. 4x2 does not. + * 4. At 2/4bpp or more, the smallest levels with a 32-byte parent are 1xN, Nx1 or 2x2 + * + * However, nv04_region_align handles that. + */ + +// 0 -> done, 1 -> do with 3D engine or CPU, -1 -> do with CPU +// dst and src may be modified, and the possibly modified version should be passed to nv04_region_cpu if necessary +int +nv04_region_copy_2d(struct nv04_2d_context *ctx, struct nv04_region* dst, struct nv04_region* src, + int w, int h, int dst_to_gpu, int src_on_gpu) +{ + assert(src->bpps == dst->bpps); + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "RGN_COPY [%i, %i: %i] ", w, h, dst->bpps); + for(int i = 0; i < 2; ++i) + { + int gpu = i ? src_on_gpu : dst_to_gpu; + nv04_region_print(i ? src : dst); + fprintf(stderr, " %s", gpu ? "gpu" : "cpu"); + fprintf(stderr, i ? "\n" : " <- "); + } +#endif + + // if they are contiguous and either both swizzled or both linear, reshape + if(!dst->pitch == !src->pitch + && nv04_region_is_contiguous(dst, w, h) + && nv04_region_is_contiguous(src, w, h)) + { + nv04_region_contiguous_shape(dst, &w, &h, 6); + nv04_region_linearize_contiguous(dst, w, h); + nv04_region_linearize_contiguous(src, w, h); + } + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tOPT "); + for(int i = 0; i < 2; ++i) + { + nv04_region_print(i ? src : dst); + fprintf(stderr, i ? "\n" : " <- "); + } +#endif + + /* if the destination is not for GPU _and_ source is on CPU, use CPU */ + /* if the destination is not for GPU _or_ source is on CPU, use CPU only if we think it's faster than the GPU */ + /* TODO: benchmark to find out in which cases exactly we should prefer the CPU */ + if((!dst_to_gpu && !src_on_gpu) + || (!dst->pitch && dst->d > 1) + /* 3D swizzled destination are unwritable by the GPU, and 2D swizzled ones are readable only by the 3D engine */ + ) + return -1; + /* there is no known way to read 2D/3D-swizzled surfaces with the 2D engine + * ask the caller to use the 3D engine + * If a format cannot be sampled from the 3D engine there is no point in making it swizzled, so we must not do so + */ + else if(!src->pitch) + { +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tCOPY_ENG3D\n"); +#endif + return 1; + } + /* Setup transfer to swizzle the texture to vram if needed */ + else + { + if (!dst->pitch) + { + if(!dst_to_gpu) + { +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tCOPY_ENG3D\n"); +#endif + return 1; + } + else + { + assert(!nv04_region_align(dst, w, h, 6)); + + nv04_region_copy_swizzle(ctx, dst, src, w, h); + return 0; + } + } + else + { + /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback + * to NV_M2MF in this case. + * TODO: is this also true for the source? possibly not + * TODO: should we just always use m2mf? + * TODO: if not, add support for multiple operations to copy_blit + */ + + if (!dst_to_gpu + || w > 2047 + || h > 2047 + || (w & 1) + || nv04_region_align(src, w, h, 6) + || nv04_region_align(dst, w, h, 6) + ) + nv04_region_copy_m2mf(ctx, dst, src, w, h); + else + nv04_region_copy_blit(ctx, dst, src, w, h); + + return 0; + } + } +} + +static inline void +nv04_region_fill_gdirect(struct nv04_2d_context *ctx, struct nv04_region* dst, int w, int h, unsigned value) +{ + struct nouveau_channel *chan = ctx->surf2d->channel; + struct nouveau_grobj *surf2d = ctx->surf2d; + struct nouveau_grobj *rect = ctx->rect; + int cs2d_format, gdirect_format; + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tFILL_GDIRECT\n"); +#endif + + assert(!(dst->pitch & 63) && dst->pitch); + nv04_region_assert(dst, w, h); + + switch(dst->bpps) + { + case 0: + gdirect_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; + cs2d_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; + break; + case 1: + gdirect_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; + cs2d_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y16; + break; + case 2: + gdirect_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; + cs2d_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; + break; + default: + assert(0); + gdirect_format = 0; + cs2d_format = 0; + break; + } + + MARK_RING (chan, 15, 4); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RELOCo(chan, dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCo(chan, dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (chan, cs2d_format); + OUT_RING (chan, (dst->pitch << 16) | dst->pitch); + OUT_RELOCl(chan, dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1); + OUT_RING (chan, gdirect_format); + BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1); + OUT_RING (chan, value); + BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2); + OUT_RING (chan, (dst->x << 16) | dst->y); + OUT_RING (chan, ( w << 16) | h); +} + +int +nv04_region_fill_2d(struct nv04_2d_context *ctx, struct nv04_region *dst, + int w, int h, unsigned value) +{ + if(!w || !h) + return 0; + +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "FILL [%i, %i: %i] ", w, h, dst->bpps); + nv04_region_print(dst); + fprintf(stderr, " <- 0x%x\n", value); +#endif + + if(nv04_region_is_contiguous(dst, w, h)) + { + nv04_region_contiguous_shape(dst, &w, &h, 6); + nv04_region_linearize_contiguous(dst, w, h); + } + + // TODO: maybe do intermediate copies for some cases instead of using the 3D engine/CPU + /* GdiRect doesn't work together with swzsurf, so the 3D engine, or an intermediate copy, is the only option here */ + if(!dst->pitch) + { +#ifdef NV04_REGION_DEBUG + fprintf(stderr, "\tFILL_ENG3D\n"); +#endif + return 1; + } + else if(!nv04_region_align(dst, w, h, 6)) + { + nv04_region_fill_gdirect(ctx, dst, w, h, value); + return 0; + } + else + return -1; +} + + +void +nv04_2d_context_takedown(struct nv04_2d_context *ctx) +{ + nouveau_notifier_free(&ctx->ntfy); + nouveau_grobj_free(&ctx->m2mf); + nouveau_grobj_free(&ctx->surf2d); + nouveau_grobj_free(&ctx->swzsurf); + nouveau_grobj_free(&ctx->rect); + nouveau_grobj_free(&ctx->blit); + nouveau_grobj_free(&ctx->sifm); + + free(ctx); +} + +struct nv04_2d_context * +nv04_2d_context_init(struct nouveau_channel* chan) +{ + struct nv04_2d_context *ctx = calloc(1, sizeof(struct nv04_2d_context)); + unsigned handle = 0x88000000, class; + int ret; + + if (!ctx) + return NULL; + + ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy); + if (ret) { + nv04_2d_context_takedown(ctx); + return NULL; + } + + ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf); + if (ret) { + nv04_2d_context_takedown(ctx); + return NULL; + } + + BEGIN_RING(chan, ctx->m2mf, NV04_M2MF_DMA_NOTIFY, 1); + OUT_RING (chan, ctx->ntfy->handle); + + if (chan->device->chipset < 0x10) + class = NV04_CONTEXT_SURFACES_2D; + else + class = NV10_CONTEXT_SURFACES_2D; + + ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d); + if (ret) { + nv04_2d_context_takedown(ctx); + return NULL; + } + + BEGIN_RING(chan, ctx->surf2d, + NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); + + if (chan->device->chipset < 0x10) + class = NV04_IMAGE_BLIT; + else + class = NV11_IMAGE_BLIT; + + ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit); + if (ret) { + nv04_2d_context_takedown(ctx); + return NULL; + } + + BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_DMA_NOTIFY, 1); + OUT_RING (chan, ctx->ntfy->handle); + BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACES, 1); + OUT_RING (chan, ctx->surf2d->handle); + BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_OPERATION, 1); + OUT_RING (chan, NV01_IMAGE_BLIT_OPERATION_SRCCOPY); + + ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT, + &ctx->rect); + if (ret) { + nv04_2d_context_takedown(ctx); + return NULL; + } + + BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1); + OUT_RING (chan, ctx->ntfy->handle); + BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); + OUT_RING (chan, ctx->surf2d->handle); + BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1); + OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY); + BEGIN_RING(chan, ctx->rect, + NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1); + OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE); + + switch (chan->device->chipset & 0xf0) { + case 0x00: + case 0x10: + class = NV04_SWIZZLED_SURFACE; + break; + case 0x20: + class = NV11_SWIZZLED_SURFACE; + break; + case 0x30: + class = NV30_SWIZZLED_SURFACE; + break; + case 0x40: + case 0x60: + class = NV40_SWIZZLED_SURFACE; + break; + default: + /* Famous last words: this really can't happen.. */ + assert(0); + break; + } + + ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf); + if (ret) { + nv04_2d_context_takedown(ctx); + return NULL; + } + + /* all the Gallium MARK_RING calculations assume no autobinding, so do that now */ + if(ctx->swzsurf->bound == NOUVEAU_GROBJ_UNBOUND) + nouveau_grobj_autobind(ctx->swzsurf); + + switch (chan->device->chipset & 0xf0) { + case 0x10: + case 0x20: + class = NV10_SCALED_IMAGE_FROM_MEMORY; + break; + case 0x30: + class = NV30_SCALED_IMAGE_FROM_MEMORY; + break; + case 0x40: + case 0x60: + class = NV40_SCALED_IMAGE_FROM_MEMORY; + break; + default: + class = NV04_SCALED_IMAGE_FROM_MEMORY; + break; + } + + ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm); + if (ret) { + nv04_2d_context_takedown(ctx); + return NULL; + } + + /* all the Gallium MARK_RING calculations assume no autobinding, so do that now */ + if(ctx->sifm->bound == NOUVEAU_GROBJ_UNBOUND) + nouveau_grobj_autobind(ctx->sifm); + + return ctx; +} diff --git a/src/gallium/drivers/nvfx/nv04_2d.h b/src/gallium/drivers/nvfx/nv04_2d.h new file mode 100644 index 00000000000..00ee5bc0b23 --- /dev/null +++ b/src/gallium/drivers/nvfx/nv04_2d.h @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2009 Ben Skeggs + * Copyright 2009 Younes Manton + * Copyright 2010 Luca Barbieri + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +/* this code has no Mesa or Gallium dependency and can be reused in the classic Mesa driver or DDX */ + +#ifndef __NV04_2D_H__ +#define __NV04_2D_H__ + +struct nv04_2d_context; +struct nouveau_channel; +struct nouveau_bo; + +// NOTE: all functions taking this as a parameter will CLOBBER it (except for ->bo) +struct nv04_region { + struct nouveau_bo* bo; + int offset; + unsigned pitch; // 0 -> swizzled + unsigned bpps; // bpp shift (0, 1, 2; 3, 4 for fp/compressed) + unsigned one_bits; // number of high bits read and written as ones (for "no-alpha" optimization) + unsigned x, y, z; + unsigned w, h, d; +}; + +static inline void +nv04_region_try_to_linearize(struct nv04_region* rgn) +{ + assert(!rgn->pitch); + + if(rgn->d <= 1) + { + if(rgn->h <= 1 || rgn->w <= 2) + rgn->pitch = rgn->w << rgn->bpps; + } + else + { + if(rgn->h <= 2 && rgn->w <= 2) + { + rgn->pitch = rgn->w << rgn->bpps; + rgn->offset += rgn->z * rgn->h * rgn->pitch; + } + } +} + +void +nv04_memcpy(struct nv04_2d_context *ctx, + struct nouveau_bo* dstbo, int dstoff, + struct nouveau_bo* srcbo, int srcoff, + unsigned size); + +unsigned +nv04_region_begin(struct nv04_region* rgn, unsigned w, unsigned h); + +unsigned +nv04_region_end(struct nv04_region* rgn, unsigned w, unsigned h); + +void +nv04_2d_context_takedown(struct nv04_2d_context *pctx); + +struct nv04_2d_context * +nv04_2d_context_init(struct nouveau_channel* chan); + +void +nv04_region_copy_cpu(struct nv04_region* dst, struct nv04_region* src, int w, int h); + +void +nv04_region_fill_cpu(struct nv04_region* dst, int w, int h, unsigned value); + +int +nv04_region_copy_2d(struct nv04_2d_context *ctx, + struct nv04_region* dst, struct nv04_region* src, + int w, int h, + int dst_to_gpu, int src_on_gpu); + +int +nv04_region_fill_2d(struct nv04_2d_context *ctx, + struct nv04_region *dst, + int w, int h, + unsigned value); + +#endif diff --git a/src/gallium/drivers/nvfx/nv04_2d_loops.h b/src/gallium/drivers/nvfx/nv04_2d_loops.h new file mode 100644 index 00000000000..3a6787c0717 --- /dev/null +++ b/src/gallium/drivers/nvfx/nv04_2d_loops.h @@ -0,0 +1,70 @@ +#ifndef T +{ + if(dst->bpps == 0) +#define T uint8_t +#include "nv04_2d_loops.h" +#undef T + else if(dst->bpps == 1) +#define T uint16_t +#include "nv04_2d_loops.h" +#undef T + else if(dst->bpps == 2) +#define T uint32_t +#include "nv04_2d_loops.h" +#undef T + else + assert(0); +} +#else +#ifdef SWIZZLED_COPY_LOOPS +{ + if(!dst->pitch) + { + if(!src->pitch) + { + LOOP_Y + { + T* pdst = (T*)mdst + dswy[iy]; + T* psrc = (T*)msrc + sswy[iy]; + LOOP_X + { + assert((char*)&psrc[sswx[ix] + 1] <= ((char*)src->bo->map + src->bo->size)); + assert((char*)&pdst[dswx[ix] + 1] <= ((char*)dst->bo->map + dst->bo->size)); + pdst[dswx[ix]] = psrc[sswx[ix]]; + } + } + } + else + { + T* psrc = (T*)(msrc + ((dir > 0) ? src->y : (src->y + h - 1)) * src->pitch) + src->x; + LOOP_Y + { + T* pdst = (T*)mdst + dswy[iy]; + LOOP_X + { + assert((char*)&psrc[ix + 1] <= ((char*)src->bo->map + src->bo->size)); + assert((char*)&pdst[dswx[ix] + 1] <= ((char*)dst->bo->map + dst->bo->size)); + pdst[dswx[ix]] = psrc[ix]; + } + psrc = (T*)((char*)psrc + dir * src->pitch); + } + } + } + else + { + T* pdst = (T*)(mdst + ((dir > 0) ? dst->y : (dst->y + h - 1)) * dst->pitch) + dst->x; + LOOP_Y + { + T* psrc = (T*)msrc + sswy[iy]; + LOOP_X + { + assert((char*)&psrc[sswx[ix] + 1] <= ((char*)src->bo->map + src->bo->size)); + assert((char*)&pdst[ix + 1] <= ((char*)dst->bo->map + dst->bo->size)); + pdst[ix] = psrc[sswx[ix]]; + } + pdst = (T*)((char*)pdst + dir * dst->pitch); + } + } +} +#endif +#endif diff --git a/src/gallium/drivers/nvfx/nv04_surface_2d.c b/src/gallium/drivers/nvfx/nv04_surface_2d.c deleted file mode 100644 index 4ed574227d6..00000000000 --- a/src/gallium/drivers/nvfx/nv04_surface_2d.c +++ /dev/null @@ -1,535 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_format.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "util/u_memory.h" - -#include "nouveau/nouveau_winsys.h" -#include "nouveau/nouveau_util.h" -#include "nouveau/nouveau_screen.h" -#include "nv04_surface_2d.h" - -static INLINE int -nv04_surface_format(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_I8_UNORM: - return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_L8A8_UNORM: - return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8; - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; - default: - return -1; - } -} - -static INLINE int -nv04_rect_format(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_A8_UNORM: - return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; - default: - return -1; - } -} - -static INLINE int -nv04_scaled_image_format(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_I8_UNORM: - return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8; - case PIPE_FORMAT_B5G5R5A1_UNORM: - return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5; - case PIPE_FORMAT_B8G8R8A8_UNORM: - return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8; - case PIPE_FORMAT_B8G8R8X8_UNORM: - return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8; - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_L8A8_UNORM: - return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5; - default: - return -1; - } -} - -static INLINE unsigned -nv04_swizzle_bits_square(unsigned x, unsigned y) -{ - unsigned u = (x & 0x001) << 0 | - (x & 0x002) << 1 | - (x & 0x004) << 2 | - (x & 0x008) << 3 | - (x & 0x010) << 4 | - (x & 0x020) << 5 | - (x & 0x040) << 6 | - (x & 0x080) << 7 | - (x & 0x100) << 8 | - (x & 0x200) << 9 | - (x & 0x400) << 10 | - (x & 0x800) << 11; - - unsigned v = (y & 0x001) << 1 | - (y & 0x002) << 2 | - (y & 0x004) << 3 | - (y & 0x008) << 4 | - (y & 0x010) << 5 | - (y & 0x020) << 6 | - (y & 0x040) << 7 | - (y & 0x080) << 8 | - (y & 0x100) << 9 | - (y & 0x200) << 10 | - (y & 0x400) << 11 | - (y & 0x800) << 12; - return v | u; -} - -/* rectangular swizzled textures are linear concatenations of swizzled square tiles */ -static INLINE unsigned -nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h) -{ - unsigned s = MIN2(w, h); - unsigned m = s - 1; - return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m); -} - -static int -nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, - struct pipe_surface *dst, int dx, int dy, - struct pipe_surface *src, int sx, int sy, - int w, int h) -{ - struct nouveau_channel *chan = ctx->swzsurf->channel; - struct nouveau_grobj *swzsurf = ctx->swzsurf; - struct nouveau_grobj *sifm = ctx->sifm; - struct nouveau_bo *src_bo = ctx->buf(src); - struct nouveau_bo *dst_bo = ctx->buf(dst); - const unsigned src_pitch = ((struct nv04_surface *)src)->pitch; - /* Max width & height may not be the same on all HW, but must be POT */ - const unsigned max_w = 1024; - const unsigned max_h = 1024; - unsigned sub_w = w > max_w ? max_w : w; - unsigned sub_h = h > max_h ? max_h : h; - unsigned x; - unsigned y; - - /* Swizzled surfaces must be POT */ - assert(util_is_pot(dst->width) && util_is_pot(dst->height)); - - /* If area is too large to copy in one shot we must copy it in POT chunks to meet alignment requirements */ - assert(sub_w == w || util_is_pot(sub_w)); - assert(sub_h == h || util_is_pot(sub_h)); - - MARK_RING (chan, 8 + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*17, 2 + - ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*2); - - BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); - OUT_RELOCo(chan, dst_bo, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1); - OUT_RING (chan, nv04_surface_format(dst->format) | - log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT | - log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT); - - BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1); - OUT_RELOCo(chan, src_bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1); - OUT_RING (chan, swzsurf->handle); - - for (y = 0; y < h; y += sub_h) { - sub_h = MIN2(sub_h, h - y); - - for (x = 0; x < w; x += sub_w) { - sub_w = MIN2(sub_w, w - x); - - assert(!(dst->offset & 63)); - - BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); - OUT_RELOCl(chan, dst_bo, dst->offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, sifm, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); - OUT_RING (chan, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); - OUT_RING (chan, nv04_scaled_image_format(src->format)); - OUT_RING (chan, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); - OUT_RING (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT)); - OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w); - OUT_RING (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT)); - OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w); - OUT_RING (chan, 1 << 20); - OUT_RING (chan, 1 << 20); - - BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_SIZE, 4); - OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | sub_w); - OUT_RING (chan, src_pitch | - NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | - NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); - OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * util_format_get_blocksize(src->texture->format), - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RING (chan, 0); - } - } - - return 0; -} - -static int -nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, - struct pipe_surface *dst, int dx, int dy, - struct pipe_surface *src, int sx, int sy, int w, int h) -{ - struct nouveau_channel *chan = ctx->m2mf->channel; - struct nouveau_grobj *m2mf = ctx->m2mf; - struct nouveau_bo *src_bo = ctx->buf(src); - struct nouveau_bo *dst_bo = ctx->buf(dst); - unsigned src_pitch = ((struct nv04_surface *)src)->pitch; - unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; - unsigned dst_offset = dst->offset + dy * dst_pitch + - dx * util_format_get_blocksize(dst->texture->format); - unsigned src_offset = src->offset + sy * src_pitch + - sx * util_format_get_blocksize(src->texture->format); - - MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2); - BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); - OUT_RELOCo(chan, src_bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(chan, dst_bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - while (h) { - int count = (h > 2047) ? 2047 : h; - - BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); - OUT_RELOCl(chan, src_bo, src_offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCl(chan, dst_bo, dst_offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (chan, src_pitch); - OUT_RING (chan, dst_pitch); - OUT_RING (chan, w * util_format_get_blocksize(src->texture->format)); - OUT_RING (chan, count); - OUT_RING (chan, 0x0101); - OUT_RING (chan, 0); - - h -= count; - src_offset += src_pitch * count; - dst_offset += dst_pitch * count; - } - - return 0; -} - -static int -nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst, - int dx, int dy, struct pipe_surface *src, int sx, int sy, - int w, int h) -{ - struct nouveau_channel *chan = ctx->surf2d->channel; - struct nouveau_grobj *surf2d = ctx->surf2d; - struct nouveau_grobj *blit = ctx->blit; - struct nouveau_bo *src_bo = ctx->buf(src); - struct nouveau_bo *dst_bo = ctx->buf(dst); - unsigned src_pitch = ((struct nv04_surface *)src)->pitch; - unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; - int format; - - format = nv04_surface_format(dst->format); - if (format < 0) - return 1; - - MARK_RING (chan, 12, 4); - BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); - OUT_RING (chan, format); - OUT_RING (chan, (dst_pitch << 16) | src_pitch); - OUT_RELOCl(chan, src_bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, blit, 0x0300, 3); - OUT_RING (chan, (sy << 16) | sx); - OUT_RING (chan, (dy << 16) | dx); - OUT_RING (chan, ( h << 16) | w); - - return 0; -} - -static void -nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst, - int dx, int dy, struct pipe_surface *src, int sx, int sy, - int w, int h) -{ - int src_linear = src->texture->flags & NVFX_RESOURCE_FLAG_LINEAR; - int dst_linear = dst->texture->flags & NVFX_RESOURCE_FLAG_LINEAR; - - assert(src->format == dst->format); - - /* Setup transfer to swizzle the texture to vram if needed */ - if (src_linear && !dst_linear && w > 1 && h > 1) { - nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h); - return; - } - - /* Use M2MF instead of the blitter since it always works - * Any possible performance drop is likely to be not very significant - * and dwarfed anyway by the current buffer management problems - */ - nv04_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h); -} - -static void -nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst, - int dx, int dy, int w, int h, unsigned value) -{ - struct nouveau_channel *chan = ctx->surf2d->channel; - struct nouveau_grobj *surf2d = ctx->surf2d; - struct nouveau_grobj *rect = ctx->rect; - struct nouveau_bo *dst_bo = ctx->buf(dst); - unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; - int cs2d_format, gdirect_format; - - cs2d_format = nv04_surface_format(dst->format); - assert(cs2d_format >= 0); - - gdirect_format = nv04_rect_format(dst->format); - assert(gdirect_format >= 0); - - MARK_RING (chan, 16, 4); - BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); - OUT_RING (chan, cs2d_format); - OUT_RING (chan, (dst_pitch << 16) | dst_pitch); - OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1); - OUT_RING (chan, gdirect_format); - BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1); - OUT_RING (chan, value); - BEGIN_RING(chan, rect, - NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2); - OUT_RING (chan, (dx << 16) | dy); - OUT_RING (chan, ( w << 16) | h); -} - -void -nv04_surface_2d_takedown(struct nv04_surface_2d **pctx) -{ - struct nv04_surface_2d *ctx; - - if (!pctx || !*pctx) - return; - ctx = *pctx; - *pctx = NULL; - - nouveau_notifier_free(&ctx->ntfy); - nouveau_grobj_free(&ctx->m2mf); - nouveau_grobj_free(&ctx->surf2d); - nouveau_grobj_free(&ctx->swzsurf); - nouveau_grobj_free(&ctx->rect); - nouveau_grobj_free(&ctx->blit); - nouveau_grobj_free(&ctx->sifm); - - FREE(ctx); -} - -struct nv04_surface_2d * -nv04_surface_2d_init(struct nouveau_screen *screen) -{ - struct nv04_surface_2d *ctx = CALLOC_STRUCT(nv04_surface_2d); - struct nouveau_channel *chan = screen->channel; - unsigned handle = 0x88000000, class; - int ret; - - if (!ctx) - return NULL; - - ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - BEGIN_RING(chan, ctx->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1); - OUT_RING (chan, ctx->ntfy->handle); - - if (chan->device->chipset < 0x10) - class = NV04_CONTEXT_SURFACES_2D; - else - class = NV10_CONTEXT_SURFACES_2D; - - ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - BEGIN_RING(chan, ctx->surf2d, - NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RING (chan, chan->vram->handle); - OUT_RING (chan, chan->vram->handle); - - if (chan->device->chipset < 0x10) - class = NV04_IMAGE_BLIT; - else - class = NV12_IMAGE_BLIT; - - ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_DMA_NOTIFY, 1); - OUT_RING (chan, ctx->ntfy->handle); - BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACE, 1); - OUT_RING (chan, ctx->surf2d->handle); - BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_OPERATION, 1); - OUT_RING (chan, NV01_IMAGE_BLIT_OPERATION_SRCCOPY); - - ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT, - &ctx->rect); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1); - OUT_RING (chan, ctx->ntfy->handle); - BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); - OUT_RING (chan, ctx->surf2d->handle); - BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1); - OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY); - BEGIN_RING(chan, ctx->rect, - NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1); - OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE); - - switch (chan->device->chipset & 0xf0) { - case 0x00: - case 0x10: - class = NV04_SWIZZLED_SURFACE; - break; - case 0x20: - class = NV20_SWIZZLED_SURFACE; - break; - case 0x30: - class = NV30_SWIZZLED_SURFACE; - break; - case 0x40: - case 0x60: - class = NV40_SWIZZLED_SURFACE; - break; - default: - /* Famous last words: this really can't happen.. */ - assert(0); - break; - } - - ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - switch (chan->device->chipset & 0xf0) { - case 0x10: - case 0x20: - class = NV10_SCALED_IMAGE_FROM_MEMORY; - break; - case 0x30: - class = NV30_SCALED_IMAGE_FROM_MEMORY; - break; - case 0x40: - case 0x60: - class = NV40_SCALED_IMAGE_FROM_MEMORY; - break; - default: - class = NV04_SCALED_IMAGE_FROM_MEMORY; - break; - } - - ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - ctx->copy = nv04_surface_copy; - ctx->fill = nv04_surface_fill; - return ctx; -} - -struct nv04_surface* -nv04_surface_wrap_for_render(struct pipe_screen *pscreen, - struct nv04_surface_2d* eng2d, struct nv04_surface* ns) -{ - struct pipe_resource templ; - struct pipe_resource* temp_tex; - struct nv04_surface* temp_ns; - int temp_flags; - - temp_flags = (ns->base.usage | - PIPE_BIND_BLIT_SOURCE | - PIPE_BIND_BLIT_DESTINATION); - - ns->base.usage = (PIPE_BIND_BLIT_SOURCE | - PIPE_BIND_BLIT_DESTINATION); - - memset(&templ, 0, sizeof(templ)); - templ.format = ns->base.texture->format; - templ.target = PIPE_TEXTURE_2D; - templ.width0 = ns->base.width; - templ.height0 = ns->base.height; - templ.depth0 = 1; - templ.last_level = 0; - - // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented - templ.nr_samples = ns->base.texture->nr_samples; - - templ.bind = ns->base.texture->bind | PIPE_BIND_RENDER_TARGET; - - temp_tex = pscreen->resource_create(pscreen, &templ); - temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags); - temp_ns->backing = ns; - - if(ns->base.usage & PIPE_BIND_BLIT_SOURCE) - eng2d->copy(eng2d, &temp_ns->backing->base, - 0, 0, &ns->base, - 0, 0, ns->base.width, ns->base.height); - - return temp_ns; -} diff --git a/src/gallium/drivers/nvfx/nv04_surface_2d.h b/src/gallium/drivers/nvfx/nv04_surface_2d.h deleted file mode 100644 index 2123c3ed08b..00000000000 --- a/src/gallium/drivers/nvfx/nv04_surface_2d.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef __NV04_SURFACE_2D_H__ -#define __NV04_SURFACE_2D_H__ - -#include "pipe/p_state.h" - -struct nouveau_screen; - -struct nv04_surface { - struct pipe_surface base; - unsigned pitch; - struct nv04_surface* backing; -}; - -struct nv04_surface_2d { - struct nouveau_notifier *ntfy; - struct nouveau_grobj *surf2d; - struct nouveau_grobj *swzsurf; - struct nouveau_grobj *m2mf; - struct nouveau_grobj *rect; - struct nouveau_grobj *blit; - struct nouveau_grobj *sifm; - - struct nouveau_bo *(*buf)(struct pipe_surface *); - - void (*copy)(struct nv04_surface_2d *, struct pipe_surface *dst, - int dx, int dy, struct pipe_surface *src, int sx, int sy, - int w, int h); - void (*fill)(struct nv04_surface_2d *, struct pipe_surface *dst, - int dx, int dy, int w, int h, unsigned value); -}; - -struct nv04_surface_2d * -nv04_surface_2d_init(struct nouveau_screen *screen); - -void -nv04_surface_2d_takedown(struct nv04_surface_2d **); - -struct nv04_surface* -nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns); - -#define NVFX_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) - -#endif diff --git a/src/gallium/drivers/nvfx/nv30-40_3d.xml.h b/src/gallium/drivers/nvfx/nv30-40_3d.xml.h new file mode 100644 index 00000000000..a705a6bd3f2 --- /dev/null +++ b/src/gallium/drivers/nvfx/nv30-40_3d.xml.h @@ -0,0 +1,2022 @@ +#ifndef NV30_40_3D_XML +#define NV30_40_3D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv30-40_3d.xml ( 31709 bytes, from 2010-09-05 08:00:50) +- copyright.xml ( 6503 bytes, from 2010-04-10 23:15:50) +- nv_3ddefs.xml ( 15391 bytes, from 2010-09-05 08:00:46) +- nv_defs.xml ( 4437 bytes, from 2010-08-05 19:38:53) +- nv_object.xml ( 10424 bytes, from 2010-08-05 19:38:53) +- nvchipsets.xml ( 2824 bytes, from 2010-08-05 19:38:53) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NV30_3D_DMA_NOTIFY 0x00000180 + +#define NV30_3D_DMA_TEXTURE0 0x00000184 + +#define NV30_3D_DMA_TEXTURE1 0x00000188 + +#define NV30_3D_DMA_COLOR1 0x0000018c + +#define NV30_3D_DMA_UNK190 0x00000190 + +#define NV30_3D_DMA_COLOR0 0x00000194 + +#define NV30_3D_DMA_ZETA 0x00000198 + +#define NV30_3D_DMA_VTXBUF0 0x0000019c + +#define NV30_3D_DMA_VTXBUF1 0x000001a0 + +#define NV30_3D_DMA_FENCE 0x000001a4 + +#define NV30_3D_DMA_QUERY 0x000001a8 + +#define NV30_3D_DMA_UNK1AC 0x000001ac + +#define NV30_3D_DMA_UNK1B0 0x000001b0 + +#define NV40_3D_DMA_COLOR2 0x000001b4 + +#define NV40_3D_DMA_COLOR3 0x000001b8 + +#define NV30_3D_RT_HORIZ 0x00000200 +#define NV30_3D_RT_HORIZ_X__MASK 0x0000ffff +#define NV30_3D_RT_HORIZ_X__SHIFT 0 +#define NV30_3D_RT_HORIZ_W__MASK 0xffff0000 +#define NV30_3D_RT_HORIZ_W__SHIFT 16 + +#define NV30_3D_RT_VERT 0x00000204 +#define NV30_3D_RT_VERT_Y__MASK 0x0000ffff +#define NV30_3D_RT_VERT_Y__SHIFT 0 +#define NV30_3D_RT_VERT_H__MASK 0xffff0000 +#define NV30_3D_RT_VERT_H__SHIFT 16 + +#define NV30_3D_RT_FORMAT 0x00000208 +#define NV30_3D_RT_FORMAT_COLOR__MASK 0x0000001f +#define NV30_3D_RT_FORMAT_COLOR__SHIFT 0 +#define NV30_3D_RT_FORMAT_COLOR_R5G6B5 0x00000003 +#define NV30_3D_RT_FORMAT_COLOR_X8R8G8B8 0x00000005 +#define NV30_3D_RT_FORMAT_COLOR_A8R8G8B8 0x00000008 +#define NV30_3D_RT_FORMAT_COLOR_B8 0x00000009 +#define NV30_3D_RT_FORMAT_COLOR_A16B16G16R16_FLOAT 0x0000000b +#define NV30_3D_RT_FORMAT_COLOR_A32B32G32R32_FLOAT 0x0000000c +#define NV30_3D_RT_FORMAT_COLOR_R32_FLOAT 0x0000000d +#define NV30_3D_RT_FORMAT_COLOR_UNK0D 0x0000000d +#define NV30_3D_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f +#define NV30_3D_RT_FORMAT_COLOR_A8B8G8R8 0x00000010 +#define NV30_3D_RT_FORMAT_ZETA__MASK 0x000000e0 +#define NV30_3D_RT_FORMAT_ZETA__SHIFT 5 +#define NV30_3D_RT_FORMAT_ZETA_Z16 0x00000020 +#define NV30_3D_RT_FORMAT_ZETA_Z24S8 0x00000040 +#define NV30_3D_RT_FORMAT_TYPE__MASK 0x00000f00 +#define NV30_3D_RT_FORMAT_TYPE__SHIFT 8 +#define NV30_3D_RT_FORMAT_TYPE_LINEAR 0x00000100 +#define NV30_3D_RT_FORMAT_TYPE_SWIZZLED 0x00000200 +#define NV30_3D_RT_FORMAT_LOG2_WIDTH__MASK 0x00ff0000 +#define NV30_3D_RT_FORMAT_LOG2_WIDTH__SHIFT 16 +#define NV30_3D_RT_FORMAT_LOG2_HEIGHT__MASK 0xff000000 +#define NV30_3D_RT_FORMAT_LOG2_HEIGHT__SHIFT 24 + +#define NV30_3D_COLOR0_PITCH 0x0000020c +#define NV30_3D_COLOR0_PITCH_COLOR0__MASK 0x0000ffff +#define NV30_3D_COLOR0_PITCH_COLOR0__SHIFT 0 +#define NV30_3D_COLOR0_PITCH_ZETA__MASK 0xffff0000 +#define NV30_3D_COLOR0_PITCH_ZETA__SHIFT 16 + +#define NV40_3D_COLOR0_PITCH 0x0000020c + +#define NV30_3D_COLOR0_OFFSET 0x00000210 + +#define NV30_3D_ZETA_OFFSET 0x00000214 + +#define NV30_3D_COLOR1_OFFSET 0x00000218 + +#define NV30_3D_COLOR1_PITCH 0x0000021c + +#define NV30_3D_RT_ENABLE 0x00000220 +#define NV30_3D_RT_ENABLE_COLOR0 0x00000001 +#define NV30_3D_RT_ENABLE_COLOR1 0x00000002 +#define NV40_3D_RT_ENABLE_COLOR2 0x00000004 +#define NV40_3D_RT_ENABLE_COLOR3 0x00000008 +#define NV30_3D_RT_ENABLE_MRT 0x00000010 + +#define NV40_3D_ZETA_PITCH 0x0000022c + +#define NV30_3D_LMA_DEPTH_PITCH 0x0000022c + +#define NV30_3D_LMA_DEPTH_OFFSET 0x00000230 + +#define NV30_3D_TEX_UNITS_ENABLE 0x0000023c +#define NV30_3D_TEX_UNITS_ENABLE_TX0 0x00000001 +#define NV30_3D_TEX_UNITS_ENABLE_TX1 0x00000002 +#define NV30_3D_TEX_UNITS_ENABLE_TX2 0x00000004 +#define NV30_3D_TEX_UNITS_ENABLE_TX3 0x00000008 +#define NV30_3D_TEX_UNITS_ENABLE_TX4 0x00000010 +#define NV30_3D_TEX_UNITS_ENABLE_TX5 0x00000020 +#define NV30_3D_TEX_UNITS_ENABLE_TX6 0x00000040 +#define NV30_3D_TEX_UNITS_ENABLE_TX7 0x00000080 + +#define NV30_3D_TEX_MATRIX_ENABLE(i0) (0x00000240 + 0x4*(i0)) +#define NV30_3D_TEX_MATRIX_ENABLE__ESIZE 0x00000004 +#define NV30_3D_TEX_MATRIX_ENABLE__LEN 0x00000008 + +#define NV40_3D_COLOR2_PITCH 0x00000280 + +#define NV40_3D_COLOR3_PITCH 0x00000284 + +#define NV40_3D_COLOR2_OFFSET 0x00000288 + +#define NV40_3D_COLOR3_OFFSET 0x0000028c + +#define NV30_3D_VIEWPORT_TX_ORIGIN 0x000002b8 +#define NV30_3D_VIEWPORT_TX_ORIGIN_X__MASK 0x0000ffff +#define NV30_3D_VIEWPORT_TX_ORIGIN_X__SHIFT 0 +#define NV30_3D_VIEWPORT_TX_ORIGIN_Y__MASK 0xffff0000 +#define NV30_3D_VIEWPORT_TX_ORIGIN_Y__SHIFT 16 + +#define NV30_3D_VIEWPORT_CLIP_MODE 0x000002bc + +#define NV30_3D_VIEWPORT_CLIP_HORIZ(i0) (0x000002c0 + 0x8*(i0)) +#define NV30_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008 +#define NV30_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008 +#define NV30_3D_VIEWPORT_CLIP_HORIZ_L__MASK 0x0000ffff +#define NV30_3D_VIEWPORT_CLIP_HORIZ_L__SHIFT 0 +#define NV30_3D_VIEWPORT_CLIP_HORIZ_R__MASK 0xffff0000 +#define NV30_3D_VIEWPORT_CLIP_HORIZ_R__SHIFT 16 + +#define NV30_3D_VIEWPORT_CLIP_VERT(i0) (0x000002c4 + 0x8*(i0)) +#define NV30_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008 +#define NV30_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008 +#define NV30_3D_VIEWPORT_CLIP_VERT_T__MASK 0x0000ffff +#define NV30_3D_VIEWPORT_CLIP_VERT_T__SHIFT 0 +#define NV30_3D_VIEWPORT_CLIP_VERT_D__MASK 0xffff0000 +#define NV30_3D_VIEWPORT_CLIP_VERT_D__SHIFT 16 + +#define NV30_3D_DITHER_ENABLE 0x00000300 + +#define NV30_3D_ALPHA_FUNC_ENABLE 0x00000304 + +#define NV30_3D_ALPHA_FUNC_FUNC 0x00000308 +#define NV30_3D_ALPHA_FUNC_FUNC_NEVER 0x00000200 +#define NV30_3D_ALPHA_FUNC_FUNC_LESS 0x00000201 +#define NV30_3D_ALPHA_FUNC_FUNC_EQUAL 0x00000202 +#define NV30_3D_ALPHA_FUNC_FUNC_LEQUAL 0x00000203 +#define NV30_3D_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV30_3D_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV30_3D_ALPHA_FUNC_FUNC_GEQUAL 0x00000206 +#define NV30_3D_ALPHA_FUNC_FUNC_ALWAYS 0x00000207 + +#define NV30_3D_ALPHA_FUNC_REF 0x0000030c + +#define NV30_3D_BLEND_FUNC_ENABLE 0x00000310 + +#define NV30_3D_BLEND_FUNC_SRC 0x00000314 +#define NV30_3D_BLEND_FUNC_SRC_RGB__MASK 0x0000ffff +#define NV30_3D_BLEND_FUNC_SRC_RGB__SHIFT 0 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ZERO 0x00000000 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE 0x00000001 +#define NV30_3D_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV30_3D_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV30_3D_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV30_3D_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV30_3D_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV30_3D_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV30_3D_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003 +#define NV30_3D_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA__MASK 0xffff0000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA__SHIFT 16 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE 0x00010000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x03000000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x03020000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x03040000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x03060000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x03080000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x80010000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x80030000 +#define NV30_3D_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 + +#define NV30_3D_BLEND_FUNC_DST 0x00000318 +#define NV30_3D_BLEND_FUNC_DST_RGB__MASK 0x0000ffff +#define NV30_3D_BLEND_FUNC_DST_RGB__SHIFT 0 +#define NV30_3D_BLEND_FUNC_DST_RGB_ZERO 0x00000000 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE 0x00000001 +#define NV30_3D_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV30_3D_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV30_3D_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV30_3D_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV30_3D_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV30_3D_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV30_3D_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003 +#define NV30_3D_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV30_3D_BLEND_FUNC_DST_ALPHA__MASK 0xffff0000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA__SHIFT 16 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE 0x00010000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x03000000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x03020000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x03040000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x03060000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x03080000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x80010000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000 +#define NV30_3D_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 + +#define NV30_3D_BLEND_COLOR 0x0000031c +#define NV30_3D_BLEND_COLOR_B__MASK 0x000000ff +#define NV30_3D_BLEND_COLOR_B__SHIFT 0 +#define NV30_3D_BLEND_COLOR_G__MASK 0x0000ff00 +#define NV30_3D_BLEND_COLOR_G__SHIFT 8 +#define NV30_3D_BLEND_COLOR_R__MASK 0x00ff0000 +#define NV30_3D_BLEND_COLOR_R__SHIFT 16 +#define NV30_3D_BLEND_COLOR_A__MASK 0xff000000 +#define NV30_3D_BLEND_COLOR_A__SHIFT 24 + +#define NV30_3D_BLEND_EQUATION 0x00000320 +#define NV30_3D_BLEND_EQUATION_FUNC_ADD 0x00008006 +#define NV30_3D_BLEND_EQUATION_MIN 0x00008007 +#define NV30_3D_BLEND_EQUATION_MAX 0x00008008 +#define NV30_3D_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a +#define NV30_3D_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NV40_3D_BLEND_EQUATION 0x00000320 +#define NV40_3D_BLEND_EQUATION_RGB__MASK 0x0000ffff +#define NV40_3D_BLEND_EQUATION_RGB__SHIFT 0 +#define NV40_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NV40_3D_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NV40_3D_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NV40_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NV40_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV40_3D_BLEND_EQUATION_ALPHA__MASK 0xffff0000 +#define NV40_3D_BLEND_EQUATION_ALPHA__SHIFT 16 +#define NV40_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x80060000 +#define NV40_3D_BLEND_EQUATION_ALPHA_MIN 0x80070000 +#define NV40_3D_BLEND_EQUATION_ALPHA_MAX 0x80080000 +#define NV40_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x800a0000 +#define NV40_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x800b0000 + +#define NV30_3D_COLOR_MASK 0x00000324 +#define NV30_3D_COLOR_MASK_B 0x000000ff +#define NV30_3D_COLOR_MASK_G 0x0000ff00 +#define NV30_3D_COLOR_MASK_R 0x00ff0000 +#define NV30_3D_COLOR_MASK_A 0xff000000 + +#define NV30_3D_STENCIL(i0) (0x00000328 + 0x20*(i0)) +#define NV30_3D_STENCIL__ESIZE 0x00000020 +#define NV30_3D_STENCIL__LEN 0x00000002 + +#define NV30_3D_STENCIL_ENABLE(i0) (0x00000328 + 0x20*(i0)) + +#define NV30_3D_STENCIL_MASK(i0) (0x0000032c + 0x20*(i0)) + +#define NV30_3D_STENCIL_FUNC_FUNC(i0) (0x00000330 + 0x20*(i0)) +#define NV30_3D_STENCIL_FUNC_FUNC_NEVER 0x00000200 +#define NV30_3D_STENCIL_FUNC_FUNC_LESS 0x00000201 +#define NV30_3D_STENCIL_FUNC_FUNC_EQUAL 0x00000202 +#define NV30_3D_STENCIL_FUNC_FUNC_LEQUAL 0x00000203 +#define NV30_3D_STENCIL_FUNC_FUNC_GREATER 0x00000204 +#define NV30_3D_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV30_3D_STENCIL_FUNC_FUNC_GEQUAL 0x00000206 +#define NV30_3D_STENCIL_FUNC_FUNC_ALWAYS 0x00000207 + +#define NV30_3D_STENCIL_FUNC_REF(i0) (0x00000334 + 0x20*(i0)) + +#define NV30_3D_STENCIL_FUNC_MASK(i0) (0x00000338 + 0x20*(i0)) + +#define NV30_3D_STENCIL_OP_FAIL(i0) (0x0000033c + 0x20*(i0)) +#define NV30_3D_STENCIL_OP_FAIL_ZERO 0x00000000 +#define NV30_3D_STENCIL_OP_FAIL_INVERT 0x0000150a +#define NV30_3D_STENCIL_OP_FAIL_KEEP 0x00001e00 +#define NV30_3D_STENCIL_OP_FAIL_REPLACE 0x00001e01 +#define NV30_3D_STENCIL_OP_FAIL_INCR 0x00001e02 +#define NV30_3D_STENCIL_OP_FAIL_DECR 0x00001e03 +#define NV30_3D_STENCIL_OP_FAIL_INCR_WRAP 0x00008507 +#define NV30_3D_STENCIL_OP_FAIL_DECR_WRAP 0x00008508 + +#define NV30_3D_STENCIL_OP_ZFAIL(i0) (0x00000340 + 0x20*(i0)) +#define NV30_3D_STENCIL_OP_ZFAIL_ZERO 0x00000000 +#define NV30_3D_STENCIL_OP_ZFAIL_INVERT 0x0000150a +#define NV30_3D_STENCIL_OP_ZFAIL_KEEP 0x00001e00 +#define NV30_3D_STENCIL_OP_ZFAIL_REPLACE 0x00001e01 +#define NV30_3D_STENCIL_OP_ZFAIL_INCR 0x00001e02 +#define NV30_3D_STENCIL_OP_ZFAIL_DECR 0x00001e03 +#define NV30_3D_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV30_3D_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NV30_3D_STENCIL_OP_ZPASS(i0) (0x00000344 + 0x20*(i0)) +#define NV30_3D_STENCIL_OP_ZPASS_ZERO 0x00000000 +#define NV30_3D_STENCIL_OP_ZPASS_INVERT 0x0000150a +#define NV30_3D_STENCIL_OP_ZPASS_KEEP 0x00001e00 +#define NV30_3D_STENCIL_OP_ZPASS_REPLACE 0x00001e01 +#define NV30_3D_STENCIL_OP_ZPASS_INCR 0x00001e02 +#define NV30_3D_STENCIL_OP_ZPASS_DECR 0x00001e03 +#define NV30_3D_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV30_3D_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NV30_3D_SHADE_MODEL 0x00000368 +#define NV30_3D_SHADE_MODEL_FLAT 0x00001d00 +#define NV30_3D_SHADE_MODEL_SMOOTH 0x00001d01 + +#define NV30_3D_FOG_ENABLE 0x0000036c + +#define NV30_3D_FOG_COLOR 0x00000370 +#define NV30_3D_FOG_COLOR_R__MASK 0x000000ff +#define NV30_3D_FOG_COLOR_R__SHIFT 0 +#define NV30_3D_FOG_COLOR_G__MASK 0x0000ff00 +#define NV30_3D_FOG_COLOR_G__SHIFT 8 +#define NV30_3D_FOG_COLOR_B__MASK 0x00ff0000 +#define NV30_3D_FOG_COLOR_B__SHIFT 16 +#define NV30_3D_FOG_COLOR_A__MASK 0xff000000 +#define NV30_3D_FOG_COLOR_A__SHIFT 24 + +#define NV40_3D_MRT_COLOR_MASK 0x00000370 +#define NV40_3D_MRT_COLOR_MASK_BUFFER1_A 0x00000010 +#define NV40_3D_MRT_COLOR_MASK_BUFFER1_R 0x00000020 +#define NV40_3D_MRT_COLOR_MASK_BUFFER1_G 0x00000040 +#define NV40_3D_MRT_COLOR_MASK_BUFFER1_B 0x00000080 +#define NV40_3D_MRT_COLOR_MASK_BUFFER2_A 0x00000100 +#define NV40_3D_MRT_COLOR_MASK_BUFFER2_R 0x00000200 +#define NV40_3D_MRT_COLOR_MASK_BUFFER2_G 0x00000400 +#define NV40_3D_MRT_COLOR_MASK_BUFFER2_B 0x00000800 +#define NV40_3D_MRT_COLOR_MASK_BUFFER3_A 0x00001000 +#define NV40_3D_MRT_COLOR_MASK_BUFFER3_R 0x00002000 +#define NV40_3D_MRT_COLOR_MASK_BUFFER3_G 0x00004000 +#define NV40_3D_MRT_COLOR_MASK_BUFFER3_B 0x00008000 + +#define NV30_3D_COLOR_LOGIC_OP_ENABLE 0x00000374 + +#define NV30_3D_COLOR_LOGIC_OP_OP 0x00000378 +#define NV30_3D_COLOR_LOGIC_OP_OP_CLEAR 0x00001500 +#define NV30_3D_COLOR_LOGIC_OP_OP_AND 0x00001501 +#define NV30_3D_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502 +#define NV30_3D_COLOR_LOGIC_OP_OP_COPY 0x00001503 +#define NV30_3D_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504 +#define NV30_3D_COLOR_LOGIC_OP_OP_NOOP 0x00001505 +#define NV30_3D_COLOR_LOGIC_OP_OP_XOR 0x00001506 +#define NV30_3D_COLOR_LOGIC_OP_OP_OR 0x00001507 +#define NV30_3D_COLOR_LOGIC_OP_OP_NOR 0x00001508 +#define NV30_3D_COLOR_LOGIC_OP_OP_EQUIV 0x00001509 +#define NV30_3D_COLOR_LOGIC_OP_OP_INVERT 0x0000150a +#define NV30_3D_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b +#define NV30_3D_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c +#define NV30_3D_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d +#define NV30_3D_COLOR_LOGIC_OP_OP_NAND 0x0000150e +#define NV30_3D_COLOR_LOGIC_OP_OP_SET 0x0000150f + +#define NV30_3D_NORMALIZE_ENABLE 0x0000037c + +#define NV30_3D_COLOR_MATERIAL 0x00000390 +#define NV30_3D_COLOR_MATERIAL_FRONT_EMISSION_ENABLE 0x00000001 +#define NV30_3D_COLOR_MATERIAL_FRONT_AMBIENT_ENABLE 0x00000004 +#define NV30_3D_COLOR_MATERIAL_FRONT_DIFFUSE_ENABLE 0x00000010 +#define NV30_3D_COLOR_MATERIAL_FRONT_SPECULAR_ENABLE 0x00000040 +#define NV30_3D_COLOR_MATERIAL_BACK_EMISSION_ENABLE 0x00000100 +#define NV30_3D_COLOR_MATERIAL_BACK_AMBIENT_ENABLE 0x00000400 +#define NV30_3D_COLOR_MATERIAL_BACK_DIFFUSE_ENABLE 0x00001000 +#define NV30_3D_COLOR_MATERIAL_BACK_SPECULAR_ENABLE 0x00004000 + +#define NV30_3D_DEPTH_RANGE_NEAR 0x00000394 + +#define NV30_3D_DEPTH_RANGE_FAR 0x00000398 + +#define NV30_3D_COLOR_MATERIAL_FRONT 0x000003a0 + + +#define NV30_3D_COLOR_MATERIAL_FRONT_R 0x000003a0 + +#define NV30_3D_COLOR_MATERIAL_FRONT_G 0x000003a4 + +#define NV30_3D_COLOR_MATERIAL_FRONT_B 0x000003a8 + +#define NV30_3D_COLOR_MATERIAL_FRONT_A 0x000003ac + +#define NV40_3D_MIPMAP_ROUNDING 0x000003b0 +#define NV40_3D_MIPMAP_ROUNDING_MODE__MASK 0x00100000 +#define NV40_3D_MIPMAP_ROUNDING_MODE__SHIFT 20 +#define NV40_3D_MIPMAP_ROUNDING_MODE_UP 0x00000000 +#define NV40_3D_MIPMAP_ROUNDING_MODE_DOWN 0x00100000 + +#define NV30_3D_LINE_WIDTH 0x000003b8 + +#define NV30_3D_LINE_SMOOTH_ENABLE 0x000003bc + + + +#define NV30_3D_TEX_GEN_MODE(i0, i1) (0x00000400 + 0x10*(i0) + 0x4*(i1)) +#define NV30_3D_TEX_GEN_MODE__ESIZE 0x00000004 +#define NV30_3D_TEX_GEN_MODE__LEN 0x00000004 +#define NV30_3D_TEX_GEN_MODE_FALSE 0x00000000 +#define NV30_3D_TEX_GEN_MODE_EYE_LINEAR 0x00002400 +#define NV30_3D_TEX_GEN_MODE_OBJECT_LINEAR 0x00002401 +#define NV30_3D_TEX_GEN_MODE_SPHERE_MAP 0x00002402 +#define NV30_3D_TEX_GEN_MODE_NORMAL_MAP 0x00008511 +#define NV30_3D_TEX_GEN_MODE_REFLECTION_MAP 0x00008512 + +#define NV30_3D_MODELVIEW_MATRIX(i0) (0x00000480 + 0x4*(i0)) +#define NV30_3D_MODELVIEW_MATRIX__ESIZE 0x00000004 +#define NV30_3D_MODELVIEW_MATRIX__LEN 0x00000010 + +#define NV30_3D_INVERSE_MODELVIEW_MATRIX(i0) (0x00000580 + 0x4*(i0)) +#define NV30_3D_INVERSE_MODELVIEW_MATRIX__ESIZE 0x00000004 +#define NV30_3D_INVERSE_MODELVIEW_MATRIX__LEN 0x0000000c + +#define NV30_3D_PROJECTION_MATRIX(i0) (0x00000680 + 0x4*(i0)) +#define NV30_3D_PROJECTION_MATRIX__ESIZE 0x00000004 +#define NV30_3D_PROJECTION_MATRIX__LEN 0x00000010 + + +#define NV30_3D_TEX_MATRIX(i0, i1) (0x000006c0 + 0x40*(i0) + 0x4*(i1)) +#define NV30_3D_TEX_MATRIX__ESIZE 0x00000004 +#define NV30_3D_TEX_MATRIX__LEN 0x00000010 + +#define NV30_3D_SCISSOR_HORIZ 0x000008c0 +#define NV30_3D_SCISSOR_HORIZ_X__MASK 0x0000ffff +#define NV30_3D_SCISSOR_HORIZ_X__SHIFT 0 +#define NV30_3D_SCISSOR_HORIZ_W__MASK 0xffff0000 +#define NV30_3D_SCISSOR_HORIZ_W__SHIFT 16 + +#define NV30_3D_SCISSOR_VERT 0x000008c4 +#define NV30_3D_SCISSOR_VERT_Y__MASK 0x0000ffff +#define NV30_3D_SCISSOR_VERT_Y__SHIFT 0 +#define NV30_3D_SCISSOR_VERT_H__MASK 0xffff0000 +#define NV30_3D_SCISSOR_VERT_H__SHIFT 16 + +#define NV30_3D_FOG_COORD_DIST 0x000008c8 + +#define NV30_3D_FOG_MODE 0x000008cc + +#define NV30_3D_FOG_EQUATION_CONSTANT 0x000008d0 + +#define NV30_3D_FOG_EQUATION_LINEAR 0x000008d4 + +#define NV30_3D_FOG_EQUATION_QUADRATIC 0x000008d8 + +#define NV30_3D_FP_ACTIVE_PROGRAM 0x000008e4 +#define NV30_3D_FP_ACTIVE_PROGRAM_DMA0 0x00000001 +#define NV30_3D_FP_ACTIVE_PROGRAM_DMA1 0x00000002 +#define NV30_3D_FP_ACTIVE_PROGRAM_OFFSET__MASK 0xfffffffc +#define NV30_3D_FP_ACTIVE_PROGRAM_OFFSET__SHIFT 2 + + +#define NV30_3D_RC_COLOR0 0x000008ec +#define NV30_3D_RC_COLOR0_B__MASK 0x000000ff +#define NV30_3D_RC_COLOR0_B__SHIFT 0 +#define NV30_3D_RC_COLOR0_G__MASK 0x0000ff00 +#define NV30_3D_RC_COLOR0_G__SHIFT 8 +#define NV30_3D_RC_COLOR0_R__MASK 0x00ff0000 +#define NV30_3D_RC_COLOR0_R__SHIFT 16 +#define NV30_3D_RC_COLOR0_A__MASK 0xff000000 +#define NV30_3D_RC_COLOR0_A__SHIFT 24 + +#define NV30_3D_RC_COLOR1 0x000008f0 +#define NV30_3D_RC_COLOR1_B__MASK 0x000000ff +#define NV30_3D_RC_COLOR1_B__SHIFT 0 +#define NV30_3D_RC_COLOR1_G__MASK 0x0000ff00 +#define NV30_3D_RC_COLOR1_G__SHIFT 8 +#define NV30_3D_RC_COLOR1_R__MASK 0x00ff0000 +#define NV30_3D_RC_COLOR1_R__SHIFT 16 +#define NV30_3D_RC_COLOR1_A__MASK 0xff000000 +#define NV30_3D_RC_COLOR1_A__SHIFT 24 + +#define NV30_3D_RC_FINAL0 0x000008f4 +#define NV30_3D_RC_FINAL0_D_INPUT__MASK 0x0000000f +#define NV30_3D_RC_FINAL0_D_INPUT__SHIFT 0 +#define NV30_3D_RC_FINAL0_D_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL0_D_INPUT_CONSTANT_COLOR0 0x00000001 +#define NV30_3D_RC_FINAL0_D_INPUT_CONSTANT_COLOR1 0x00000002 +#define NV30_3D_RC_FINAL0_D_INPUT_FOG 0x00000003 +#define NV30_3D_RC_FINAL0_D_INPUT_PRIMARY_COLOR 0x00000004 +#define NV30_3D_RC_FINAL0_D_INPUT_SECONDARY_COLOR 0x00000005 +#define NV30_3D_RC_FINAL0_D_INPUT_TEXTURE0 0x00000008 +#define NV30_3D_RC_FINAL0_D_INPUT_TEXTURE1 0x00000009 +#define NV30_3D_RC_FINAL0_D_INPUT_TEXTURE2 0x0000000a +#define NV30_3D_RC_FINAL0_D_INPUT_TEXTURE3 0x0000000b +#define NV30_3D_RC_FINAL0_D_INPUT_SPARE0 0x0000000c +#define NV30_3D_RC_FINAL0_D_INPUT_SPARE1 0x0000000d +#define NV30_3D_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e +#define NV30_3D_RC_FINAL0_D_INPUT_E_TIMES_F 0x0000000f +#define NV30_3D_RC_FINAL0_D_COMPONENT_USAGE__MASK 0x00000010 +#define NV30_3D_RC_FINAL0_D_COMPONENT_USAGE__SHIFT 4 +#define NV30_3D_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV30_3D_RC_FINAL0_D_MAPPING__MASK 0x000000e0 +#define NV30_3D_RC_FINAL0_D_MAPPING__SHIFT 5 +#define NV30_3D_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT 0x00000020 +#define NV30_3D_RC_FINAL0_D_MAPPING_EXPAND_NORMAL 0x00000040 +#define NV30_3D_RC_FINAL0_D_MAPPING_EXPAND_NEGATE 0x00000060 +#define NV30_3D_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL 0x00000080 +#define NV30_3D_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE 0x000000a0 +#define NV30_3D_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY 0x000000c0 +#define NV30_3D_RC_FINAL0_D_MAPPING_SIGNED_NEGATE 0x000000e0 +#define NV30_3D_RC_FINAL0_C_INPUT__MASK 0x00000f00 +#define NV30_3D_RC_FINAL0_C_INPUT__SHIFT 8 +#define NV30_3D_RC_FINAL0_C_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL0_C_INPUT_CONSTANT_COLOR0 0x00000100 +#define NV30_3D_RC_FINAL0_C_INPUT_CONSTANT_COLOR1 0x00000200 +#define NV30_3D_RC_FINAL0_C_INPUT_FOG 0x00000300 +#define NV30_3D_RC_FINAL0_C_INPUT_PRIMARY_COLOR 0x00000400 +#define NV30_3D_RC_FINAL0_C_INPUT_SECONDARY_COLOR 0x00000500 +#define NV30_3D_RC_FINAL0_C_INPUT_TEXTURE0 0x00000800 +#define NV30_3D_RC_FINAL0_C_INPUT_TEXTURE1 0x00000900 +#define NV30_3D_RC_FINAL0_C_INPUT_TEXTURE2 0x00000a00 +#define NV30_3D_RC_FINAL0_C_INPUT_TEXTURE3 0x00000b00 +#define NV30_3D_RC_FINAL0_C_INPUT_SPARE0 0x00000c00 +#define NV30_3D_RC_FINAL0_C_INPUT_SPARE1 0x00000d00 +#define NV30_3D_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00 +#define NV30_3D_RC_FINAL0_C_INPUT_E_TIMES_F 0x00000f00 +#define NV30_3D_RC_FINAL0_C_COMPONENT_USAGE__MASK 0x00001000 +#define NV30_3D_RC_FINAL0_C_COMPONENT_USAGE__SHIFT 12 +#define NV30_3D_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV30_3D_RC_FINAL0_C_MAPPING__MASK 0x0000e000 +#define NV30_3D_RC_FINAL0_C_MAPPING__SHIFT 13 +#define NV30_3D_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT 0x00002000 +#define NV30_3D_RC_FINAL0_C_MAPPING_EXPAND_NORMAL 0x00004000 +#define NV30_3D_RC_FINAL0_C_MAPPING_EXPAND_NEGATE 0x00006000 +#define NV30_3D_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL 0x00008000 +#define NV30_3D_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE 0x0000a000 +#define NV30_3D_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY 0x0000c000 +#define NV30_3D_RC_FINAL0_C_MAPPING_SIGNED_NEGATE 0x0000e000 +#define NV30_3D_RC_FINAL0_B_INPUT__MASK 0x000f0000 +#define NV30_3D_RC_FINAL0_B_INPUT__SHIFT 16 +#define NV30_3D_RC_FINAL0_B_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL0_B_INPUT_CONSTANT_COLOR0 0x00010000 +#define NV30_3D_RC_FINAL0_B_INPUT_CONSTANT_COLOR1 0x00020000 +#define NV30_3D_RC_FINAL0_B_INPUT_FOG 0x00030000 +#define NV30_3D_RC_FINAL0_B_INPUT_PRIMARY_COLOR 0x00040000 +#define NV30_3D_RC_FINAL0_B_INPUT_SECONDARY_COLOR 0x00050000 +#define NV30_3D_RC_FINAL0_B_INPUT_TEXTURE0 0x00080000 +#define NV30_3D_RC_FINAL0_B_INPUT_TEXTURE1 0x00090000 +#define NV30_3D_RC_FINAL0_B_INPUT_TEXTURE2 0x000a0000 +#define NV30_3D_RC_FINAL0_B_INPUT_TEXTURE3 0x000b0000 +#define NV30_3D_RC_FINAL0_B_INPUT_SPARE0 0x000c0000 +#define NV30_3D_RC_FINAL0_B_INPUT_SPARE1 0x000d0000 +#define NV30_3D_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000 +#define NV30_3D_RC_FINAL0_B_INPUT_E_TIMES_F 0x000f0000 +#define NV30_3D_RC_FINAL0_B_COMPONENT_USAGE__MASK 0x00100000 +#define NV30_3D_RC_FINAL0_B_COMPONENT_USAGE__SHIFT 20 +#define NV30_3D_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV30_3D_RC_FINAL0_B_MAPPING__MASK 0x00e00000 +#define NV30_3D_RC_FINAL0_B_MAPPING__SHIFT 21 +#define NV30_3D_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT 0x00200000 +#define NV30_3D_RC_FINAL0_B_MAPPING_EXPAND_NORMAL 0x00400000 +#define NV30_3D_RC_FINAL0_B_MAPPING_EXPAND_NEGATE 0x00600000 +#define NV30_3D_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL 0x00800000 +#define NV30_3D_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE 0x00a00000 +#define NV30_3D_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY 0x00c00000 +#define NV30_3D_RC_FINAL0_B_MAPPING_SIGNED_NEGATE 0x00e00000 +#define NV30_3D_RC_FINAL0_A_INPUT__MASK 0x0f000000 +#define NV30_3D_RC_FINAL0_A_INPUT__SHIFT 24 +#define NV30_3D_RC_FINAL0_A_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL0_A_INPUT_CONSTANT_COLOR0 0x01000000 +#define NV30_3D_RC_FINAL0_A_INPUT_CONSTANT_COLOR1 0x02000000 +#define NV30_3D_RC_FINAL0_A_INPUT_FOG 0x03000000 +#define NV30_3D_RC_FINAL0_A_INPUT_PRIMARY_COLOR 0x04000000 +#define NV30_3D_RC_FINAL0_A_INPUT_SECONDARY_COLOR 0x05000000 +#define NV30_3D_RC_FINAL0_A_INPUT_TEXTURE0 0x08000000 +#define NV30_3D_RC_FINAL0_A_INPUT_TEXTURE1 0x09000000 +#define NV30_3D_RC_FINAL0_A_INPUT_TEXTURE2 0x0a000000 +#define NV30_3D_RC_FINAL0_A_INPUT_TEXTURE3 0x0b000000 +#define NV30_3D_RC_FINAL0_A_INPUT_SPARE0 0x0c000000 +#define NV30_3D_RC_FINAL0_A_INPUT_SPARE1 0x0d000000 +#define NV30_3D_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000 +#define NV30_3D_RC_FINAL0_A_INPUT_E_TIMES_F 0x0f000000 +#define NV30_3D_RC_FINAL0_A_COMPONENT_USAGE__MASK 0x10000000 +#define NV30_3D_RC_FINAL0_A_COMPONENT_USAGE__SHIFT 28 +#define NV30_3D_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV30_3D_RC_FINAL0_A_MAPPING__MASK 0xe0000000 +#define NV30_3D_RC_FINAL0_A_MAPPING__SHIFT 29 +#define NV30_3D_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT 0x20000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_EXPAND_NORMAL 0x40000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_EXPAND_NEGATE 0x60000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL 0x80000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE 0xa0000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY 0xc0000000 +#define NV30_3D_RC_FINAL0_A_MAPPING_SIGNED_NEGATE 0xe0000000 + +#define NV30_3D_RC_FINAL1 0x000008f8 +#define NV30_3D_RC_FINAL1_COLOR_SUM_CLAMP 0x00000080 +#define NV30_3D_RC_FINAL1_G_INPUT__MASK 0x00000f00 +#define NV30_3D_RC_FINAL1_G_INPUT__SHIFT 8 +#define NV30_3D_RC_FINAL1_G_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL1_G_INPUT_CONSTANT_COLOR0 0x00000100 +#define NV30_3D_RC_FINAL1_G_INPUT_CONSTANT_COLOR1 0x00000200 +#define NV30_3D_RC_FINAL1_G_INPUT_FOG 0x00000300 +#define NV30_3D_RC_FINAL1_G_INPUT_PRIMARY_COLOR 0x00000400 +#define NV30_3D_RC_FINAL1_G_INPUT_SECONDARY_COLOR 0x00000500 +#define NV30_3D_RC_FINAL1_G_INPUT_TEXTURE0 0x00000800 +#define NV30_3D_RC_FINAL1_G_INPUT_TEXTURE1 0x00000900 +#define NV30_3D_RC_FINAL1_G_INPUT_TEXTURE2 0x00000a00 +#define NV30_3D_RC_FINAL1_G_INPUT_TEXTURE3 0x00000b00 +#define NV30_3D_RC_FINAL1_G_INPUT_SPARE0 0x00000c00 +#define NV30_3D_RC_FINAL1_G_INPUT_SPARE1 0x00000d00 +#define NV30_3D_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00 +#define NV30_3D_RC_FINAL1_G_INPUT_E_TIMES_F 0x00000f00 +#define NV30_3D_RC_FINAL1_G_COMPONENT_USAGE__MASK 0x00001000 +#define NV30_3D_RC_FINAL1_G_COMPONENT_USAGE__SHIFT 12 +#define NV30_3D_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV30_3D_RC_FINAL1_G_MAPPING__MASK 0x0000e000 +#define NV30_3D_RC_FINAL1_G_MAPPING__SHIFT 13 +#define NV30_3D_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT 0x00002000 +#define NV30_3D_RC_FINAL1_G_MAPPING_EXPAND_NORMAL 0x00004000 +#define NV30_3D_RC_FINAL1_G_MAPPING_EXPAND_NEGATE 0x00006000 +#define NV30_3D_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL 0x00008000 +#define NV30_3D_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE 0x0000a000 +#define NV30_3D_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY 0x0000c000 +#define NV30_3D_RC_FINAL1_G_MAPPING_SIGNED_NEGATE 0x0000e000 +#define NV30_3D_RC_FINAL1_F_INPUT__MASK 0x000f0000 +#define NV30_3D_RC_FINAL1_F_INPUT__SHIFT 16 +#define NV30_3D_RC_FINAL1_F_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL1_F_INPUT_CONSTANT_COLOR0 0x00010000 +#define NV30_3D_RC_FINAL1_F_INPUT_CONSTANT_COLOR1 0x00020000 +#define NV30_3D_RC_FINAL1_F_INPUT_FOG 0x00030000 +#define NV30_3D_RC_FINAL1_F_INPUT_PRIMARY_COLOR 0x00040000 +#define NV30_3D_RC_FINAL1_F_INPUT_SECONDARY_COLOR 0x00050000 +#define NV30_3D_RC_FINAL1_F_INPUT_TEXTURE0 0x00080000 +#define NV30_3D_RC_FINAL1_F_INPUT_TEXTURE1 0x00090000 +#define NV30_3D_RC_FINAL1_F_INPUT_TEXTURE2 0x000a0000 +#define NV30_3D_RC_FINAL1_F_INPUT_TEXTURE3 0x000b0000 +#define NV30_3D_RC_FINAL1_F_INPUT_SPARE0 0x000c0000 +#define NV30_3D_RC_FINAL1_F_INPUT_SPARE1 0x000d0000 +#define NV30_3D_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000 +#define NV30_3D_RC_FINAL1_F_INPUT_E_TIMES_F 0x000f0000 +#define NV30_3D_RC_FINAL1_F_COMPONENT_USAGE__MASK 0x00100000 +#define NV30_3D_RC_FINAL1_F_COMPONENT_USAGE__SHIFT 20 +#define NV30_3D_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV30_3D_RC_FINAL1_F_MAPPING__MASK 0x00e00000 +#define NV30_3D_RC_FINAL1_F_MAPPING__SHIFT 21 +#define NV30_3D_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT 0x00200000 +#define NV30_3D_RC_FINAL1_F_MAPPING_EXPAND_NORMAL 0x00400000 +#define NV30_3D_RC_FINAL1_F_MAPPING_EXPAND_NEGATE 0x00600000 +#define NV30_3D_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL 0x00800000 +#define NV30_3D_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE 0x00a00000 +#define NV30_3D_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY 0x00c00000 +#define NV30_3D_RC_FINAL1_F_MAPPING_SIGNED_NEGATE 0x00e00000 +#define NV30_3D_RC_FINAL1_E_INPUT__MASK 0x0f000000 +#define NV30_3D_RC_FINAL1_E_INPUT__SHIFT 24 +#define NV30_3D_RC_FINAL1_E_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_FINAL1_E_INPUT_CONSTANT_COLOR0 0x01000000 +#define NV30_3D_RC_FINAL1_E_INPUT_CONSTANT_COLOR1 0x02000000 +#define NV30_3D_RC_FINAL1_E_INPUT_FOG 0x03000000 +#define NV30_3D_RC_FINAL1_E_INPUT_PRIMARY_COLOR 0x04000000 +#define NV30_3D_RC_FINAL1_E_INPUT_SECONDARY_COLOR 0x05000000 +#define NV30_3D_RC_FINAL1_E_INPUT_TEXTURE0 0x08000000 +#define NV30_3D_RC_FINAL1_E_INPUT_TEXTURE1 0x09000000 +#define NV30_3D_RC_FINAL1_E_INPUT_TEXTURE2 0x0a000000 +#define NV30_3D_RC_FINAL1_E_INPUT_TEXTURE3 0x0b000000 +#define NV30_3D_RC_FINAL1_E_INPUT_SPARE0 0x0c000000 +#define NV30_3D_RC_FINAL1_E_INPUT_SPARE1 0x0d000000 +#define NV30_3D_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000 +#define NV30_3D_RC_FINAL1_E_INPUT_E_TIMES_F 0x0f000000 +#define NV30_3D_RC_FINAL1_E_COMPONENT_USAGE__MASK 0x10000000 +#define NV30_3D_RC_FINAL1_E_COMPONENT_USAGE__SHIFT 28 +#define NV30_3D_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV30_3D_RC_FINAL1_E_MAPPING__MASK 0xe0000000 +#define NV30_3D_RC_FINAL1_E_MAPPING__SHIFT 29 +#define NV30_3D_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT 0x20000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_EXPAND_NORMAL 0x40000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_EXPAND_NEGATE 0x60000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL 0x80000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE 0xa0000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY 0xc0000000 +#define NV30_3D_RC_FINAL1_E_MAPPING_SIGNED_NEGATE 0xe0000000 + +#define NV30_3D_RC_ENABLE 0x000008fc +#define NV30_3D_RC_ENABLE_NUM_COMBINERS__MASK 0x0000000f +#define NV30_3D_RC_ENABLE_NUM_COMBINERS__SHIFT 0 +#define NV30_3D_RC_ENABLE_STAGE_CONSTANT_COLOR0 0x0000f000 +#define NV30_3D_RC_ENABLE_STAGE_CONSTANT_COLOR1 0x000f0000 + + +#define NV30_3D_RC_IN_ALPHA(i0) (0x00000900 + 0x20*(i0)) +#define NV30_3D_RC_IN_ALPHA_D_INPUT__MASK 0x0000000f +#define NV30_3D_RC_IN_ALPHA_D_INPUT__SHIFT 0 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0 0x00000001 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1 0x00000002 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_FOG 0x00000003 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR 0x00000004 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR 0x00000005 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_TEXTURE0 0x00000008 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_TEXTURE1 0x00000009 +#define NV30_3D_RC_IN_ALPHA_D_INPUT_TEXTURE2 0x0000000a +#define NV30_3D_RC_IN_ALPHA_D_INPUT_TEXTURE3 0x0000000b +#define NV30_3D_RC_IN_ALPHA_D_INPUT_SPARE0 0x0000000c +#define NV30_3D_RC_IN_ALPHA_D_INPUT_SPARE1 0x0000000d +#define NV30_3D_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e +#define NV30_3D_RC_IN_ALPHA_D_INPUT_E_TIMES_F 0x0000000f +#define NV30_3D_RC_IN_ALPHA_D_COMPONENT_USAGE__MASK 0x00000010 +#define NV30_3D_RC_IN_ALPHA_D_COMPONENT_USAGE__SHIFT 4 +#define NV30_3D_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000 +#define NV30_3D_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING__MASK 0x000000e0 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING__SHIFT 5 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT 0x00000020 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL 0x00000040 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE 0x00000060 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL 0x00000080 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE 0x000000a0 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY 0x000000c0 +#define NV30_3D_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE 0x000000e0 +#define NV30_3D_RC_IN_ALPHA_C_INPUT__MASK 0x00000f00 +#define NV30_3D_RC_IN_ALPHA_C_INPUT__SHIFT 8 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0 0x00000100 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1 0x00000200 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_FOG 0x00000300 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR 0x00000400 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR 0x00000500 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_TEXTURE0 0x00000800 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_TEXTURE1 0x00000900 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_TEXTURE2 0x00000a00 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_TEXTURE3 0x00000b00 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_SPARE0 0x00000c00 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_SPARE1 0x00000d00 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00 +#define NV30_3D_RC_IN_ALPHA_C_INPUT_E_TIMES_F 0x00000f00 +#define NV30_3D_RC_IN_ALPHA_C_COMPONENT_USAGE__MASK 0x00001000 +#define NV30_3D_RC_IN_ALPHA_C_COMPONENT_USAGE__SHIFT 12 +#define NV30_3D_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000 +#define NV30_3D_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING__MASK 0x0000e000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING__SHIFT 13 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT 0x00002000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL 0x00004000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE 0x00006000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL 0x00008000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE 0x0000a000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY 0x0000c000 +#define NV30_3D_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE 0x0000e000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT__MASK 0x000f0000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT__SHIFT 16 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0 0x00010000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1 0x00020000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_FOG 0x00030000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR 0x00040000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR 0x00050000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_TEXTURE0 0x00080000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_TEXTURE1 0x00090000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_TEXTURE2 0x000a0000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_TEXTURE3 0x000b0000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_SPARE0 0x000c0000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_SPARE1 0x000d0000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000 +#define NV30_3D_RC_IN_ALPHA_B_INPUT_E_TIMES_F 0x000f0000 +#define NV30_3D_RC_IN_ALPHA_B_COMPONENT_USAGE__MASK 0x00100000 +#define NV30_3D_RC_IN_ALPHA_B_COMPONENT_USAGE__SHIFT 20 +#define NV30_3D_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000 +#define NV30_3D_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING__MASK 0x00e00000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING__SHIFT 21 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT 0x00200000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL 0x00400000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE 0x00600000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL 0x00800000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE 0x00a00000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY 0x00c00000 +#define NV30_3D_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE 0x00e00000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT__MASK 0x0f000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT__SHIFT 24 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0 0x01000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1 0x02000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_FOG 0x03000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR 0x04000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR 0x05000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_TEXTURE0 0x08000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_TEXTURE1 0x09000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_TEXTURE2 0x0a000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_TEXTURE3 0x0b000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_SPARE0 0x0c000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_SPARE1 0x0d000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000 +#define NV30_3D_RC_IN_ALPHA_A_INPUT_E_TIMES_F 0x0f000000 +#define NV30_3D_RC_IN_ALPHA_A_COMPONENT_USAGE__MASK 0x10000000 +#define NV30_3D_RC_IN_ALPHA_A_COMPONENT_USAGE__SHIFT 28 +#define NV30_3D_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000 +#define NV30_3D_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING__MASK 0xe0000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING__SHIFT 29 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT 0x20000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL 0x40000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE 0x60000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL 0x80000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE 0xa0000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY 0xc0000000 +#define NV30_3D_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE 0xe0000000 + +#define NV30_3D_RC_IN_RGB(i0) (0x00000904 + 0x20*(i0)) +#define NV30_3D_RC_IN_RGB_D_INPUT__MASK 0x0000000f +#define NV30_3D_RC_IN_RGB_D_INPUT__SHIFT 0 +#define NV30_3D_RC_IN_RGB_D_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0 0x00000001 +#define NV30_3D_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1 0x00000002 +#define NV30_3D_RC_IN_RGB_D_INPUT_FOG 0x00000003 +#define NV30_3D_RC_IN_RGB_D_INPUT_PRIMARY_COLOR 0x00000004 +#define NV30_3D_RC_IN_RGB_D_INPUT_SECONDARY_COLOR 0x00000005 +#define NV30_3D_RC_IN_RGB_D_INPUT_TEXTURE0 0x00000008 +#define NV30_3D_RC_IN_RGB_D_INPUT_TEXTURE1 0x00000009 +#define NV30_3D_RC_IN_RGB_D_INPUT_TEXTURE2 0x0000000a +#define NV30_3D_RC_IN_RGB_D_INPUT_TEXTURE3 0x0000000b +#define NV30_3D_RC_IN_RGB_D_INPUT_SPARE0 0x0000000c +#define NV30_3D_RC_IN_RGB_D_INPUT_SPARE1 0x0000000d +#define NV30_3D_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e +#define NV30_3D_RC_IN_RGB_D_INPUT_E_TIMES_F 0x0000000f +#define NV30_3D_RC_IN_RGB_D_COMPONENT_USAGE__MASK 0x00000010 +#define NV30_3D_RC_IN_RGB_D_COMPONENT_USAGE__SHIFT 4 +#define NV30_3D_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV30_3D_RC_IN_RGB_D_MAPPING__MASK 0x000000e0 +#define NV30_3D_RC_IN_RGB_D_MAPPING__SHIFT 5 +#define NV30_3D_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT 0x00000020 +#define NV30_3D_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL 0x00000040 +#define NV30_3D_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE 0x00000060 +#define NV30_3D_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL 0x00000080 +#define NV30_3D_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE 0x000000a0 +#define NV30_3D_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY 0x000000c0 +#define NV30_3D_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE 0x000000e0 +#define NV30_3D_RC_IN_RGB_C_INPUT__MASK 0x00000f00 +#define NV30_3D_RC_IN_RGB_C_INPUT__SHIFT 8 +#define NV30_3D_RC_IN_RGB_C_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0 0x00000100 +#define NV30_3D_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1 0x00000200 +#define NV30_3D_RC_IN_RGB_C_INPUT_FOG 0x00000300 +#define NV30_3D_RC_IN_RGB_C_INPUT_PRIMARY_COLOR 0x00000400 +#define NV30_3D_RC_IN_RGB_C_INPUT_SECONDARY_COLOR 0x00000500 +#define NV30_3D_RC_IN_RGB_C_INPUT_TEXTURE0 0x00000800 +#define NV30_3D_RC_IN_RGB_C_INPUT_TEXTURE1 0x00000900 +#define NV30_3D_RC_IN_RGB_C_INPUT_TEXTURE2 0x00000a00 +#define NV30_3D_RC_IN_RGB_C_INPUT_TEXTURE3 0x00000b00 +#define NV30_3D_RC_IN_RGB_C_INPUT_SPARE0 0x00000c00 +#define NV30_3D_RC_IN_RGB_C_INPUT_SPARE1 0x00000d00 +#define NV30_3D_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00 +#define NV30_3D_RC_IN_RGB_C_INPUT_E_TIMES_F 0x00000f00 +#define NV30_3D_RC_IN_RGB_C_COMPONENT_USAGE__MASK 0x00001000 +#define NV30_3D_RC_IN_RGB_C_COMPONENT_USAGE__SHIFT 12 +#define NV30_3D_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV30_3D_RC_IN_RGB_C_MAPPING__MASK 0x0000e000 +#define NV30_3D_RC_IN_RGB_C_MAPPING__SHIFT 13 +#define NV30_3D_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT 0x00002000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL 0x00004000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE 0x00006000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL 0x00008000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE 0x0000a000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY 0x0000c000 +#define NV30_3D_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE 0x0000e000 +#define NV30_3D_RC_IN_RGB_B_INPUT__MASK 0x000f0000 +#define NV30_3D_RC_IN_RGB_B_INPUT__SHIFT 16 +#define NV30_3D_RC_IN_RGB_B_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0 0x00010000 +#define NV30_3D_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1 0x00020000 +#define NV30_3D_RC_IN_RGB_B_INPUT_FOG 0x00030000 +#define NV30_3D_RC_IN_RGB_B_INPUT_PRIMARY_COLOR 0x00040000 +#define NV30_3D_RC_IN_RGB_B_INPUT_SECONDARY_COLOR 0x00050000 +#define NV30_3D_RC_IN_RGB_B_INPUT_TEXTURE0 0x00080000 +#define NV30_3D_RC_IN_RGB_B_INPUT_TEXTURE1 0x00090000 +#define NV30_3D_RC_IN_RGB_B_INPUT_TEXTURE2 0x000a0000 +#define NV30_3D_RC_IN_RGB_B_INPUT_TEXTURE3 0x000b0000 +#define NV30_3D_RC_IN_RGB_B_INPUT_SPARE0 0x000c0000 +#define NV30_3D_RC_IN_RGB_B_INPUT_SPARE1 0x000d0000 +#define NV30_3D_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000 +#define NV30_3D_RC_IN_RGB_B_INPUT_E_TIMES_F 0x000f0000 +#define NV30_3D_RC_IN_RGB_B_COMPONENT_USAGE__MASK 0x00100000 +#define NV30_3D_RC_IN_RGB_B_COMPONENT_USAGE__SHIFT 20 +#define NV30_3D_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV30_3D_RC_IN_RGB_B_MAPPING__MASK 0x00e00000 +#define NV30_3D_RC_IN_RGB_B_MAPPING__SHIFT 21 +#define NV30_3D_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT 0x00200000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL 0x00400000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE 0x00600000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL 0x00800000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE 0x00a00000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY 0x00c00000 +#define NV30_3D_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE 0x00e00000 +#define NV30_3D_RC_IN_RGB_A_INPUT__MASK 0x0f000000 +#define NV30_3D_RC_IN_RGB_A_INPUT__SHIFT 24 +#define NV30_3D_RC_IN_RGB_A_INPUT_ZERO 0x00000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0 0x01000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1 0x02000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_FOG 0x03000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_PRIMARY_COLOR 0x04000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_SECONDARY_COLOR 0x05000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_TEXTURE0 0x08000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_TEXTURE1 0x09000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_TEXTURE2 0x0a000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_TEXTURE3 0x0b000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_SPARE0 0x0c000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_SPARE1 0x0d000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000 +#define NV30_3D_RC_IN_RGB_A_INPUT_E_TIMES_F 0x0f000000 +#define NV30_3D_RC_IN_RGB_A_COMPONENT_USAGE__MASK 0x10000000 +#define NV30_3D_RC_IN_RGB_A_COMPONENT_USAGE__SHIFT 28 +#define NV30_3D_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000 +#define NV30_3D_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING__MASK 0xe0000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING__SHIFT 29 +#define NV30_3D_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY 0x00000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT 0x20000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL 0x40000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE 0x60000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL 0x80000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE 0xa0000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY 0xc0000000 +#define NV30_3D_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE 0xe0000000 + +#define NV30_3D_RC_CONSTANT_COLOR0(i0) (0x00000908 + 0x20*(i0)) +#define NV30_3D_RC_CONSTANT_COLOR0_B__MASK 0x000000ff +#define NV30_3D_RC_CONSTANT_COLOR0_B__SHIFT 0 +#define NV30_3D_RC_CONSTANT_COLOR0_G__MASK 0x0000ff00 +#define NV30_3D_RC_CONSTANT_COLOR0_G__SHIFT 8 +#define NV30_3D_RC_CONSTANT_COLOR0_R__MASK 0x00ff0000 +#define NV30_3D_RC_CONSTANT_COLOR0_R__SHIFT 16 +#define NV30_3D_RC_CONSTANT_COLOR0_A__MASK 0xff000000 +#define NV30_3D_RC_CONSTANT_COLOR0_A__SHIFT 24 + +#define NV30_3D_RC_CONSTANT_COLOR1(i0) (0x0000090c + 0x20*(i0)) +#define NV30_3D_RC_CONSTANT_COLOR1_B__MASK 0x000000ff +#define NV30_3D_RC_CONSTANT_COLOR1_B__SHIFT 0 +#define NV30_3D_RC_CONSTANT_COLOR1_G__MASK 0x0000ff00 +#define NV30_3D_RC_CONSTANT_COLOR1_G__SHIFT 8 +#define NV30_3D_RC_CONSTANT_COLOR1_R__MASK 0x00ff0000 +#define NV30_3D_RC_CONSTANT_COLOR1_R__SHIFT 16 +#define NV30_3D_RC_CONSTANT_COLOR1_A__MASK 0xff000000 +#define NV30_3D_RC_CONSTANT_COLOR1_A__SHIFT 24 + +#define NV30_3D_RC_OUT_ALPHA(i0) (0x00000910 + 0x20*(i0)) +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT__MASK 0x0000000f +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT__SHIFT 0 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0 0x00000001 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1 0x00000002 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR 0x00000004 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR 0x00000005 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0 0x00000008 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1 0x00000009 +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE2 0x0000000a +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE3 0x0000000b +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_SPARE0 0x0000000c +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_SPARE1 0x0000000d +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e +#define NV30_3D_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F 0x0000000f +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT__MASK 0x000000f0 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT__SHIFT 4 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0 0x00000010 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1 0x00000020 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR 0x00000040 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR 0x00000050 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0 0x00000080 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1 0x00000090 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE2 0x000000a0 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE3 0x000000b0 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_SPARE0 0x000000c0 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_SPARE1 0x000000d0 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000000e0 +#define NV30_3D_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F 0x000000f0 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT__MASK 0x00000f00 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT__SHIFT 8 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0 0x00000100 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1 0x00000200 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR 0x00000400 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR 0x00000500 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0 0x00000800 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1 0x00000900 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE2 0x00000a00 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE3 0x00000b00 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0 0x00000c00 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1 0x00000d00 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00 +#define NV30_3D_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F 0x00000f00 +#define NV30_3D_RC_OUT_ALPHA_CD_DOT_PRODUCT 0x00001000 +#define NV30_3D_RC_OUT_ALPHA_AB_DOT_PRODUCT 0x00002000 +#define NV30_3D_RC_OUT_ALPHA_MUX_SUM 0x00004000 +#define NV30_3D_RC_OUT_ALPHA_BIAS__MASK 0x00008000 +#define NV30_3D_RC_OUT_ALPHA_BIAS__SHIFT 15 +#define NV30_3D_RC_OUT_ALPHA_BIAS_NONE 0x00000000 +#define NV30_3D_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF 0x00008000 +#define NV30_3D_RC_OUT_ALPHA_SCALE__MASK 0x00030000 +#define NV30_3D_RC_OUT_ALPHA_SCALE__SHIFT 16 +#define NV30_3D_RC_OUT_ALPHA_SCALE_NONE 0x00000000 +#define NV30_3D_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO 0x00010000 +#define NV30_3D_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR 0x00020000 +#define NV30_3D_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF 0x00030000 + +#define NV30_3D_RC_OUT_RGB(i0) (0x00000914 + 0x20*(i0)) +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT__MASK 0x0000000f +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT__SHIFT 0 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0 0x00000001 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1 0x00000002 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR 0x00000004 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR 0x00000005 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE0 0x00000008 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE1 0x00000009 +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE2 0x0000000a +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE3 0x0000000b +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_SPARE0 0x0000000c +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_SPARE1 0x0000000d +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e +#define NV30_3D_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F 0x0000000f +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT__MASK 0x000000f0 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT__SHIFT 4 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0 0x00000010 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1 0x00000020 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR 0x00000040 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR 0x00000050 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE0 0x00000080 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE1 0x00000090 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE2 0x000000a0 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE3 0x000000b0 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_SPARE0 0x000000c0 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_SPARE1 0x000000d0 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000000e0 +#define NV30_3D_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F 0x000000f0 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT__MASK 0x00000f00 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT__SHIFT 8 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0 0x00000100 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1 0x00000200 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR 0x00000400 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR 0x00000500 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0 0x00000800 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1 0x00000900 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE2 0x00000a00 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE3 0x00000b00 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0 0x00000c00 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE1 0x00000d00 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00 +#define NV30_3D_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F 0x00000f00 +#define NV30_3D_RC_OUT_RGB_CD_DOT_PRODUCT 0x00001000 +#define NV30_3D_RC_OUT_RGB_AB_DOT_PRODUCT 0x00002000 +#define NV30_3D_RC_OUT_RGB_MUX_SUM 0x00004000 +#define NV30_3D_RC_OUT_RGB_BIAS__MASK 0x00008000 +#define NV30_3D_RC_OUT_RGB_BIAS__SHIFT 15 +#define NV30_3D_RC_OUT_RGB_BIAS_NONE 0x00000000 +#define NV30_3D_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF 0x00008000 +#define NV30_3D_RC_OUT_RGB_SCALE__MASK 0x00030000 +#define NV30_3D_RC_OUT_RGB_SCALE__SHIFT 16 +#define NV30_3D_RC_OUT_RGB_SCALE_NONE 0x00000000 +#define NV30_3D_RC_OUT_RGB_SCALE_SCALE_BY_TWO 0x00010000 +#define NV30_3D_RC_OUT_RGB_SCALE_SCALE_BY_FOUR 0x00020000 +#define NV30_3D_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF 0x00030000 + +#define NV30_3D_VIEWPORT_HORIZ 0x00000a00 +#define NV30_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff +#define NV30_3D_VIEWPORT_HORIZ_X__SHIFT 0 +#define NV30_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000 +#define NV30_3D_VIEWPORT_HORIZ_W__SHIFT 16 + +#define NV30_3D_VIEWPORT_VERT 0x00000a04 +#define NV30_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff +#define NV30_3D_VIEWPORT_VERT_Y__SHIFT 0 +#define NV30_3D_VIEWPORT_VERT_H__MASK 0xffff0000 +#define NV30_3D_VIEWPORT_VERT_H__SHIFT 16 + +#define NV30_3D_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION 0x00000a10 + + +#define NV30_3D_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x00000a10 + +#define NV30_3D_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x00000a14 + +#define NV30_3D_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x00000a18 + +#define NV30_3D_VIEWPORT_TRANSLATE 0x00000a20 + + +#define NV30_3D_VIEWPORT_TRANSLATE_X 0x00000a20 + +#define NV30_3D_VIEWPORT_TRANSLATE_Y 0x00000a24 + +#define NV30_3D_VIEWPORT_TRANSLATE_Z 0x00000a28 + +#define NV30_3D_VIEWPORT_TRANSLATE_W 0x00000a2c + +#define NV30_3D_VIEWPORT_SCALE 0x00000a30 + + +#define NV30_3D_VIEWPORT_SCALE_X 0x00000a30 + +#define NV30_3D_VIEWPORT_SCALE_Y 0x00000a34 + +#define NV30_3D_VIEWPORT_SCALE_Z 0x00000a38 + +#define NV30_3D_VIEWPORT_SCALE_W 0x00000a3c + +#define NV30_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000a60 + +#define NV30_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000a64 + +#define NV30_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000a68 + +#define NV30_3D_DEPTH_FUNC 0x00000a6c +#define NV30_3D_DEPTH_FUNC_NEVER 0x00000200 +#define NV30_3D_DEPTH_FUNC_LESS 0x00000201 +#define NV30_3D_DEPTH_FUNC_EQUAL 0x00000202 +#define NV30_3D_DEPTH_FUNC_LEQUAL 0x00000203 +#define NV30_3D_DEPTH_FUNC_GREATER 0x00000204 +#define NV30_3D_DEPTH_FUNC_NOTEQUAL 0x00000205 +#define NV30_3D_DEPTH_FUNC_GEQUAL 0x00000206 +#define NV30_3D_DEPTH_FUNC_ALWAYS 0x00000207 + +#define NV30_3D_DEPTH_WRITE_ENABLE 0x00000a70 + +#define NV30_3D_DEPTH_TEST_ENABLE 0x00000a74 + +#define NV30_3D_POLYGON_OFFSET_FACTOR 0x00000a78 + +#define NV30_3D_POLYGON_OFFSET_UNITS 0x00000a7c + +#define NV30_3D_VTX_ATTR_3I_XY(i0) (0x00000a80 + 0x8*(i0)) +#define NV30_3D_VTX_ATTR_3I_XY__ESIZE 0x00000008 +#define NV30_3D_VTX_ATTR_3I_XY__LEN 0x00000010 +#define NV30_3D_VTX_ATTR_3I_XY_X__MASK 0x0000ffff +#define NV30_3D_VTX_ATTR_3I_XY_X__SHIFT 0 +#define NV30_3D_VTX_ATTR_3I_XY_Y__MASK 0xffff0000 +#define NV30_3D_VTX_ATTR_3I_XY_Y__SHIFT 16 + +#define NV30_3D_VTX_ATTR_3I_Z(i0) (0x00000a84 + 0x8*(i0)) +#define NV30_3D_VTX_ATTR_3I_Z__ESIZE 0x00000008 +#define NV30_3D_VTX_ATTR_3I_Z__LEN 0x00000010 +#define NV30_3D_VTX_ATTR_3I_Z_Z__MASK 0x0000ffff +#define NV30_3D_VTX_ATTR_3I_Z_Z__SHIFT 0 + +#define NV30_3D_TEX_FILTER_OPTIMIZATION(i0) (0x00000b00 + 0x4*(i0)) +#define NV30_3D_TEX_FILTER_OPTIMIZATION__ESIZE 0x00000004 +#define NV30_3D_TEX_FILTER_OPTIMIZATION__LEN 0x00000004 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR__MASK 0x0000001f +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR__SHIFT 0 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR_OFF 0x00000000 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR_HIGH_QUALITY 0x00000004 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR_QUALITY 0x00000006 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR_PERFORMANCE 0x00000008 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_TRILINEAR_HIGH_PERFORMANCE 0x00000018 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_ANISO_SAMPLE__MASK 0x000001c0 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_ANISO_SAMPLE__SHIFT 6 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_ANISO_SAMPLE_OFF 0x00000000 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_ANISO_SAMPLE_HIGH_QUALITY 0x000000c0 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_ANISO_SAMPLE_QUALITY 0x000001c0 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_ANISO_SAMPLE_PERFORMANCE 0x00000140 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_UNKNOWN__MASK 0x00007c00 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_UNKNOWN__SHIFT 10 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_UNKNOWN_OFF 0x00000000 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_UNKNOWN_PARTIAL 0x00002c00 +#define NV40_3D_TEX_FILTER_OPTIMIZATION_UNKNOWN_FULL 0x00007c00 + +#define NV40_3D_UNK0B40(i0) (0x00000b40 + 0x4*(i0)) +#define NV40_3D_UNK0B40__ESIZE 0x00000004 +#define NV40_3D_UNK0B40__LEN 0x00000008 + +#define NV30_3D_VP_UPLOAD_INST(i0) (0x00000b80 + 0x4*(i0)) +#define NV30_3D_VP_UPLOAD_INST__ESIZE 0x00000004 +#define NV30_3D_VP_UPLOAD_INST__LEN 0x00000004 + + +#define NV30_3D_TEX_CLIP_PLANE(i0) (0x00000e00 + 0x10*(i0)) + + +#define NV30_3D_TEX_CLIP_PLANE_X(i0) (0x00000e00 + 0x10*(i0)) + +#define NV30_3D_TEX_CLIP_PLANE_Y(i0) (0x00000e04 + 0x10*(i0)) + +#define NV30_3D_TEX_CLIP_PLANE_Z(i0) (0x00000e08 + 0x10*(i0)) + +#define NV30_3D_TEX_CLIP_PLANE_W(i0) (0x00000e0c + 0x10*(i0)) + +#define NV30_3D_LIGHT 0x00001000 + + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT(i0) (0x00001000 + 0x40*(i0)) + + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(i0) (0x00001000 + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(i0) (0x00001004 + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(i0) (0x00001008 + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE(i0) (0x0000100c + 0x40*(i0)) + + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(i0) (0x0000100c + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(i0) (0x00001010 + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(i0) (0x00001014 + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR(i0) (0x00001018 + 0x40*(i0)) + + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(i0) (0x00001018 + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(i0) (0x0000101c + 0x40*(i0)) + +#define NV30_3D_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(i0) (0x00001020 + 0x40*(i0)) + +#define NV30_3D_LIGHT_UNK24(i0) (0x00001024 + 0x40*(i0)) + +#define NV30_3D_LIGHT_HALF_VECTOR(i0) (0x00001028 + 0x40*(i0)) + + +#define NV30_3D_LIGHT_HALF_VECTOR_X(i0) (0x00001028 + 0x40*(i0)) + +#define NV30_3D_LIGHT_HALF_VECTOR_Y(i0) (0x0000102c + 0x40*(i0)) + +#define NV30_3D_LIGHT_HALF_VECTOR_Z(i0) (0x00001030 + 0x40*(i0)) + +#define NV30_3D_LIGHT_DIRECTION(i0) (0x00001034 + 0x40*(i0)) + + +#define NV30_3D_LIGHT_DIRECTION_X(i0) (0x00001034 + 0x40*(i0)) + +#define NV30_3D_LIGHT_DIRECTION_Y(i0) (0x00001038 + 0x40*(i0)) + +#define NV30_3D_LIGHT_DIRECTION_Z(i0) (0x0000103c + 0x40*(i0)) + + +#define NV30_3D_LIGHT_SPOT_CUTOFF_A(i0) (0x00001200 + 0x40*(i0)) + +#define NV30_3D_LIGHT_SPOT_CUTOFF_B(i0) (0x00001204 + 0x40*(i0)) + +#define NV30_3D_LIGHT_SPOT_CUTOFF_C(i0) (0x00001208 + 0x40*(i0)) + +#define NV30_3D_LIGHT_SPOT_DIR(i0) (0x0000120c + 0x40*(i0)) + + +#define NV30_3D_LIGHT_SPOT_DIR_X(i0) (0x0000120c + 0x40*(i0)) + +#define NV30_3D_LIGHT_SPOT_DIR_Y(i0) (0x00001210 + 0x40*(i0)) + +#define NV30_3D_LIGHT_SPOT_DIR_Z(i0) (0x00001214 + 0x40*(i0)) + +#define NV30_3D_LIGHT_SPOT_CUTOFF_D(i0) (0x00001218 + 0x40*(i0)) + +#define NV30_3D_LIGHT_POSITION(i0) (0x0000121c + 0x40*(i0)) + + +#define NV30_3D_LIGHT_POSITION_X(i0) (0x0000121c + 0x40*(i0)) + +#define NV30_3D_LIGHT_POSITION_Y(i0) (0x00001220 + 0x40*(i0)) + +#define NV30_3D_LIGHT_POSITION_Z(i0) (0x00001224 + 0x40*(i0)) + +#define NV30_3D_LIGHT_ATTENUATION(i0) (0x00001228 + 0x40*(i0)) + +#define NV30_3D_LIGHT_ATTENUATION_CONSTANT(i0) (0x00001228 + 0x40*(i0)) + +#define NV30_3D_LIGHT_ATTENUATION_LINEAR(i0) (0x0000122c + 0x40*(i0)) + +#define NV30_3D_LIGHT_ATTENUATION_QUADRATIC(i0) (0x00001230 + 0x40*(i0)) + +#define NV30_3D_FRONT_MATERIAL_SHININESS(i0) (0x00001400 + 0x4*(i0)) +#define NV30_3D_FRONT_MATERIAL_SHININESS__ESIZE 0x00000004 +#define NV30_3D_FRONT_MATERIAL_SHININESS__LEN 0x00000006 + +#define NV30_3D_ENABLED_LIGHTS 0x00001420 + +#define NV30_3D_VERTEX_TWO_SIDE_ENABLE 0x0000142c + +#define NV30_3D_FP_REG_CONTROL 0x00001450 +#define NV30_3D_FP_REG_CONTROL_UNK0__MASK 0x0000ffff +#define NV30_3D_FP_REG_CONTROL_UNK0__SHIFT 0 +#define NV30_3D_FP_REG_CONTROL_UNK1__MASK 0xffff0000 +#define NV30_3D_FP_REG_CONTROL_UNK1__SHIFT 16 + +#define NV30_3D_FLATSHADE_FIRST 0x00001454 + +#define NV30_3D_EDGEFLAG 0x0000145c +#define NV30_3D_EDGEFLAG_ENABLE 0x00000001 + +#define NV30_3D_VP_CLIP_PLANES_ENABLE 0x00001478 +#define NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0 0x00000002 +#define NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE1 0x00000020 +#define NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE2 0x00000200 +#define NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE3 0x00002000 +#define NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE4 0x00020000 +#define NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE5 0x00200000 + +#define NV30_3D_POLYGON_STIPPLE_ENABLE 0x0000147c + +#define NV30_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001480 + 0x4*(i0)) +#define NV30_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004 +#define NV30_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020 + +#define NV30_3D_VTX_ATTR_3F(i0) (0x00001500 + 0x10*(i0)) +#define NV30_3D_VTX_ATTR_3F__ESIZE 0x00000010 +#define NV30_3D_VTX_ATTR_3F__LEN 0x00000010 + + +#define NV30_3D_VTX_ATTR_3F_X(i0) (0x00001500 + 0x10*(i0)) + +#define NV30_3D_VTX_ATTR_3F_Y(i0) (0x00001504 + 0x10*(i0)) + +#define NV30_3D_VTX_ATTR_3F_Z(i0) (0x00001508 + 0x10*(i0)) + + +#define NV30_3D_VP_CLIP_PLANE(i0, i1) (0x00001600 + 0x10*(i0) + 0x4*(i1)) +#define NV30_3D_VP_CLIP_PLANE__ESIZE 0x00000004 +#define NV30_3D_VP_CLIP_PLANE__LEN 0x00000004 + +#define NV30_3D_VTXBUF(i0) (0x00001680 + 0x4*(i0)) +#define NV30_3D_VTXBUF__ESIZE 0x00000004 +#define NV30_3D_VTXBUF__LEN 0x00000010 +#define NV30_3D_VTXBUF_OFFSET__MASK 0x0fffffff +#define NV30_3D_VTXBUF_OFFSET__SHIFT 0 +#define NV30_3D_VTXBUF_DMA1 0x80000000 + +#define NV40_3D_VTX_CACHE_INVALIDATE 0x00001714 + +#define NV30_3D_VTXFMT(i0) (0x00001740 + 0x4*(i0)) +#define NV30_3D_VTXFMT__ESIZE 0x00000004 +#define NV30_3D_VTXFMT__LEN 0x00000010 +#define NV30_3D_VTXFMT_TYPE__MASK 0x0000000f +#define NV30_3D_VTXFMT_TYPE__SHIFT 0 +#define NV30_3D_VTXFMT_TYPE_B8G8R8A8_UNORM 0x00000000 +#define NV30_3D_VTXFMT_TYPE_V16_SNORM 0x00000001 +#define NV30_3D_VTXFMT_TYPE_V32_FLOAT 0x00000002 +#define NV30_3D_VTXFMT_TYPE_V16_FLOAT 0x00000003 +#define NV30_3D_VTXFMT_TYPE_U8_UNORM 0x00000004 +#define NV30_3D_VTXFMT_TYPE_V16_SSCALED 0x00000005 +#define NV30_3D_VTXFMT_TYPE_U8_USCALED 0x00000007 +#define NV30_3D_VTXFMT_SIZE__MASK 0x000000f0 +#define NV30_3D_VTXFMT_SIZE__SHIFT 4 +#define NV30_3D_VTXFMT_STRIDE__MASK 0x0000ff00 +#define NV30_3D_VTXFMT_STRIDE__SHIFT 8 + +#define NV30_3D_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION 0x000017a0 + + +#define NV30_3D_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000017a0 + +#define NV30_3D_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000017a4 + +#define NV30_3D_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000017a8 + +#define NV30_3D_COLOR_MATERIAL_BACK 0x000017b0 + + +#define NV30_3D_COLOR_MATERIAL_BACK_R 0x000017b0 + +#define NV30_3D_COLOR_MATERIAL_BACK_G 0x000017b4 + +#define NV30_3D_COLOR_MATERIAL_BACK_B 0x000017b8 + +#define NV30_3D_COLOR_MATERIAL_BACK_A 0x000017bc + +#define NV30_3D_QUERY_RESET 0x000017c8 + +#define NV30_3D_QUERY_ENABLE 0x000017cc + +#define NV30_3D_QUERY_GET 0x00001800 +#define NV30_3D_QUERY_GET_OFFSET__MASK 0x00ffffff +#define NV30_3D_QUERY_GET_OFFSET__SHIFT 0 +#define NV30_3D_QUERY_GET_UNK24__MASK 0xff000000 +#define NV30_3D_QUERY_GET_UNK24__SHIFT 24 + +#define NV30_3D_VERTEX_BEGIN_END 0x00001808 +#define NV30_3D_VERTEX_BEGIN_END_STOP 0x00000000 +#define NV30_3D_VERTEX_BEGIN_END_POINTS 0x00000001 +#define NV30_3D_VERTEX_BEGIN_END_LINES 0x00000002 +#define NV30_3D_VERTEX_BEGIN_END_LINE_LOOP 0x00000003 +#define NV30_3D_VERTEX_BEGIN_END_LINE_STRIP 0x00000004 +#define NV30_3D_VERTEX_BEGIN_END_TRIANGLES 0x00000005 +#define NV30_3D_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006 +#define NV30_3D_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007 +#define NV30_3D_VERTEX_BEGIN_END_QUADS 0x00000008 +#define NV30_3D_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009 +#define NV30_3D_VERTEX_BEGIN_END_POLYGON 0x0000000a + +#define NV30_3D_VB_ELEMENT_U16 0x0000180c +#define NV30_3D_VB_ELEMENT_U16_0__MASK 0x0000ffff +#define NV30_3D_VB_ELEMENT_U16_0__SHIFT 0 +#define NV30_3D_VB_ELEMENT_U16_1__MASK 0xffff0000 +#define NV30_3D_VB_ELEMENT_U16_1__SHIFT 16 + +#define NV30_3D_VB_ELEMENT_U32 0x00001810 + +#define NV30_3D_VB_VERTEX_BATCH 0x00001814 +#define NV30_3D_VB_VERTEX_BATCH_OFFSET__MASK 0x00ffffff +#define NV30_3D_VB_VERTEX_BATCH_OFFSET__SHIFT 0 +#define NV30_3D_VB_VERTEX_BATCH_COUNT__MASK 0xff000000 +#define NV30_3D_VB_VERTEX_BATCH_COUNT__SHIFT 24 + +#define NV30_3D_VERTEX_DATA 0x00001818 + +#define NV30_3D_IDXBUF_OFFSET 0x0000181c + +#define NV30_3D_IDXBUF_FORMAT 0x00001820 +#define NV30_3D_IDXBUF_FORMAT_DMA1 0x00000001 +#define NV30_3D_IDXBUF_FORMAT_TYPE__MASK 0x000000f0 +#define NV30_3D_IDXBUF_FORMAT_TYPE__SHIFT 4 +#define NV30_3D_IDXBUF_FORMAT_TYPE_U32 0x00000000 +#define NV30_3D_IDXBUF_FORMAT_TYPE_U16 0x00000010 + +#define NV30_3D_VB_INDEX_BATCH 0x00001824 +#define NV30_3D_VB_INDEX_BATCH_START__MASK 0x00ffffff +#define NV30_3D_VB_INDEX_BATCH_START__SHIFT 0 +#define NV30_3D_VB_INDEX_BATCH_COUNT__MASK 0xff000000 +#define NV30_3D_VB_INDEX_BATCH_COUNT__SHIFT 24 + +#define NV30_3D_POLYGON_MODE_FRONT 0x00001828 +#define NV30_3D_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV30_3D_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV30_3D_POLYGON_MODE_FRONT_FILL 0x00001b02 + +#define NV30_3D_POLYGON_MODE_BACK 0x0000182c +#define NV30_3D_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV30_3D_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV30_3D_POLYGON_MODE_BACK_FILL 0x00001b02 + +#define NV30_3D_CULL_FACE 0x00001830 +#define NV30_3D_CULL_FACE_FRONT 0x00000404 +#define NV30_3D_CULL_FACE_BACK 0x00000405 +#define NV30_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 + +#define NV30_3D_FRONT_FACE 0x00001834 +#define NV30_3D_FRONT_FACE_CW 0x00000900 +#define NV30_3D_FRONT_FACE_CCW 0x00000901 + +#define NV30_3D_POLYGON_SMOOTH_ENABLE 0x00001838 + +#define NV30_3D_CULL_FACE_ENABLE 0x0000183c + +#define NV30_3D_TEX_PALETTE_OFFSET(i0) (0x00001840 + 0x4*(i0)) +#define NV30_3D_TEX_PALETTE_OFFSET__ESIZE 0x00000004 +#define NV30_3D_TEX_PALETTE_OFFSET__LEN 0x00000008 + +#define NV40_3D_TEX_SIZE1(i0) (0x00001840 + 0x4*(i0)) +#define NV40_3D_TEX_SIZE1__ESIZE 0x00000004 +#define NV40_3D_TEX_SIZE1__LEN 0x00000008 +#define NV40_3D_TEX_SIZE1_DEPTH__MASK 0xfff00000 +#define NV40_3D_TEX_SIZE1_DEPTH__SHIFT 20 +#define NV40_3D_TEX_SIZE1_PITCH__MASK 0x0000ffff +#define NV40_3D_TEX_SIZE1_PITCH__SHIFT 0 + +#define NV30_3D_VTX_ATTR_2F(i0) (0x00001880 + 0x8*(i0)) +#define NV30_3D_VTX_ATTR_2F__ESIZE 0x00000008 +#define NV30_3D_VTX_ATTR_2F__LEN 0x00000010 + + +#define NV30_3D_VTX_ATTR_2F_X(i0) (0x00001880 + 0x8*(i0)) + +#define NV30_3D_VTX_ATTR_2F_Y(i0) (0x00001884 + 0x8*(i0)) + +#define NV30_3D_VTX_ATTR_2I(i0) (0x00001900 + 0x4*(i0)) +#define NV30_3D_VTX_ATTR_2I__ESIZE 0x00000004 +#define NV30_3D_VTX_ATTR_2I__LEN 0x00000010 +#define NV30_3D_VTX_ATTR_2I_X__MASK 0x0000ffff +#define NV30_3D_VTX_ATTR_2I_X__SHIFT 0 +#define NV30_3D_VTX_ATTR_2I_Y__MASK 0xffff0000 +#define NV30_3D_VTX_ATTR_2I_Y__SHIFT 16 + +#define NV30_3D_VTX_ATTR_4UB(i0) (0x00001940 + 0x4*(i0)) +#define NV30_3D_VTX_ATTR_4UB__ESIZE 0x00000004 +#define NV30_3D_VTX_ATTR_4UB__LEN 0x00000010 +#define NV30_3D_VTX_ATTR_4UB_X__MASK 0x000000ff +#define NV30_3D_VTX_ATTR_4UB_X__SHIFT 0 +#define NV30_3D_VTX_ATTR_4UB_Y__MASK 0x0000ff00 +#define NV30_3D_VTX_ATTR_4UB_Y__SHIFT 8 +#define NV30_3D_VTX_ATTR_4UB_Z__MASK 0x00ff0000 +#define NV30_3D_VTX_ATTR_4UB_Z__SHIFT 16 +#define NV30_3D_VTX_ATTR_4UB_W__MASK 0xff000000 +#define NV30_3D_VTX_ATTR_4UB_W__SHIFT 24 + +#define NV30_3D_VTX_ATTR_4I(i0) (0x00001980 + 0x8*(i0)) +#define NV30_3D_VTX_ATTR_4I__ESIZE 0x00000008 +#define NV30_3D_VTX_ATTR_4I__LEN 0x00000010 + +#define NV30_3D_VTX_ATTR_4I_XY(i0) (0x00001980 + 0x8*(i0)) +#define NV30_3D_VTX_ATTR_4I_XY_X__MASK 0x0000ffff +#define NV30_3D_VTX_ATTR_4I_XY_X__SHIFT 0 +#define NV30_3D_VTX_ATTR_4I_XY_Y__MASK 0xffff0000 +#define NV30_3D_VTX_ATTR_4I_XY_Y__SHIFT 16 + +#define NV30_3D_VTX_ATTR_4I_ZW(i0) (0x00001984 + 0x8*(i0)) +#define NV30_3D_VTX_ATTR_4I_ZW_Z__MASK 0x0000ffff +#define NV30_3D_VTX_ATTR_4I_ZW_Z__SHIFT 0 +#define NV30_3D_VTX_ATTR_4I_ZW_W__MASK 0xffff0000 +#define NV30_3D_VTX_ATTR_4I_ZW_W__SHIFT 16 + +#define NV30_3D_TEX_OFFSET(i0) (0x00001a00 + 0x20*(i0)) +#define NV30_3D_TEX_OFFSET__ESIZE 0x00000020 +#define NV30_3D_TEX_OFFSET__LEN 0x00000008 + +#define NV30_3D_TEX_FORMAT(i0) (0x00001a04 + 0x20*(i0)) +#define NV30_3D_TEX_FORMAT__ESIZE 0x00000020 +#define NV30_3D_TEX_FORMAT__LEN 0x00000008 +#define NV30_3D_TEX_FORMAT_DMA0 0x00000001 +#define NV30_3D_TEX_FORMAT_DMA1 0x00000002 +#define NV30_3D_TEX_FORMAT_CUBIC 0x00000004 +#define NV30_3D_TEX_FORMAT_NO_BORDER 0x00000008 +#define NV30_3D_TEX_FORMAT_DIMS__MASK 0x000000f0 +#define NV30_3D_TEX_FORMAT_DIMS__SHIFT 4 +#define NV30_3D_TEX_FORMAT_DIMS_1D 0x00000010 +#define NV30_3D_TEX_FORMAT_DIMS_2D 0x00000020 +#define NV30_3D_TEX_FORMAT_DIMS_3D 0x00000030 +#define NV30_3D_TEX_FORMAT_FORMAT__MASK 0x0000ff00 +#define NV30_3D_TEX_FORMAT_FORMAT__SHIFT 8 +#define NV30_3D_TEX_FORMAT_FORMAT_L8 0x00000000 +#define NV30_3D_TEX_FORMAT_FORMAT_I8 0x00000100 +#define NV30_3D_TEX_FORMAT_FORMAT_A1R5G5B5 0x00000200 +#define NV30_3D_TEX_FORMAT_FORMAT_A4R4G4B4 0x00000400 +#define NV30_3D_TEX_FORMAT_FORMAT_R5G6B5 0x00000500 +#define NV30_3D_TEX_FORMAT_FORMAT_A8R8G8B8 0x00000600 +#define NV30_3D_TEX_FORMAT_FORMAT_X8R8G8B8 0x00000700 +#define NV30_3D_TEX_FORMAT_FORMAT_INDEX8 0x00000b00 +#define NV30_3D_TEX_FORMAT_FORMAT_DXT1 0x00000c00 +#define NV30_3D_TEX_FORMAT_FORMAT_DXT3 0x00000e00 +#define NV30_3D_TEX_FORMAT_FORMAT_DXT5 0x00000f00 +#define NV30_3D_TEX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00001000 +#define NV30_3D_TEX_FORMAT_FORMAT_R5G6B5_RECT 0x00001100 +#define NV30_3D_TEX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00001200 +#define NV30_3D_TEX_FORMAT_FORMAT_L8_RECT 0x00001300 +#define NV30_3D_TEX_FORMAT_FORMAT_DSDT8_RECT 0x00001700 +#define NV30_3D_TEX_FORMAT_FORMAT_A8L8 0x00001a00 +#define NV30_3D_TEX_FORMAT_FORMAT_I8_RECT 0x00001b00 +#define NV30_3D_TEX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00001d00 +#define NV30_3D_TEX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00 +#define NV30_3D_TEX_FORMAT_FORMAT_A8L8_RECT 0x00002000 +#define NV30_3D_TEX_FORMAT_FORMAT_Z24 0x00002a00 +#define NV30_3D_TEX_FORMAT_FORMAT_Z24_RECT 0x00002b00 +#define NV30_3D_TEX_FORMAT_FORMAT_Z16 0x00002c00 +#define NV30_3D_TEX_FORMAT_FORMAT_Z16_RECT 0x00002d00 +#define NV30_3D_TEX_FORMAT_FORMAT_DSDT8 0x00002800 +#define NV30_3D_TEX_FORMAT_FORMAT_HILO16 0x00003300 +#define NV30_3D_TEX_FORMAT_FORMAT_HILO16_RECT 0x00003600 +#define NV30_3D_TEX_FORMAT_FORMAT_HILO8 0x00004400 +#define NV30_3D_TEX_FORMAT_FORMAT_SIGNED_HILO8 0x00004500 +#define NV30_3D_TEX_FORMAT_FORMAT_HILO8_RECT 0x00004600 +#define NV30_3D_TEX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00004700 +#define NV30_3D_TEX_FORMAT_FORMAT_A16 0x00003200 +#define NV30_3D_TEX_FORMAT_FORMAT_A16_RECT 0x00003500 +#define NV30_3D_TEX_FORMAT_FORMAT_UNK3F 0x00003f00 +#define NV30_3D_TEX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00004a00 +#define NV30_3D_TEX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00004b00 +#define NV30_3D_TEX_FORMAT_FORMAT_FLOAT_R32_NV 0x00004c00 +#define NV40_3D_TEX_FORMAT_FORMAT__MASK 0x00001f00 +#define NV40_3D_TEX_FORMAT_FORMAT__SHIFT 8 +#define NV40_3D_TEX_FORMAT_FORMAT_L8 0x00000100 +#define NV40_3D_TEX_FORMAT_FORMAT_A1R5G5B5 0x00000200 +#define NV40_3D_TEX_FORMAT_FORMAT_A4R4G4B4 0x00000300 +#define NV40_3D_TEX_FORMAT_FORMAT_R5G6B5 0x00000400 +#define NV40_3D_TEX_FORMAT_FORMAT_A8R8G8B8 0x00000500 +#define NV40_3D_TEX_FORMAT_FORMAT_DXT1 0x00000600 +#define NV40_3D_TEX_FORMAT_FORMAT_DXT3 0x00000700 +#define NV40_3D_TEX_FORMAT_FORMAT_DXT5 0x00000800 +#define NV40_3D_TEX_FORMAT_FORMAT_A8L8 0x00000b00 +#define NV40_3D_TEX_FORMAT_FORMAT_Z24 0x00001000 +#define NV40_3D_TEX_FORMAT_FORMAT_Z16 0x00001200 +#define NV40_3D_TEX_FORMAT_FORMAT_A16 0x00001400 +#define NV40_3D_TEX_FORMAT_FORMAT_A16L16 0x00001500 +#define NV40_3D_TEX_FORMAT_FORMAT_HILO8 0x00001800 +#define NV40_3D_TEX_FORMAT_FORMAT_RGBA16F 0x00001a00 +#define NV40_3D_TEX_FORMAT_FORMAT_RGBA32F 0x00001b00 +#define NV40_3D_TEX_FORMAT_LINEAR 0x00002000 +#define NV40_3D_TEX_FORMAT_RECT 0x00004000 +#define NV40_3D_TEX_FORMAT_MIPMAP_COUNT__MASK 0x000f0000 +#define NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT 16 +#define NV30_3D_TEX_FORMAT_MIPMAP 0x00080000 +#define NV30_3D_TEX_FORMAT_BASE_SIZE_U__MASK 0x00f00000 +#define NV30_3D_TEX_FORMAT_BASE_SIZE_U__SHIFT 20 +#define NV30_3D_TEX_FORMAT_BASE_SIZE_V__MASK 0x0f000000 +#define NV30_3D_TEX_FORMAT_BASE_SIZE_V__SHIFT 24 +#define NV30_3D_TEX_FORMAT_BASE_SIZE_W__MASK 0xf0000000 +#define NV30_3D_TEX_FORMAT_BASE_SIZE_W__SHIFT 28 + +#define NV30_3D_TEX_WRAP(i0) (0x00001a08 + 0x20*(i0)) +#define NV30_3D_TEX_WRAP__ESIZE 0x00000020 +#define NV30_3D_TEX_WRAP__LEN 0x00000008 +#define NV30_3D_TEX_WRAP_S__MASK 0x000000ff +#define NV30_3D_TEX_WRAP_S__SHIFT 0 +#define NV30_3D_TEX_WRAP_S_REPEAT 0x00000001 +#define NV30_3D_TEX_WRAP_S_MIRRORED_REPEAT 0x00000002 +#define NV30_3D_TEX_WRAP_S_CLAMP_TO_EDGE 0x00000003 +#define NV30_3D_TEX_WRAP_S_CLAMP_TO_BORDER 0x00000004 +#define NV30_3D_TEX_WRAP_S_CLAMP 0x00000005 +#define NV40_3D_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE 0x00000006 +#define NV40_3D_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER 0x00000007 +#define NV40_3D_TEX_WRAP_S_MIRROR_CLAMP 0x00000008 +#define NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION__MASK 0x00000070 +#define NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION__SHIFT 4 +#define NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION_OFF 0x00000000 +#define NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION_QUALITY 0x00000020 +#define NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION_PERFORMANCE 0x00000030 +#define NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION_HIGH_PERFORMANCE 0x00000070 +#define NV30_3D_TEX_WRAP_T__MASK 0x00000f00 +#define NV30_3D_TEX_WRAP_T__SHIFT 8 +#define NV30_3D_TEX_WRAP_T_REPEAT 0x00000100 +#define NV30_3D_TEX_WRAP_T_MIRRORED_REPEAT 0x00000200 +#define NV30_3D_TEX_WRAP_T_CLAMP_TO_EDGE 0x00000300 +#define NV30_3D_TEX_WRAP_T_CLAMP_TO_BORDER 0x00000400 +#define NV30_3D_TEX_WRAP_T_CLAMP 0x00000500 +#define NV40_3D_TEX_WRAP_T_MIRROR_CLAMP_TO_EDGE 0x00000600 +#define NV40_3D_TEX_WRAP_T_MIRROR_CLAMP_TO_BORDER 0x00000700 +#define NV40_3D_TEX_WRAP_T_MIRROR_CLAMP 0x00000800 +#define NV30_3D_TEX_WRAP_EXPAND_NORMAL 0x0000f000 +#define NV30_3D_TEX_WRAP_R__MASK 0x000f0000 +#define NV30_3D_TEX_WRAP_R__SHIFT 16 +#define NV30_3D_TEX_WRAP_R_REPEAT 0x00010000 +#define NV30_3D_TEX_WRAP_R_MIRRORED_REPEAT 0x00020000 +#define NV30_3D_TEX_WRAP_R_CLAMP_TO_EDGE 0x00030000 +#define NV30_3D_TEX_WRAP_R_CLAMP_TO_BORDER 0x00040000 +#define NV30_3D_TEX_WRAP_R_CLAMP 0x00050000 +#define NV40_3D_TEX_WRAP_R_MIRROR_CLAMP_TO_EDGE 0x00060000 +#define NV40_3D_TEX_WRAP_R_MIRROR_CLAMP_TO_BORDER 0x00070000 +#define NV40_3D_TEX_WRAP_R_MIRROR_CLAMP 0x00080000 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER__MASK 0x00f00000 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER__SHIFT 20 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER_NONE 0x00000000 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER_RED 0x00100000 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER_GREEN 0x00200000 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER_BLUE 0x00400000 +#define NV40_3D_TEX_WRAP_GAMMA_DECREASE_FILTER_ALL 0x00f00000 +#define NV30_3D_TEX_WRAP_RCOMP__MASK 0xf0000000 +#define NV30_3D_TEX_WRAP_RCOMP__SHIFT 28 +#define NV30_3D_TEX_WRAP_RCOMP_NEVER 0x00000000 +#define NV30_3D_TEX_WRAP_RCOMP_GREATER 0x10000000 +#define NV30_3D_TEX_WRAP_RCOMP_EQUAL 0x20000000 +#define NV30_3D_TEX_WRAP_RCOMP_GEQUAL 0x30000000 +#define NV30_3D_TEX_WRAP_RCOMP_LESS 0x40000000 +#define NV30_3D_TEX_WRAP_RCOMP_NOTEQUAL 0x50000000 +#define NV30_3D_TEX_WRAP_RCOMP_LEQUAL 0x60000000 +#define NV30_3D_TEX_WRAP_RCOMP_ALWAYS 0x70000000 + +#define NV30_3D_TEX_ENABLE(i0) (0x00001a0c + 0x20*(i0)) +#define NV30_3D_TEX_ENABLE__ESIZE 0x00000020 +#define NV30_3D_TEX_ENABLE__LEN 0x00000008 +#define NV30_3D_TEX_ENABLE_ANISO__MASK 0x00000030 +#define NV30_3D_TEX_ENABLE_ANISO__SHIFT 4 +#define NV30_3D_TEX_ENABLE_ANISO_NONE 0x00000000 +#define NV30_3D_TEX_ENABLE_ANISO_2X 0x00000010 +#define NV30_3D_TEX_ENABLE_ANISO_4X 0x00000020 +#define NV30_3D_TEX_ENABLE_ANISO_8X 0x00000030 +#define NV40_3D_TEX_ENABLE_ANISO__MASK 0x000000f0 +#define NV40_3D_TEX_ENABLE_ANISO__SHIFT 4 +#define NV40_3D_TEX_ENABLE_ANISO_NONE 0x00000000 +#define NV40_3D_TEX_ENABLE_ANISO_2X 0x00000010 +#define NV40_3D_TEX_ENABLE_ANISO_4X 0x00000020 +#define NV40_3D_TEX_ENABLE_ANISO_6X 0x00000030 +#define NV40_3D_TEX_ENABLE_ANISO_8X 0x00000040 +#define NV40_3D_TEX_ENABLE_ANISO_10X 0x00000050 +#define NV40_3D_TEX_ENABLE_ANISO_12X 0x00000060 +#define NV40_3D_TEX_ENABLE_ANISO_16X 0x00000070 +#define NV30_3D_TEX_ENABLE_MIPMAP_MAX_LOD__MASK 0x0003c000 +#define NV30_3D_TEX_ENABLE_MIPMAP_MAX_LOD__SHIFT 14 +#define NV40_3D_TEX_ENABLE_MIPMAP_MAX_LOD__MASK 0x00038000 +#define NV40_3D_TEX_ENABLE_MIPMAP_MAX_LOD__SHIFT 15 +#define NV30_3D_TEX_ENABLE_MIPMAP_MIN_LOD__MASK 0x3c000000 +#define NV30_3D_TEX_ENABLE_MIPMAP_MIN_LOD__SHIFT 26 +#define NV40_3D_TEX_ENABLE_MIPMAP_MIN_LOD__MASK 0x38000000 +#define NV40_3D_TEX_ENABLE_MIPMAP_MIN_LOD__SHIFT 27 +#define NV30_3D_TEX_ENABLE_ENABLE 0x40000000 +#define NV40_3D_TEX_ENABLE_ENABLE 0x80000000 + +#define NV30_3D_TEX_SWIZZLE(i0) (0x00001a10 + 0x20*(i0)) +#define NV30_3D_TEX_SWIZZLE__ESIZE 0x00000020 +#define NV30_3D_TEX_SWIZZLE__LEN 0x00000008 +#define NV30_3D_TEX_SWIZZLE_S1_W__MASK 0x00000003 +#define NV30_3D_TEX_SWIZZLE_S1_W__SHIFT 0 +#define NV30_3D_TEX_SWIZZLE_S1_W_W 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S1_W_Z 0x00000001 +#define NV30_3D_TEX_SWIZZLE_S1_W_Y 0x00000002 +#define NV30_3D_TEX_SWIZZLE_S1_W_X 0x00000003 +#define NV30_3D_TEX_SWIZZLE_S1_Z__MASK 0x0000000c +#define NV30_3D_TEX_SWIZZLE_S1_Z__SHIFT 2 +#define NV30_3D_TEX_SWIZZLE_S1_Z_W 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S1_Z_Z 0x00000004 +#define NV30_3D_TEX_SWIZZLE_S1_Z_Y 0x00000008 +#define NV30_3D_TEX_SWIZZLE_S1_Z_X 0x0000000c +#define NV30_3D_TEX_SWIZZLE_S1_Y__MASK 0x00000030 +#define NV30_3D_TEX_SWIZZLE_S1_Y__SHIFT 4 +#define NV30_3D_TEX_SWIZZLE_S1_Y_W 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S1_Y_Z 0x00000010 +#define NV30_3D_TEX_SWIZZLE_S1_Y_Y 0x00000020 +#define NV30_3D_TEX_SWIZZLE_S1_Y_X 0x00000030 +#define NV30_3D_TEX_SWIZZLE_S1_X__MASK 0x000000c0 +#define NV30_3D_TEX_SWIZZLE_S1_X__SHIFT 6 +#define NV30_3D_TEX_SWIZZLE_S1_X_W 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S1_X_Z 0x00000040 +#define NV30_3D_TEX_SWIZZLE_S1_X_Y 0x00000080 +#define NV30_3D_TEX_SWIZZLE_S1_X_X 0x000000c0 +#define NV30_3D_TEX_SWIZZLE_S0_W__MASK 0x00000300 +#define NV30_3D_TEX_SWIZZLE_S0_W__SHIFT 8 +#define NV30_3D_TEX_SWIZZLE_S0_W_ZERO 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S0_W_ONE 0x00000100 +#define NV30_3D_TEX_SWIZZLE_S0_W_S1 0x00000200 +#define NV30_3D_TEX_SWIZZLE_S0_Z__MASK 0x00000c00 +#define NV30_3D_TEX_SWIZZLE_S0_Z__SHIFT 10 +#define NV30_3D_TEX_SWIZZLE_S0_Z_ZERO 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S0_Z_ONE 0x00000400 +#define NV30_3D_TEX_SWIZZLE_S0_Z_S1 0x00000800 +#define NV30_3D_TEX_SWIZZLE_S0_Y__MASK 0x00003000 +#define NV30_3D_TEX_SWIZZLE_S0_Y__SHIFT 12 +#define NV30_3D_TEX_SWIZZLE_S0_Y_ZERO 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S0_Y_ONE 0x00001000 +#define NV30_3D_TEX_SWIZZLE_S0_Y_S1 0x00002000 +#define NV30_3D_TEX_SWIZZLE_S0_X__MASK 0x0000c000 +#define NV30_3D_TEX_SWIZZLE_S0_X__SHIFT 14 +#define NV30_3D_TEX_SWIZZLE_S0_X_ZERO 0x00000000 +#define NV30_3D_TEX_SWIZZLE_S0_X_ONE 0x00004000 +#define NV30_3D_TEX_SWIZZLE_S0_X_S1 0x00008000 +#define NV30_3D_TEX_SWIZZLE_RECT_PITCH__MASK 0xffff0000 +#define NV30_3D_TEX_SWIZZLE_RECT_PITCH__SHIFT 16 + +#define NV30_3D_TEX_FILTER(i0) (0x00001a14 + 0x20*(i0)) +#define NV30_3D_TEX_FILTER__ESIZE 0x00000020 +#define NV30_3D_TEX_FILTER__LEN 0x00000008 +#define NV30_3D_TEX_FILTER_LOD_BIAS__MASK 0x00000f00 +#define NV30_3D_TEX_FILTER_LOD_BIAS__SHIFT 8 +#define NV30_3D_TEX_FILTER_MIN__MASK 0x000f0000 +#define NV30_3D_TEX_FILTER_MIN__SHIFT 16 +#define NV30_3D_TEX_FILTER_MIN_NEAREST 0x00010000 +#define NV30_3D_TEX_FILTER_MIN_LINEAR 0x00020000 +#define NV30_3D_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST 0x00030000 +#define NV30_3D_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST 0x00040000 +#define NV30_3D_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR 0x00050000 +#define NV30_3D_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR 0x00060000 +#define NV30_3D_TEX_FILTER_MAG__MASK 0x0f000000 +#define NV30_3D_TEX_FILTER_MAG__SHIFT 24 +#define NV30_3D_TEX_FILTER_MAG_NEAREST 0x01000000 +#define NV30_3D_TEX_FILTER_MAG_LINEAR 0x02000000 +#define NV30_3D_TEX_FILTER_SIGNED_BLUE 0x10000000 +#define NV30_3D_TEX_FILTER_SIGNED_GREEN 0x20000000 +#define NV30_3D_TEX_FILTER_SIGNED_RED 0x40000000 +#define NV30_3D_TEX_FILTER_SIGNED_ALPHA 0x80000000 + +#define NV30_3D_TEX_NPOT_SIZE(i0) (0x00001a18 + 0x20*(i0)) +#define NV30_3D_TEX_NPOT_SIZE__ESIZE 0x00000020 +#define NV30_3D_TEX_NPOT_SIZE__LEN 0x00000008 +#define NV30_3D_TEX_NPOT_SIZE_H__MASK 0x0000ffff +#define NV30_3D_TEX_NPOT_SIZE_H__SHIFT 0 +#define NV30_3D_TEX_NPOT_SIZE_W__MASK 0xffff0000 +#define NV30_3D_TEX_NPOT_SIZE_W__SHIFT 16 + +#define NV30_3D_TEX_BORDER_COLOR(i0) (0x00001a1c + 0x20*(i0)) +#define NV30_3D_TEX_BORDER_COLOR__ESIZE 0x00000020 +#define NV30_3D_TEX_BORDER_COLOR__LEN 0x00000008 +#define NV30_3D_TEX_BORDER_COLOR_B__MASK 0x000000ff +#define NV30_3D_TEX_BORDER_COLOR_B__SHIFT 0 +#define NV30_3D_TEX_BORDER_COLOR_G__MASK 0x0000ff00 +#define NV30_3D_TEX_BORDER_COLOR_G__SHIFT 8 +#define NV30_3D_TEX_BORDER_COLOR_R__MASK 0x00ff0000 +#define NV30_3D_TEX_BORDER_COLOR_R__SHIFT 16 +#define NV30_3D_TEX_BORDER_COLOR_A__MASK 0xff000000 +#define NV30_3D_TEX_BORDER_COLOR_A__SHIFT 24 + +#define NV30_3D_VTX_ATTR_4F(i0) (0x00001c00 + 0x10*(i0)) +#define NV30_3D_VTX_ATTR_4F__ESIZE 0x00000010 +#define NV30_3D_VTX_ATTR_4F__LEN 0x00000010 + + +#define NV30_3D_VTX_ATTR_4F_X(i0) (0x00001c00 + 0x10*(i0)) + +#define NV30_3D_VTX_ATTR_4F_Y(i0) (0x00001c04 + 0x10*(i0)) + +#define NV30_3D_VTX_ATTR_4F_Z(i0) (0x00001c08 + 0x10*(i0)) + +#define NV30_3D_VTX_ATTR_4F_W(i0) (0x00001c0c + 0x10*(i0)) + +#define NV30_3D_FP_CONTROL 0x00001d60 +#define NV30_3D_FP_CONTROL_USED_REGS_MINUS1_DIV2__MASK 0x0000000f +#define NV30_3D_FP_CONTROL_USED_REGS_MINUS1_DIV2__SHIFT 0 +#define NV30_3D_FP_CONTROL_USES_KIL 0x00000080 +#define NV40_3D_FP_CONTROL_KIL 0x00000080 +#define NV40_3D_FP_CONTROL_TEMP_COUNT__MASK 0xff000000 +#define NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT 24 + +#define NV30_3D_DEPTH_CONTROL 0x00001d78 +#define NV30_3D_DEPTH_CONTROL_CLAMP 0x000000f0 + +#define NV30_3D_MULTISAMPLE_CONTROL 0x00001d7c +#define NV30_3D_MULTISAMPLE_CONTROL_ENABLE 0x00000001 +#define NV30_3D_MULTISAMPLE_CONTROL_SAMPLE_ALPHA_TO_COVERAGE 0x00000010 +#define NV30_3D_MULTISAMPLE_CONTROL_SAMPLE_ALPHA_TO_ONE 0x00000100 +#define NV30_3D_MULTISAMPLE_CONTROL_SAMPLE_COVERAGE__MASK 0xffff0000 +#define NV30_3D_MULTISAMPLE_CONTROL_SAMPLE_COVERAGE__SHIFT 16 + +#define NV30_3D_COORD_CONVENTIONS 0x00001d88 +#define NV30_3D_COORD_CONVENTIONS_HEIGHT__MASK 0x00000fff +#define NV30_3D_COORD_CONVENTIONS_HEIGHT__SHIFT 0 +#define NV30_3D_COORD_CONVENTIONS_ORIGIN__MASK 0x00001000 +#define NV30_3D_COORD_CONVENTIONS_ORIGIN__SHIFT 12 +#define NV30_3D_COORD_CONVENTIONS_ORIGIN_NORMAL 0x00000000 +#define NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED 0x00001000 +#define NV30_3D_COORD_CONVENTIONS_CENTER__MASK 0x00010000 +#define NV30_3D_COORD_CONVENTIONS_CENTER__SHIFT 16 +#define NV30_3D_COORD_CONVENTIONS_CENTER_HALF_INTEGER 0x00000000 +#define NV30_3D_COORD_CONVENTIONS_CENTER_INTEGER 0x00010000 + +#define NV30_3D_CLEAR_DEPTH_VALUE 0x00001d8c + +#define NV30_3D_CLEAR_COLOR_VALUE 0x00001d90 +#define NV30_3D_CLEAR_COLOR_VALUE_B__MASK 0x000000ff +#define NV30_3D_CLEAR_COLOR_VALUE_B__SHIFT 0 +#define NV30_3D_CLEAR_COLOR_VALUE_G__MASK 0x0000ff00 +#define NV30_3D_CLEAR_COLOR_VALUE_G__SHIFT 8 +#define NV30_3D_CLEAR_COLOR_VALUE_R__MASK 0x00ff0000 +#define NV30_3D_CLEAR_COLOR_VALUE_R__SHIFT 16 +#define NV30_3D_CLEAR_COLOR_VALUE_A__MASK 0xff000000 +#define NV30_3D_CLEAR_COLOR_VALUE_A__SHIFT 24 + +#define NV30_3D_CLEAR_BUFFERS 0x00001d94 +#define NV30_3D_CLEAR_BUFFERS_DEPTH 0x00000001 +#define NV30_3D_CLEAR_BUFFERS_STENCIL 0x00000002 +#define NV30_3D_CLEAR_BUFFERS_COLOR_R 0x00000010 +#define NV30_3D_CLEAR_BUFFERS_COLOR_G 0x00000020 +#define NV30_3D_CLEAR_BUFFERS_COLOR_B 0x00000040 +#define NV30_3D_CLEAR_BUFFERS_COLOR_A 0x00000080 + +#define NV30_3D_DO_VERTICES 0x00001dac + +#define NV30_3D_LINE_STIPPLE_ENABLE 0x00001db4 + +#define NV30_3D_LINE_STIPPLE_PATTERN 0x00001db8 +#define NV30_3D_LINE_STIPPLE_PATTERN_FACTOR__MASK 0x0000ffff +#define NV30_3D_LINE_STIPPLE_PATTERN_FACTOR__SHIFT 0 +#define NV30_3D_LINE_STIPPLE_PATTERN_PATTERN__MASK 0xffff0000 +#define NV30_3D_LINE_STIPPLE_PATTERN_PATTERN__SHIFT 16 + +#define NV30_3D_BACK_MATERIAL_SHININESS(i0) (0x00001e20 + 0x4*(i0)) +#define NV30_3D_BACK_MATERIAL_SHININESS__ESIZE 0x00000004 +#define NV30_3D_BACK_MATERIAL_SHININESS__LEN 0x00000006 + +#define NV30_3D_VTX_ATTR_1F(i0) (0x00001e40 + 0x4*(i0)) +#define NV30_3D_VTX_ATTR_1F__ESIZE 0x00000004 +#define NV30_3D_VTX_ATTR_1F__LEN 0x00000010 + +#define NV30_3D_ENGINE 0x00001e94 +#define NV30_3D_ENGINE_FP 0x00000001 +#define NV30_3D_ENGINE_VP 0x00000002 +#define NV30_3D_ENGINE_FIXED 0x00000004 + +#define NV30_3D_VP_UPLOAD_FROM_ID 0x00001e9c + +#define NV30_3D_VP_START_FROM_ID 0x00001ea0 + +#define NV30_3D_POINT_PARAMETERS(i0) (0x00001ec0 + 0x4*(i0)) +#define NV30_3D_POINT_PARAMETERS__ESIZE 0x00000004 +#define NV30_3D_POINT_PARAMETERS__LEN 0x00000008 + +#define NV30_3D_POINT_SIZE 0x00001ee0 + +#define NV30_3D_POINT_PARAMETERS_ENABLE 0x00001ee4 + +#define NV30_3D_POINT_SPRITE 0x00001ee8 +#define NV30_3D_POINT_SPRITE_ENABLE 0x00000001 +#define NV30_3D_POINT_SPRITE_R_MODE__MASK 0x00000006 +#define NV30_3D_POINT_SPRITE_R_MODE__SHIFT 1 +#define NV30_3D_POINT_SPRITE_R_MODE_ZERO 0x00000000 +#define NV30_3D_POINT_SPRITE_R_MODE_R 0x00000002 +#define NV30_3D_POINT_SPRITE_R_MODE_S 0x00000004 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_0 0x00000100 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_1 0x00000200 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_2 0x00000400 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_3 0x00000800 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_4 0x00001000 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_5 0x00002000 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_6 0x00004000 +#define NV30_3D_POINT_SPRITE_COORD_REPLACE_7 0x00008000 + +#define NV30_3D_VP_UPLOAD_CONST_ID 0x00001efc + +#define NV30_3D_VP_UPLOAD_CONST(i0) (0x00001f00 + 0x10*(i0)) +#define NV30_3D_VP_UPLOAD_CONST__ESIZE 0x00000010 +#define NV30_3D_VP_UPLOAD_CONST__LEN 0x00000004 + + +#define NV30_3D_VP_UPLOAD_CONST_X(i0) (0x00001f00 + 0x10*(i0)) + +#define NV30_3D_VP_UPLOAD_CONST_Y(i0) (0x00001f04 + 0x10*(i0)) + +#define NV30_3D_VP_UPLOAD_CONST_Z(i0) (0x00001f08 + 0x10*(i0)) + +#define NV30_3D_VP_UPLOAD_CONST_W(i0) (0x00001f0c + 0x10*(i0)) + +#define NV30_3D_UNK1F80(i0) (0x00001f80 + 0x4*(i0)) +#define NV30_3D_UNK1F80__ESIZE 0x00000004 +#define NV30_3D_UNK1F80__LEN 0x00000010 + +#define NV40_3D_TEX_CACHE_CTL 0x00001fd8 + +#define NV40_3D_VP_ATTRIB_EN 0x00001ff0 + +#define NV40_3D_VP_RESULT_EN 0x00001ff4 + + +#endif /* NV30_40_3D_XML */ diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c index dec073ac900..d6ede5b40a1 100644 --- a/src/gallium/drivers/nvfx/nv30_fragtex.c +++ b/src/gallium/drivers/nvfx/nv30_fragtex.c @@ -1,7 +1,6 @@ #include "util/u_format.h" #include "nvfx_context.h" -#include "nouveau/nouveau_util.h" #include "nvfx_tex.h" #include "nvfx_resource.h" @@ -10,138 +9,109 @@ nv30_sampler_state_init(struct pipe_context *pipe, struct nvfx_sampler_state *ps, const struct pipe_sampler_state *cso) { - if (cso->max_anisotropy >= 8) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 4) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; - } else - if (cso->max_anisotropy >= 2) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; - } + float limit; + if (cso->max_anisotropy >= 2) { - float limit; + if (cso->max_anisotropy >= 8) + ps->en |= NV30_3D_TEX_ENABLE_ANISO_8X; + else if (cso->max_anisotropy >= 4) + ps->en |= NV30_3D_TEX_ENABLE_ANISO_4X; + else if (cso->max_anisotropy >= 2) + ps->en |= NV30_3D_TEX_ENABLE_ANISO_2X; + } - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + limit = CLAMP(cso->lod_bias, -16.0, 15.0 + (255.0 / 256.0)); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; + ps->max_lod = (int)CLAMP(cso->max_lod, 0.0, 15.0); + ps->min_lod = (int)CLAMP(cso->min_lod, 0.0, 15.0); - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; - } + ps->en |= NV30_3D_TEX_ENABLE_ENABLE; } -#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ -{ \ - TRUE, \ - PIPE_FORMAT_##m, \ - NV34TCL_TX_FORMAT_FORMAT_##tf, \ - (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \ - NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ - NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ - NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w) \ -} - -struct nv30_texture_format { - boolean defined; - uint pipe; - int format; - int swizzle; -}; - -static struct nv30_texture_format -nv30_texture_formats[] = { - _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W), - _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), - _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), - _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), - _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), - _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X), - _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), - _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), - _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), - _(Z16_UNORM , R5G6B5 , S1, S1, S1, ONE, X, X, X, X), - _(S8_USCALED_Z24_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X), - _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), - _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), - _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W), - _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W), - {}, -}; - -static struct nv30_texture_format * -nv30_fragtex_format(uint pipe_format) +void +nv30_sampler_view_init(struct pipe_context *pipe, + struct nvfx_sampler_view *sv) { - struct nv30_texture_format *tf = nv30_texture_formats; - - while (tf->defined) { - if (tf->pipe == pipe_format) - return tf; - tf++; - } - - NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format)); - return NULL; + struct pipe_resource* pt = sv->base.texture; + struct nvfx_texture_format *tf = &nvfx_texture_formats[sv->base.format]; + unsigned txf; + unsigned level = pt->target == PIPE_TEXTURE_CUBE ? 0 : sv->base.first_level; + + assert(tf->fmt[0] >= 0); + + txf = sv->u.init_fmt; + txf |= (level != sv->base.last_level ? NV30_3D_TEX_FORMAT_MIPMAP : 0); + txf |= util_logbase2(u_minify(pt->width0, level)) << NV30_3D_TEX_FORMAT_BASE_SIZE_U__SHIFT; + txf |= util_logbase2(u_minify(pt->height0, level)) << NV30_3D_TEX_FORMAT_BASE_SIZE_V__SHIFT; + txf |= util_logbase2(u_minify(pt->depth0, level)) << NV30_3D_TEX_FORMAT_BASE_SIZE_W__SHIFT; + txf |= 0x10000; + + sv->u.nv30.fmt[0] = tf->fmt[0] | txf; + sv->u.nv30.fmt[1] = tf->fmt[1] | txf; + sv->u.nv30.fmt[2] = tf->fmt[2] | txf; + sv->u.nv30.fmt[3] = tf->fmt[3] | txf; + + sv->swizzle |= (nvfx_subresource_pitch(pt, 0) << NV30_3D_TEX_SWIZZLE_RECT_PITCH__SHIFT); + + if(pt->height0 <= 1 || util_format_is_compressed(sv->base.format)) + sv->u.nv30.rect = -1; + else + sv->u.nv30.rect = !!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR); + + sv->lod_offset = sv->base.first_level - level; + sv->max_lod_limit = sv->base.last_level - level; } - void nv30_fragtex_set(struct nvfx_context *nvfx, int unit) { struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; - struct nvfx_miptree *nv30mt = (struct nvfx_miptree *)nvfx->fragment_sampler_views[unit]->texture; - struct pipe_resource *pt = &nv30mt->base.base; - struct nouveau_bo *bo = nv30mt->base.bo; - struct nv30_texture_format *tf; + struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit]; + struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo; struct nouveau_channel* chan = nvfx->screen->base.channel; - uint32_t txf, txs; + unsigned txf; unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + unsigned use_rect; + unsigned max_lod = MIN2(ps->max_lod + sv->lod_offset, sv->max_lod_limit); + unsigned min_lod = MIN2(ps->min_lod + sv->lod_offset, max_lod) ; - tf = nv30_fragtex_format(pt->format); - if (!tf) - return; - - txf = tf->format; - txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); - txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; - txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; - txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; - txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000; - - switch (pt->target) { - case PIPE_TEXTURE_CUBE: - txf |= NV34TCL_TX_FORMAT_CUBIC; - /* fall-through */ - case PIPE_TEXTURE_2D: - txf |= NV34TCL_TX_FORMAT_DIMS_2D; - break; - case PIPE_TEXTURE_3D: - txf |= NV34TCL_TX_FORMAT_DIMS_3D; - break; - case PIPE_TEXTURE_1D: - txf |= NV34TCL_TX_FORMAT_DIMS_1D; - break; - default: - NOUVEAU_ERR("Unknown target %d\n", pt->target); - return; + if(sv->u.nv30.rect < 0) + { + /* in the case of compressed or 1D textures, we can get away with this, + * since the layout is the same + */ + use_rect = ps->fmt; + } + else + { + static boolean warned = FALSE; + if( !!ps->fmt != sv->u.nv30.rect && !warned) { + warned = TRUE; + fprintf(stderr, + "Unimplemented: coordinate normalization mismatch. Possible reasons:\n" + "1. ARB_texture_non_power_of_two is being used despite the fact it isn't supported\n" + "2. The state tracker is not using the appropriate coordinate normalization\n" + "3. The state tracker is not supported\n"); + } + + use_rect = sv->u.nv30.rect; } - txs = tf->swizzle; + txf = sv->u.nv30.fmt[ps->compare + (use_rect ? 2 : 0)]; MARK_RING(chan, 9, 2); - OUT_RING(chan, RING_3D(NV34TCL_TX_OFFSET(unit), 8)); - OUT_RELOC(chan, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR, - NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); - OUT_RING(chan, ps->wrap); - OUT_RING(chan, NV34TCL_TX_ENABLE_ENABLE | ps->en); - OUT_RING(chan, txs); - OUT_RING(chan, ps->filt | 0x2000 /*voodoo*/); - OUT_RING(chan, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | - pt->height0); + OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8)); + OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, bo, txf, + tex_flags | NOUVEAU_BO_OR, + NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1); + OUT_RING(chan, (ps->wrap & sv->wrap_mask) | sv->wrap); + OUT_RING(chan, ps->en | (min_lod << NV30_3D_TEX_ENABLE_MIPMAP_MIN_LOD__SHIFT) | (max_lod << NV30_3D_TEX_ENABLE_MIPMAP_MAX_LOD__SHIFT)); + OUT_RING(chan, sv->swizzle); + OUT_RING(chan, ps->filt | sv->filt); + OUT_RING(chan, sv->npot_size); OUT_RING(chan, ps->bcol); nvfx->hw_txf[unit] = txf; diff --git a/src/gallium/drivers/nvfx/nv30_vertprog.h b/src/gallium/drivers/nvfx/nv30_vertprog.h index ec0444c07f8..e8c16b0341a 100644 --- a/src/gallium/drivers/nvfx/nv30_vertprog.h +++ b/src/gallium/drivers/nvfx/nv30_vertprog.h @@ -60,6 +60,9 @@ /* DWORD 0 */ +/* guess that this is the same as nv40 */ +#define NV30_VP_INST_INDEX_INPUT (1 << 27) + #define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) #define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ #define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ @@ -68,7 +71,7 @@ #define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 #define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) #define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) -#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16) +#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 16) #define NV30_VP_INST_COND_TEST_ENABLE (1<<14) #define NV30_VP_INST_COND_SHIFT 11 #define NV30_VP_INST_COND_MASK (0x07 << 11) @@ -111,7 +114,7 @@ #define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ #define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/ #define NV30_VP_INST_IADDR_SHIFT 2 -#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */ +#define NV30_VP_INST_IADDR_MASK (0x1FF << 2) /* NV30_VP_SRC2_LOW_MASK << 28 */ /* DWORD 3 */ #define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ @@ -125,7 +128,7 @@ #define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ #define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ #define NV30_VP_INST_DEST_SHIFT 2 -#define NV30_VP_INST_DEST_MASK (0x0F << 2) +#define NV30_VP_INST_DEST_MASK (0x1F << 2) # define NV30_VP_INST_DEST_POS 0 # define NV30_VP_INST_DEST_BFC0 1 # define NV30_VP_INST_DEST_BFC1 2 @@ -133,7 +136,11 @@ # define NV30_VP_INST_DEST_COL1 4 # define NV30_VP_INST_DEST_FOGC 5 # define NV30_VP_INST_DEST_PSZ 6 -# define NV30_VP_INST_DEST_TC(n) (8+n) +# define NV30_VP_INST_DEST_TC(n) (8+(n)) +# define NV30_VP_INST_DEST_CLP(n) (17 + (n)) + +/* guess that this is the same as nv40 */ +#define NV30_VP_INST_INDEX_CONST (1 << 1) /* Useful to split the source selection regs into their pieces */ #define NV30_VP_SRC0_HIGH_SHIFT 6 diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c index 0068b1ba54a..d4fb73702da 100644 --- a/src/gallium/drivers/nvfx/nv40_fragtex.c +++ b/src/gallium/drivers/nvfx/nv40_fragtex.c @@ -8,168 +8,97 @@ nv40_sampler_state_init(struct pipe_context *pipe, struct nvfx_sampler_state *ps, const struct pipe_sampler_state *cso) { + float limit; if (cso->max_anisotropy >= 2) { /* no idea, binary driver sets it, works without it.. meh.. */ ps->wrap |= (1 << 5); - if (cso->max_anisotropy >= 16) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; - } else - if (cso->max_anisotropy >= 12) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; - } else - if (cso->max_anisotropy >= 10) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; - } else - if (cso->max_anisotropy >= 8) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 6) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; - } else - if (cso->max_anisotropy >= 4) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; - } else { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; - } + if (cso->max_anisotropy >= 16) + ps->en |= NV40_3D_TEX_ENABLE_ANISO_16X; + else if (cso->max_anisotropy >= 12) + ps->en |= NV40_3D_TEX_ENABLE_ANISO_12X; + else if (cso->max_anisotropy >= 10) + ps->en |= NV40_3D_TEX_ENABLE_ANISO_10X; + else if (cso->max_anisotropy >= 8) + ps->en |= NV40_3D_TEX_ENABLE_ANISO_8X; + else if (cso->max_anisotropy >= 6) + ps->en |= NV40_3D_TEX_ENABLE_ANISO_6X; + else if (cso->max_anisotropy >= 4) + ps->en |= NV40_3D_TEX_ENABLE_ANISO_4X; + else + ps->en |= NV40_3D_TEX_ENABLE_ANISO_2X; } - { - float limit; + limit = CLAMP(cso->lod_bias, -16.0, 15.0 + (255.0 / 256.0)); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + ps->max_lod = (int)(CLAMP(cso->max_lod, 0.0, 15.0 + (255.0 / 256.0)) * 256.0); + ps->min_lod = (int)(CLAMP(cso->min_lod, 0.0, 15.0 + (255.0 / 256.0)) * 256.0); - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 7; - - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 19; - } + ps->en |= NV40_3D_TEX_ENABLE_ENABLE; } -#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \ -{ \ - TRUE, \ - PIPE_FORMAT_##m, \ - NV40TCL_TEX_FORMAT_FORMAT_##tf, \ - (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \ - NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ - NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ - NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w), \ - ((NV34TCL_TX_FILTER_SIGNED_RED*sx) | (NV34TCL_TX_FILTER_SIGNED_GREEN*sy) | \ - (NV34TCL_TX_FILTER_SIGNED_BLUE*sz) | (NV34TCL_TX_FILTER_SIGNED_ALPHA*sw)) \ -} - -struct nv40_texture_format { - boolean defined; - uint pipe; - int format; - int swizzle; - int sign; -}; - -static struct nv40_texture_format -nv40_texture_formats[] = { - _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), - _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), - _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), - _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0), - _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1), - _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0), - _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0), - _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), - _(S8_USCALED_Z24_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), - _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), - _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - {}, -}; - -static struct nv40_texture_format * -nv40_fragtex_format(uint pipe_format) +void +nv40_sampler_view_init(struct pipe_context *pipe, + struct nvfx_sampler_view *sv) { - struct nv40_texture_format *tf = nv40_texture_formats; - - while (tf->defined) { - if (tf->pipe == pipe_format) - return tf; - tf++; + struct pipe_resource* pt = sv->base.texture; + struct nvfx_miptree* mt = (struct nvfx_miptree*)pt; + struct nvfx_texture_format *tf = &nvfx_texture_formats[sv->base.format]; + unsigned txf; + unsigned level = pt->target == PIPE_TEXTURE_CUBE ? 0 : sv->base.first_level; + assert(tf->fmt[4] >= 0); + + txf = sv->u.init_fmt; + txf |= 0x8000; + if(pt->target == PIPE_TEXTURE_CUBE) + txf |= ((pt->last_level + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT); + else + txf |= (((sv->base.last_level - sv->base.first_level) + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT); + + if (!mt->linear_pitch) + sv->u.nv40.npot_size2 = 0; + else { + sv->u.nv40.npot_size2 = mt->linear_pitch; + txf |= NV40_3D_TEX_FORMAT_LINEAR; } - NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format)); - return NULL; -} + sv->u.nv40.fmt[0] = tf->fmt[4] | txf; + sv->u.nv40.fmt[1] = tf->fmt[5] | txf; + sv->u.nv40.npot_size2 |= (u_minify(pt->depth0, level) << NV40_3D_TEX_SIZE1_DEPTH__SHIFT); + + sv->lod_offset = (sv->base.first_level - level) * 256; + sv->max_lod_limit = (sv->base.last_level - level) * 256; +} void nv40_fragtex_set(struct nvfx_context *nvfx, int unit) { struct nouveau_channel* chan = nvfx->screen->base.channel; struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; - struct nvfx_miptree *nv40mt = (struct nvfx_miptree *)nvfx->fragment_sampler_views[unit]->texture; - struct nouveau_bo *bo = nv40mt->base.bo; - struct pipe_resource *pt = &nv40mt->base.base; - struct nv40_texture_format *tf; - - uint32_t txf, txs, txp; + struct nvfx_sampler_view* sv = (struct nvfx_sampler_view*)nvfx->fragment_sampler_views[unit]; + struct nouveau_bo *bo = ((struct nvfx_miptree *)sv->base.texture)->base.bo; unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + unsigned txf; + unsigned max_lod = MIN2(ps->max_lod + sv->lod_offset, sv->max_lod_limit); + unsigned min_lod = MIN2(ps->min_lod + sv->lod_offset, max_lod); - tf = nv40_fragtex_format(pt->format); - if (!tf) - assert(0); - - txf = ps->fmt; - txf |= tf->format | 0x8000; - txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT); - - if (1) /* XXX */ - txf |= NV34TCL_TX_FORMAT_NO_BORDER; - - switch (pt->target) { - case PIPE_TEXTURE_CUBE: - txf |= NV34TCL_TX_FORMAT_CUBIC; - /* fall-through */ - case PIPE_TEXTURE_2D: - txf |= NV34TCL_TX_FORMAT_DIMS_2D; - break; - case PIPE_TEXTURE_3D: - txf |= NV34TCL_TX_FORMAT_DIMS_3D; - break; - case PIPE_TEXTURE_1D: - txf |= NV34TCL_TX_FORMAT_DIMS_1D; - break; - default: - NOUVEAU_ERR("Unknown target %d\n", pt->target); - return; - } - - if (!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR)) { - txp = 0; - } else { - txp = nv40mt->level[0].pitch; - txf |= NV40TCL_TEX_FORMAT_LINEAR; - } - - txs = tf->swizzle; + txf = sv->u.nv40.fmt[ps->compare] | ps->fmt; - MARK_RING(chan, 11 + 2 * !unit, 2); - OUT_RING(chan, RING_3D(NV34TCL_TX_OFFSET(unit), 8)); - OUT_RELOC(chan, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); + MARK_RING(chan, 11, 2); + OUT_RING(chan, RING_3D(NV30_3D_TEX_OFFSET(unit), 8)); + OUT_RELOC(chan, bo, sv->offset, tex_flags | NOUVEAU_BO_LOW, 0, 0); OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR, - NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); - OUT_RING(chan, ps->wrap); - OUT_RING(chan, NV40TCL_TEX_ENABLE_ENABLE | ps->en); - OUT_RING(chan, txs); - OUT_RING(chan, ps->filt | tf->sign | 0x2000 /*voodoo*/); - OUT_RING(chan, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | pt->height0); + NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1); + OUT_RING(chan, (ps->wrap & sv->wrap_mask) | sv->wrap); + OUT_RING(chan, ps->en | (min_lod << 19) | (max_lod << 7)); + OUT_RING(chan, sv->swizzle); + OUT_RING(chan, ps->filt | sv->filt); + OUT_RING(chan, sv->npot_size); OUT_RING(chan, ps->bcol); - OUT_RING(chan, RING_3D(NV40TCL_TEX_SIZE1(unit), 1)); - OUT_RING(chan, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); + OUT_RING(chan, RING_3D(NV40_3D_TEX_SIZE1(unit), 1)); + OUT_RING(chan, sv->u.nv40.npot_size2); nvfx->hw_txf[unit] = txf; nvfx->hw_samplers |= (1 << unit); diff --git a/src/gallium/drivers/nvfx/nv40_vertprog.h b/src/gallium/drivers/nvfx/nv40_vertprog.h index 7337293babc..c1f2208f513 100644 --- a/src/gallium/drivers/nvfx/nv40_vertprog.h +++ b/src/gallium/drivers/nvfx/nv40_vertprog.h @@ -38,13 +38,14 @@ #define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) /* use address reg as index into attribs */ #define NV40_VP_INST_INDEX_INPUT (1 << 27) +#define NV40_VP_INST_SATURATE (1 << 26) #define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) #define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) #define NV40_VP_INST_SRC2_ABS (1 << 23) #define NV40_VP_INST_SRC1_ABS (1 << 22) #define NV40_VP_INST_SRC0_ABS (1 << 21) #define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 -#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) +#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x3F << 15) #define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) #define NV40_VP_INST_COND_SHIFT 10 #define NV40_VP_INST_COND_MASK (0x7 << 10) @@ -100,7 +101,7 @@ #define NV40_VP_INST_SRC2H_SHIFT 0 #define NV40_VP_INST_SRC2H_MASK (0x3F << 0) #define NV40_VP_INST_IADDRH_SHIFT 0 -#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) +#define NV40_VP_INST_IADDRH_MASK (0x3F << 0) /* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ #define NV40_VP_INST_IADDRL_SHIFT 29 diff --git a/src/gallium/drivers/nvfx/nvfx_buffer.c b/src/gallium/drivers/nvfx/nvfx_buffer.c index 05b824b8f74..041099e0e56 100644 --- a/src/gallium/drivers/nvfx/nvfx_buffer.c +++ b/src/gallium/drivers/nvfx/nvfx_buffer.c @@ -6,115 +6,39 @@ #include "nouveau/nouveau_screen.h" #include "nouveau/nouveau_winsys.h" #include "nvfx_resource.h" +#include "nvfx_screen.h" - -/* Currently using separate implementations for buffers and textures, - * even though gallium has a unified abstraction of these objects. - * Eventually these should be combined, and mechanisms like transfers - * be adapted to work for both buffer and texture uploads. - */ -static void nvfx_buffer_destroy(struct pipe_screen *pscreen, +void nvfx_buffer_destroy(struct pipe_screen *pscreen, struct pipe_resource *presource) { - struct nvfx_resource *buffer = nvfx_resource(presource); + struct nvfx_buffer *buffer = nvfx_buffer(presource); - nouveau_screen_bo_release(pscreen, buffer->bo); + if(!(buffer->base.base.flags & NVFX_RESOURCE_FLAG_USER)) + align_free(buffer->data); + nouveau_screen_bo_release(pscreen, buffer->base.bo); FREE(buffer); } - - - -/* Utility functions for transfer create/destroy are hooked in and - * just record the arguments to those functions. - */ -static void * -nvfx_buffer_transfer_map( struct pipe_context *pipe, - struct pipe_transfer *transfer ) -{ - struct nvfx_resource *buffer = nvfx_resource(transfer->resource); - uint8_t *map; - - map = nouveau_screen_bo_map_range( pipe->screen, - buffer->bo, - transfer->box.x, - transfer->box.width, - nouveau_screen_transfer_flags(transfer->usage) ); - if (map == NULL) - return NULL; - - return map + transfer->box.x; -} - - - -static void nvfx_buffer_transfer_flush_region( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) -{ - struct nvfx_resource *buffer = nvfx_resource(transfer->resource); - - nouveau_screen_bo_map_flush_range(pipe->screen, - buffer->bo, - transfer->box.x + box->x, - box->width); -} - -static void nvfx_buffer_transfer_unmap( struct pipe_context *pipe, - struct pipe_transfer *transfer ) -{ - struct nvfx_resource *buffer = nvfx_resource(transfer->resource); - - nouveau_screen_bo_unmap(pipe->screen, buffer->bo); -} - - - - -struct u_resource_vtbl nvfx_buffer_vtbl = -{ - u_default_resource_get_handle, /* get_handle */ - nvfx_buffer_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ - nvfx_buffer_transfer_map, /* transfer_map */ - nvfx_buffer_transfer_flush_region, /* transfer_flush_region */ - nvfx_buffer_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ -}; - - - struct pipe_resource * nvfx_buffer_create(struct pipe_screen *pscreen, const struct pipe_resource *template) { - struct nvfx_resource *buffer; + struct nvfx_screen* screen = nvfx_screen(pscreen); + struct nvfx_buffer* buffer; - buffer = CALLOC_STRUCT(nvfx_resource); + buffer = CALLOC_STRUCT(nvfx_buffer); if (!buffer) return NULL; - buffer->base = *template; - buffer->vtbl = &nvfx_buffer_vtbl; - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.screen = pscreen; - - buffer->bo = nouveau_screen_bo_new(pscreen, - 16, - buffer->base.usage, - buffer->base.bind, - buffer->base.width0); - - if (buffer->bo == NULL) - goto fail; - - return &buffer->base; + buffer->base.base = *template; + buffer->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; + pipe_reference_init(&buffer->base.base.reference, 1); + buffer->base.base.screen = pscreen; + buffer->size = util_format_get_stride(template->format, template->width0); + buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold; + buffer->data = align_malloc(buffer->size, 16); -fail: - FREE(buffer); - return NULL; + return &buffer->base.base; } @@ -124,30 +48,49 @@ nvfx_user_buffer_create(struct pipe_screen *pscreen, unsigned bytes, unsigned usage) { - struct nvfx_resource *buffer; + struct nvfx_screen* screen = nvfx_screen(pscreen); + struct nvfx_buffer* buffer; - buffer = CALLOC_STRUCT(nvfx_resource); + buffer = CALLOC_STRUCT(nvfx_buffer); if (!buffer) return NULL; - pipe_reference_init(&buffer->base.reference, 1); - buffer->vtbl = &nvfx_buffer_vtbl; - buffer->base.screen = pscreen; - buffer->base.format = PIPE_FORMAT_R8_UNORM; - buffer->base.usage = PIPE_USAGE_IMMUTABLE; - buffer->base.bind = usage; - buffer->base.width0 = bytes; - buffer->base.height0 = 1; - buffer->base.depth0 = 1; - - buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes); - if (!buffer->bo) - goto fail; - - return &buffer->base; - -fail: - FREE(buffer); - return NULL; + pipe_reference_init(&buffer->base.base.reference, 1); + buffer->base.base.flags = NVFX_RESOURCE_FLAG_LINEAR | NVFX_RESOURCE_FLAG_USER; + buffer->base.base.screen = pscreen; + buffer->base.base.format = PIPE_FORMAT_R8_UNORM; + buffer->base.base.usage = PIPE_USAGE_IMMUTABLE; + buffer->base.base.bind = usage; + buffer->base.base.width0 = bytes; + buffer->base.base.height0 = 1; + buffer->base.base.depth0 = 1; + buffer->data = ptr; + buffer->size = bytes; + buffer->bytes_to_draw_until_static = bytes * screen->static_reuse_threshold; + buffer->dirty_end = bytes; + + return &buffer->base.base; } +void nvfx_buffer_upload(struct nvfx_buffer* buffer) +{ + unsigned dirty = buffer->dirty_end - buffer->dirty_begin; + if(!buffer->base.bo) + { + buffer->base.bo = nouveau_screen_bo_new(buffer->base.base.screen, + 16, + buffer->base.base.usage, + buffer->base.base.bind, + buffer->base.base.width0); + } + + if(dirty) + { + // TODO: may want to use a temporary in some cases + nouveau_bo_map(buffer->base.bo, NOUVEAU_BO_WR + | (buffer->dirty_unsynchronized ? NOUVEAU_BO_NOSYNC : 0)); + memcpy((uint8_t*)buffer->base.bo->map + buffer->dirty_begin, buffer->data + buffer->dirty_begin, dirty); + nouveau_bo_unmap(buffer->base.bo); + buffer->dirty_begin = buffer->dirty_end = 0; + } +} diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 6d2dc4d5bf6..95834d23273 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -1,5 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" +#include "util/u_framebuffer.h" #include "nvfx_context.h" #include "nvfx_screen.h" @@ -12,13 +13,14 @@ nvfx_flush(struct pipe_context *pipe, unsigned flags, struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; + /* XXX: we need to actually be intelligent here */ if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(chan, eng3d, 0x1fd8, 1); - OUT_RING (chan, 2); - BEGIN_RING(chan, eng3d, 0x1fd8, 1); - OUT_RING (chan, 1); + WAIT_RING(chan, 4); + OUT_RING(chan, RING_3D(0x1fd8, 1)); + OUT_RING(chan, 2); + OUT_RING(chan, RING_3D(0x1fd8, 1)); + OUT_RING(chan, 1); } FIRE_RING(chan); @@ -31,8 +33,22 @@ nvfx_destroy(struct pipe_context *pipe) { struct nvfx_context *nvfx = nvfx_context(pipe); + if(nvfx->dummy_fs) + pipe->delete_fs_state(pipe, nvfx->dummy_fs); + + for(unsigned i = 0; i < nvfx->vtxbuf_nr; ++i) + pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0); + pipe_resource_reference(&nvfx->idxbuf.buffer, 0); + util_unreference_framebuffer_state(&nvfx->framebuffer); + for(unsigned i = 0; i < PIPE_MAX_SAMPLERS; ++i) + pipe_sampler_view_reference(&nvfx->fragment_sampler_views[i], 0); + if (nvfx->draw) draw_destroy(nvfx->draw); + + if(nvfx->screen->cur_ctx == nvfx) + nvfx->screen->cur_ctx = NULL; + FREE(nvfx); } @@ -55,19 +71,26 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.screen = pscreen; nvfx->pipe.priv = priv; nvfx->pipe.destroy = nvfx_destroy; - nvfx->pipe.draw_arrays = nvfx_draw_arrays; - nvfx->pipe.draw_elements = nvfx_draw_elements; + nvfx->pipe.draw_vbo = nvfx_draw_vbo; nvfx->pipe.clear = nvfx_clear; nvfx->pipe.flush = nvfx_flush; - screen->base.channel->user_private = nvfx; - nvfx->is_nv4x = screen->is_nv4x; + nvfx->use_nv4x = screen->use_nv4x; + /* TODO: it seems that nv30 might have fixed function clipping usable with vertex programs + * However, my code for that doesn't work, so use vp clipping for all cards, which works. + */ + nvfx->use_vp_clipping = TRUE; nvfx_init_query_functions(nvfx); nvfx_init_surface_functions(nvfx); nvfx_init_state_functions(nvfx); + nvfx_init_sampling_functions(nvfx); + nvfx_init_vbo_functions(nvfx); + nvfx_init_fragprog_functions(nvfx); + nvfx_init_vertprog_functions(nvfx); nvfx_init_resource_functions(&nvfx->pipe); + nvfx_init_transfer_functions(&nvfx->pipe); /* Create, configure, and install fallback swtnl path */ nvfx->draw = draw_create(&nvfx->pipe); @@ -79,6 +102,12 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) /* set these to that we init them on first validation */ nvfx->state.scissor_enabled = ~0; - nvfx->state.stipple_enabled = ~0; + nvfx->hw_pointsprite_control = -1; + nvfx->hw_vp_output = -1; + nvfx->use_vertex_buffers = -1; + nvfx->relocs_needed = NVFX_RELOCATE_ALL; + + LIST_INITHEAD(&nvfx->render_cache); + return &nvfx->pipe; } diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index e48f9f3aa88..6ef2a6945d7 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -11,12 +11,14 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_inlines.h" +#include "util/u_double_list.h" #include "draw/draw_vertex.h" +#include "util/u_blitter.h" #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" - +#include "nv30-40_3d.xml.h" #include "nvfx_state.h" #define NOUVEAU_ERR(fmt, args...) \ @@ -42,17 +44,26 @@ #define NVFX_NEW_SR (1 << 13) #define NVFX_NEW_VERTCONST (1 << 14) #define NVFX_NEW_FRAGCONST (1 << 15) +#define NVFX_NEW_INDEX (1 << 16) +#define NVFX_NEW_SPRITE (1 << 17) + +#define NVFX_RELOCATE_FRAMEBUFFER (1 << 0) +#define NVFX_RELOCATE_FRAGTEX (1 << 1) +#define NVFX_RELOCATE_FRAGPROG (1 << 2) +#define NVFX_RELOCATE_VTXBUF (1 << 3) +#define NVFX_RELOCATE_IDXBUF (1 << 4) +#define NVFX_RELOCATE_ALL 0x1f struct nvfx_rasterizer_state { struct pipe_rasterizer_state pipe; unsigned sb_len; - uint32_t sb[32]; + uint32_t sb[34]; }; struct nvfx_zsa_state { struct pipe_depth_stencil_alpha_state pipe; unsigned sb_len; - uint32_t sb[26]; + uint32_t sb[24]; }; struct nvfx_blend_state { @@ -64,13 +75,57 @@ struct nvfx_blend_state { struct nvfx_state { unsigned scissor_enabled; - unsigned stipple_enabled; unsigned fp_samplers; + unsigned render_temps; +}; + +struct nvfx_per_vertex_element { + unsigned idx; + unsigned vertex_buffer_index; + unsigned src_offset; +}; + +struct nvfx_low_frequency_element { + unsigned idx; + unsigned vertex_buffer_index; + unsigned src_offset; + void (*fetch_rgba_float)(float *dst, const uint8_t *src, unsigned i, unsigned j); + unsigned ncomp; +}; + +struct nvfx_per_instance_element { + struct nvfx_low_frequency_element base; + unsigned instance_divisor; +}; + +struct nvfx_per_vertex_buffer_info +{ + unsigned vertex_buffer_index; + unsigned per_vertex_size; }; struct nvfx_vtxelt_state { struct pipe_vertex_element pipe[16]; unsigned num_elements; + unsigned vtxfmt[16]; + + unsigned num_per_vertex_buffer_infos; + struct nvfx_per_vertex_buffer_info per_vertex_buffer_info[16]; + + unsigned num_per_vertex; + struct nvfx_per_vertex_element per_vertex[16]; + + unsigned num_per_instance; + struct nvfx_per_instance_element per_instance[16]; + + unsigned num_constant; + struct nvfx_low_frequency_element constant[16]; + + boolean needs_translate; + struct translate* translate; + + unsigned vertex_length; + unsigned max_vertices_per_packet; }; struct nvfx_render_target { @@ -86,19 +141,17 @@ struct nvfx_context { struct nvfx_screen *screen; unsigned is_nv4x; /* either 0 or ~0 */ + unsigned use_nv4x; /* either 0 or ~0 */ + boolean use_vp_clipping; struct draw_context *draw; + /* one is for user-requested operations, the other is for temporary copying inside them */ + struct blitter_context* blitter[2]; + unsigned blitters_in_use; + struct list_head render_cache; /* HW state derived from pipe states */ struct nvfx_state state; - struct { - struct nvfx_vertex_program *vertprog; - - unsigned nr_attribs; - unsigned hw[PIPE_MAX_SHADER_INPUTS]; - unsigned draw[PIPE_MAX_SHADER_INPUTS]; - unsigned emit[PIPE_MAX_SHADER_INPUTS]; - } swtnl; enum { HW, SWTNL, SWRAST @@ -110,8 +163,8 @@ struct nvfx_context { struct pipe_scissor_state scissor; unsigned stipple[32]; struct pipe_clip_state clip; - struct nvfx_vertex_program *vertprog; - struct nvfx_fragment_program *fragprog; + struct nvfx_pipe_vertex_program *vertprog; + struct nvfx_pipe_fragment_program *fragprog; struct pipe_resource *constbuf[PIPE_SHADER_TYPES]; unsigned constbuf_nr[PIPE_SHADER_TYPES]; struct nvfx_rasterizer_state *rasterizer; @@ -121,23 +174,37 @@ struct nvfx_context { struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; - struct pipe_resource *idxbuf; - unsigned idxbuf_format; + struct pipe_index_buffer idxbuf; struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; + struct nvfx_pipe_fragment_program* dummy_fs; + struct pipe_query* query; + unsigned nr_samplers; unsigned nr_textures; unsigned dirty_samplers; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; struct nvfx_vtxelt_state *vtxelt; + int base_vertex; + boolean use_index_buffer; + /* -1 = hardware input setup is outdated + * 0 = hardware input setup is for inline vertices + * 1 = hardware input setup is for hardware vertices + */ + int use_vertex_buffers; - unsigned vbo_bo; unsigned hw_vtxelt_nr; - uint8_t hw_samplers; - uint32_t hw_txf[8]; + unsigned hw_samplers; + uint32_t hw_txf[16]; struct nvfx_render_target hw_rt[4]; struct nvfx_render_target hw_zeta; + int hw_pointsprite_control; + int hw_vp_output; + struct nvfx_fragment_program* hw_fragprog; + struct nvfx_vertex_program* hw_vertprog; + + unsigned relocs_needed; }; static INLINE struct nvfx_context * @@ -174,15 +241,12 @@ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, /* nvfx_draw.c */ extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx); -extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe, - struct pipe_resource *idxbuf, - unsigned ib_size, int ib_bias, - unsigned mode, - unsigned start, unsigned count); +extern void nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info); extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx); /* nvfx_fb.c */ -extern void nvfx_state_framebuffer_validate(struct nvfx_context *nvfx); +extern int nvfx_framebuffer_prepare(struct nvfx_context *nvfx); +extern void nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result); void nvfx_framebuffer_relocate(struct nvfx_context *nvfx); @@ -190,19 +254,24 @@ nvfx_framebuffer_relocate(struct nvfx_context *nvfx); extern void nvfx_fragprog_destroy(struct nvfx_context *, struct nvfx_fragment_program *); extern void nvfx_fragprog_validate(struct nvfx_context *nvfx); -extern void -nvfx_fragprog_relocate(struct nvfx_context *nvfx); +extern void nvfx_fragprog_relocate(struct nvfx_context *nvfx); +extern void nvfx_init_fragprog_functions(struct nvfx_context *nvfx); /* nvfx_fragtex.c */ +extern void nvfx_init_sampling_functions(struct nvfx_context *nvfx); extern void nvfx_fragtex_validate(struct nvfx_context *nvfx); -extern void -nvfx_fragtex_relocate(struct nvfx_context *nvfx); +extern void nvfx_fragtex_relocate(struct nvfx_context *nvfx); + +struct nvfx_sampler_view; /* nv30_fragtex.c */ extern void nv30_sampler_state_init(struct pipe_context *pipe, struct nvfx_sampler_state *ps, const struct pipe_sampler_state *cso); +extern void +nv30_sampler_view_init(struct pipe_context *pipe, + struct nvfx_sampler_view *sv); extern void nv30_fragtex_set(struct nvfx_context *nvfx, int unit); /* nv40_fragtex.c */ @@ -210,6 +279,9 @@ extern void nv40_sampler_state_init(struct pipe_context *pipe, struct nvfx_sampler_state *ps, const struct pipe_sampler_state *cso); +extern void +nv40_sampler_view_init(struct pipe_context *pipe, + struct nvfx_sampler_view *sv); extern void nv40_fragtex_set(struct nvfx_context *nvfx, int unit); /* nvfx_state.c */ @@ -224,28 +296,76 @@ extern void nvfx_state_sr_validate(struct nvfx_context *nvfx); extern void nvfx_state_zsa_validate(struct nvfx_context *nvfx); /* nvfx_state_emit.c */ -extern void nvfx_state_relocate(struct nvfx_context *nvfx); +extern void nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs); extern boolean nvfx_state_validate(struct nvfx_context *nvfx); extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx); -extern void nvfx_state_emit(struct nvfx_context *nvfx); + +static inline void +nvfx_state_emit(struct nvfx_context *nvfx) +{ + unsigned relocs = NVFX_RELOCATE_FRAMEBUFFER | NVFX_RELOCATE_FRAGTEX | NVFX_RELOCATE_FRAGPROG; + if (nvfx->render_mode == HW) + { + relocs |= NVFX_RELOCATE_VTXBUF; + if(nvfx->use_index_buffer) + relocs |= NVFX_RELOCATE_IDXBUF; + } + + relocs &= nvfx->relocs_needed; + if(relocs) + nvfx_state_relocate(nvfx, relocs); +} /* nvfx_transfer.c */ -extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx); +extern void nvfx_init_transfer_functions(struct pipe_context *pipe); /* nvfx_vbo.c */ extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx); +extern void nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx); extern void nvfx_vbo_relocate(struct nvfx_context *nvfx); -extern void nvfx_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nvfx_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, - unsigned count); +extern void nvfx_idxbuf_validate(struct nvfx_context* nvfx); +extern void nvfx_idxbuf_relocate(struct nvfx_context* nvfx); +extern void nvfx_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info); +extern void nvfx_init_vbo_functions(struct nvfx_context *nvfx); +extern unsigned nvfx_vertex_formats[]; /* nvfx_vertprog.c */ extern boolean nvfx_vertprog_validate(struct nvfx_context *nvfx); extern void nvfx_vertprog_destroy(struct nvfx_context *, struct nvfx_vertex_program *); +extern void nvfx_init_vertprog_functions(struct nvfx_context *nvfx); + +/* nvfx_push.c */ +extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); + +/* must WAIT_RING(chan, ncomp + 1) or equivalent beforehand! */ +static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, const float* v, unsigned ncomp) +{ + switch (ncomp) { + case 4: + OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_4F_X(attrib), 4)); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + OUT_RING(chan, fui(v[2])); + OUT_RING(chan, fui(v[3])); + break; + case 3: + OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_3F_X(attrib), 3)); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + OUT_RING(chan, fui(v[2])); + break; + case 2: + OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_2F_X(attrib), 2)); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + break; + case 1: + OUT_RING(chan, RING_3D(NV30_3D_VTX_ATTR_1F(attrib), 1)); + OUT_RING(chan, fui(v[0])); + break; + } +} #endif diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index 22cff370b77..61f888a8ea2 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -1,6 +1,5 @@ #include "pipe/p_shader_tokens.h" #include "util/u_inlines.h" -#include "tgsi/tgsi_ureg.h" #include "util/u_pack_color.h" @@ -9,11 +8,7 @@ #include "draw/draw_pipe.h" #include "nvfx_context.h" - -/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very - * often at all. Uses "quadro style" vertex submission + a fixed vertex - * layout to avoid the need to generate a vertex program or vtxfmt. - */ +#include "nvfx_resource.h" struct nvfx_render_stage { struct draw_stage stage; @@ -27,67 +22,18 @@ nvfx_render_stage(struct draw_stage *stage) return (struct nvfx_render_stage *)stage; } -static INLINE void -nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v) +static void +nvfx_render_flush(struct draw_stage *stage, unsigned flags) { - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - unsigned i; + struct nvfx_render_stage *rs = nvfx_render_stage(stage); + struct nvfx_context *nvfx = rs->nvfx; + struct nouveau_channel *chan = nvfx->screen->base.channel; - for (i = 0; i < nvfx->swtnl.nr_attribs; i++) { - unsigned idx = nvfx->swtnl.draw[i]; - unsigned hw = nvfx->swtnl.hw[i]; - - switch (nvfx->swtnl.emit[i]) { - case EMIT_OMIT: - break; - case EMIT_1F: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(hw), 1); - OUT_RING (chan, fui(v->data[idx][0])); - break; - case EMIT_2F: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(hw), 2); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - break; - case EMIT_3F: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(hw), 3); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - OUT_RING (chan, fui(v->data[idx][2])); - break; - case EMIT_4F: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - OUT_RING (chan, fui(v->data[idx][2])); - OUT_RING (chan, fui(v->data[idx][3])); - break; - case 0xff: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); - OUT_RING (chan, fui(v->data[idx][0] / v->data[idx][3])); - OUT_RING (chan, fui(v->data[idx][1] / v->data[idx][3])); - OUT_RING (chan, fui(v->data[idx][2] / v->data[idx][3])); - OUT_RING (chan, fui(1.0f / v->data[idx][3])); - break; - case EMIT_4UB: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1); - OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), - float_to_ubyte(v->data[idx][1]), - float_to_ubyte(v->data[idx][2]), - float_to_ubyte(v->data[idx][3]))); - case EMIT_4UB_BGRA: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1); - OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][2]), - float_to_ubyte(v->data[idx][1]), - float_to_ubyte(v->data[idx][0]), - float_to_ubyte(v->data[idx][3]))); - break; - default: - assert(0); - break; - } + if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) { + assert(AVAIL_RING(chan) >= 2); + OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP); + rs->prim = NV30_3D_VERTEX_BEGIN_END_STOP; } } @@ -100,82 +46,87 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - unsigned i; + boolean no_elements = nvfx->vertprog->draw_no_elements; + unsigned num_attribs = nvfx->vertprog->draw_elements; - /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ - if (AVAIL_RING(chan) < ((count * 20) + 6)) { - if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { - NOUVEAU_ERR("AIII, missed flush\n"); - assert(0); - } + /* we need to account the flush as well here even if it is done afterthis + * function + */ + if (AVAIL_RING(chan) < ((1 + count * num_attribs * 4) + 6 + 64)) { + nvfx_render_flush(stage, 0); FIRE_RING(chan); nvfx_state_emit(nvfx); + + assert(AVAIL_RING(chan) >= ((1 + count * num_attribs * 4) + 6 + 64)); } /* Switch primitive modes if necessary */ if (rs->prim != mode) { - if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) { + OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP); + } + + /* XXX: any command a lot of times seems to (mostly) fix corruption that would otherwise happen */ + /* this seems to cause issues on nv3x, and also be unneeded there */ + if(nvfx->is_nv4x) + { + int i; + for(i = 0; i < 32; ++i) + { + OUT_RING(chan, RING_3D(0x1dac, 1)); + OUT_RING(chan, 0); + } } - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); OUT_RING (chan, mode); rs->prim = mode; } - /* Emit vertex data */ - for (i = 0; i < count; i++) - nvfx_render_vertex(nvfx, prim->v[i]); - - /* If it's likely we'll need to empty the push buffer soon, finish - * off the primitive now. - */ - if (AVAIL_RING(chan) < ((count * 20) + 6)) { - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); - rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; + OUT_RING(chan, RING_3D_NI(NV30_3D_VERTEX_DATA, num_attribs * 4 * count)); + if(no_elements) { + OUT_RING(chan, 0); + OUT_RING(chan, 0); + OUT_RING(chan, 0); + OUT_RING(chan, 0); + } else { + for (unsigned i = 0; i < count; ++i) + { + struct vertex_header* v = prim->v[i]; + /* TODO: disable divide where it's causing the problem, and remove this hack */ + OUT_RING(chan, fui(v->data[0][0] / v->data[0][3])); + OUT_RING(chan, fui(v->data[0][1] / v->data[0][3])); + OUT_RING(chan, fui(v->data[0][2] / v->data[0][3])); + OUT_RING(chan, fui(1.0f / v->data[0][3])); + OUT_RINGp(chan, &v->data[1][0], 4 * (num_attribs - 1)); + } } } static void nvfx_render_point(struct draw_stage *draw, struct prim_header *prim) { - nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_POINTS, 1); + nvfx_render_prim(draw, prim, NV30_3D_VERTEX_BEGIN_END_POINTS, 1); } static void nvfx_render_line(struct draw_stage *draw, struct prim_header *prim) { - nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_LINES, 2); + nvfx_render_prim(draw, prim, NV30_3D_VERTEX_BEGIN_END_LINES, 2); } static void nvfx_render_tri(struct draw_stage *draw, struct prim_header *prim) { - nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_TRIANGLES, 3); -} - -static void -nvfx_render_flush(struct draw_stage *draw, unsigned flags) -{ - struct nvfx_render_stage *rs = nvfx_render_stage(draw); - struct nvfx_context *nvfx = rs->nvfx; - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); - rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; - } + nvfx_render_prim(draw, prim, NV30_3D_VERTEX_BEGIN_END_TRIANGLES, 3); } static void nvfx_render_reset_stipple_counter(struct draw_stage *draw) { + /* this doesn't really seem to work, but it matters rather little */ + nvfx_render_flush(draw, 0); } static void @@ -184,40 +135,11 @@ nvfx_render_destroy(struct draw_stage *draw) FREE(draw); } -static struct nvfx_vertex_program * -nvfx_create_drawvp(struct nvfx_context *nvfx) -{ - struct ureg_program *ureg; - uint i; - - ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); - if (ureg == NULL) - return NULL; - - ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), ureg_DECL_vs_input(ureg, 0)); - ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0), ureg_DECL_vs_input(ureg, 3)); - ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1), ureg_DECL_vs_input(ureg, 4)); - ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 0), ureg_DECL_vs_input(ureg, 3)); - ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 1), ureg_DECL_vs_input(ureg, 4)); - ureg_MOV(ureg, - ureg_writemask(ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 1), TGSI_WRITEMASK_X), - ureg_DECL_vs_input(ureg, 5)); - for (i = 0; i < 8; ++i) - ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, i), ureg_DECL_vs_input(ureg, 8 + i)); - - ureg_END( ureg ); - - return ureg_create_shader_and_destroy( ureg, &nvfx->pipe ); -} - struct draw_stage * nvfx_draw_render_stage(struct nvfx_context *nvfx) { struct nvfx_render_stage *render = CALLOC_STRUCT(nvfx_render_stage); - if (!nvfx->swtnl.vertprog) - nvfx->swtnl.vertprog = nvfx_create_drawvp(nvfx); - render->nvfx = nvfx; render->stage.draw = nvfx->draw; render->stage.point = nvfx_render_point; @@ -231,120 +153,37 @@ nvfx_draw_render_stage(struct nvfx_context *nvfx) } void -nvfx_draw_elements_swtnl(struct pipe_context *pipe, - struct pipe_resource *idxbuf, - unsigned idxbuf_size, int idxbuf_bias, - unsigned mode, unsigned start, unsigned count) +nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info) { struct nvfx_context *nvfx = nvfx_context(pipe); - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; - struct pipe_transfer *ib_transfer = NULL; - struct pipe_transfer *cb_transfer = NULL; unsigned i; void *map; if (!nvfx_state_validate_swtnl(nvfx)) return; + nvfx_state_emit(nvfx); + /* these must be passed without adding the offsets */ for (i = 0; i < nvfx->vtxbuf_nr; i++) { - map = pipe_buffer_map(pipe, nvfx->vtxbuf[i].buffer, - PIPE_TRANSFER_READ, - &vb_transfer[i]); + map = nvfx_buffer(nvfx->vtxbuf[i].buffer)->data; draw_set_mapped_vertex_buffer(nvfx->draw, i, map); } - if (idxbuf) { - map = pipe_buffer_map(pipe, idxbuf, - PIPE_TRANSFER_READ, - &ib_transfer); - draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, idxbuf_bias, map); - } else { - draw_set_mapped_element_buffer(nvfx->draw, 0, 0, NULL); - } + map = NULL; + if (info->indexed && nvfx->idxbuf.buffer) + map = nvfx_buffer(nvfx->idxbuf.buffer)->data; + draw_set_mapped_index_buffer(nvfx->draw, map); if (nvfx->constbuf[PIPE_SHADER_VERTEX]) { const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX]; - map = pipe_buffer_map(pipe, - nvfx->constbuf[PIPE_SHADER_VERTEX], - PIPE_TRANSFER_READ, - &cb_transfer); + map = nvfx_buffer(nvfx->constbuf[PIPE_SHADER_VERTEX])->data; draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0, map, nr); } - draw_arrays(nvfx->draw, mode, start, count); - - for (i = 0; i < nvfx->vtxbuf_nr; i++) - pipe_buffer_unmap(pipe, nvfx->vtxbuf[i].buffer, vb_transfer[i]); - - if (idxbuf) - pipe_buffer_unmap(pipe, idxbuf, ib_transfer); - - if (nvfx->constbuf[PIPE_SHADER_VERTEX]) - pipe_buffer_unmap(pipe, nvfx->constbuf[PIPE_SHADER_VERTEX], - cb_transfer); + draw_vbo(nvfx->draw, info); draw_flush(nvfx->draw); - pipe->flush(pipe, 0, NULL); -} - -static INLINE void -emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit, - unsigned semantic, unsigned index) -{ - unsigned draw_out = draw_find_shader_output(nvfx->draw, semantic, index); - unsigned a = nvfx->swtnl.nr_attribs++; - - nvfx->swtnl.hw[a] = hw; - nvfx->swtnl.emit[a] = emit; - nvfx->swtnl.draw[a] = draw_out; -} - -void -nvfx_vtxfmt_validate(struct nvfx_context *nvfx) -{ - struct nvfx_fragment_program *fp = nvfx->fragprog; - unsigned colour = 0, texcoords = 0, fog = 0, i; - - /* Determine needed fragprog inputs */ - for (i = 0; i < fp->info.num_inputs; i++) { - switch (fp->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - break; - case TGSI_SEMANTIC_COLOR: - colour |= (1 << fp->info.input_semantic_index[i]); - break; - case TGSI_SEMANTIC_GENERIC: - texcoords |= (1 << fp->info.input_semantic_index[i]); - break; - case TGSI_SEMANTIC_FOG: - fog = 1; - break; - default: - assert(0); - } - } - - nvfx->swtnl.nr_attribs = 0; - - /* Map draw vtxprog output to hw attribute IDs */ - for (i = 0; i < 2; i++) { - if (!(colour & (1 << i))) - continue; - emit_attrib(nvfx, 3 + i, EMIT_4F, TGSI_SEMANTIC_COLOR, i); - } - - for (i = 0; i < 8; i++) { - if (!(texcoords & (1 << i))) - continue; - emit_attrib(nvfx, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); - } - - if (fog) { - emit_attrib(nvfx, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); - } - - emit_attrib(nvfx, 0, 0xff, TGSI_SEMANTIC_POSITION, 0); } diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index 6772d9bd516..13e8beed479 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -1,87 +1,81 @@ +#include <float.h> #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "util/u_inlines.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_ureg.h" #include "nvfx_context.h" #include "nvfx_shader.h" +#include "nvfx_resource.h" -#define MAX_CONSTS 128 -#define MAX_IMM 32 struct nvfx_fpc { + struct nvfx_pipe_fragment_program* pfp; struct nvfx_fragment_program *fp; - uint attrib_map[PIPE_MAX_SHADER_INPUTS]; - - unsigned r_temps; - unsigned r_temps_discard; - struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; - struct nvfx_sreg *r_temp; + unsigned max_temps; + unsigned long long r_temps; + unsigned long long r_temps_discard; + struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_reg *r_temp; + unsigned sprite_coord_temp; int num_regs; unsigned inst_offset; unsigned have_const; - struct { - int pipe; - float vals[4]; - } consts[MAX_CONSTS]; - int nr_consts; + struct util_dynarray imm_data; - struct nvfx_sreg imm[MAX_IMM]; + struct nvfx_reg* r_imm; unsigned nr_imm; + + unsigned char generic_to_slot[256]; /* semantic idx for each input semantic */ + + struct util_dynarray if_stack; + //struct util_dynarray loop_stack; + struct util_dynarray label_relocs; }; -static INLINE struct nvfx_sreg +static INLINE struct nvfx_reg temp(struct nvfx_fpc *fpc) { - int idx = ffs(~fpc->r_temps) - 1; + int idx = __builtin_ctzll(~fpc->r_temps); - if (idx < 0) { + if (idx >= fpc->max_temps) { NOUVEAU_ERR("out of temps!!\n"); assert(0); - return nvfx_sr(NVFXSR_TEMP, 0); + return nvfx_reg(NVFXSR_TEMP, 0); } - fpc->r_temps |= (1 << idx); - fpc->r_temps_discard |= (1 << idx); - return nvfx_sr(NVFXSR_TEMP, idx); + fpc->r_temps |= (1ULL << idx); + fpc->r_temps_discard |= (1ULL << idx); + return nvfx_reg(NVFXSR_TEMP, idx); } static INLINE void release_temps(struct nvfx_fpc *fpc) { fpc->r_temps &= ~fpc->r_temps_discard; - fpc->r_temps_discard = 0; + fpc->r_temps_discard = 0ULL; } -static INLINE struct nvfx_sreg -constant(struct nvfx_fpc *fpc, int pipe, float vals[4]) +static inline struct nvfx_reg +nvfx_fp_imm(struct nvfx_fpc *fpc, float a, float b, float c, float d) { - int idx; - - if (fpc->nr_consts == MAX_CONSTS) - assert(0); - idx = fpc->nr_consts++; + float v[4] = {a, b, c, d}; + int idx = fpc->imm_data.size >> 4; - fpc->consts[idx].pipe = pipe; - if (pipe == -1) - memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); - return nvfx_sr(NVFXSR_CONST, idx); + memcpy(util_dynarray_grow(&fpc->imm_data, sizeof(float) * 4), v, 4 * sizeof(float)); + return nvfx_reg(NVFXSR_IMM, idx); } -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nvfx_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \ - (d), (m), (s0), (s1), (s2)) -#define tex(cc,s,o,u,d,m,s0,s1,s2) \ - nvfx_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \ - (d), (m), (s0), none, none) - static void grow_insns(struct nvfx_fpc *fpc, int size) { @@ -92,44 +86,59 @@ grow_insns(struct nvfx_fpc *fpc, int size) } static void -emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src) +emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_src src) { struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw = &fp->insn[fpc->inst_offset]; uint32_t sr = 0; - switch (src.type) { + switch (src.reg.type) { case NVFXSR_INPUT: sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); - hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT); + hw[0] |= (src.reg.index << NVFX_FP_OP_INPUT_SRC_SHIFT); break; case NVFXSR_OUTPUT: sr |= NVFX_FP_REG_SRC_HALF; /* fall-through */ case NVFXSR_TEMP: sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); - sr |= (src.index << NVFX_FP_REG_SRC_SHIFT); + sr |= (src.reg.index << NVFX_FP_REG_SRC_SHIFT); + break; + case NVFXSR_RELOCATED: + sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); + sr |= (fpc->sprite_coord_temp << NVFX_FP_REG_SRC_SHIFT); + //printf("adding relocation at %x for %x\n", fpc->inst_offset, src.index); + util_dynarray_append(&fpc->fp->slot_relocations[src.reg.index], unsigned, fpc->inst_offset + pos + 1); + break; + case NVFXSR_IMM: + if (!fpc->have_const) { + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + fpc->have_const = 1; + } + + memcpy(&fp->insn[fpc->inst_offset + 4], + (float*)fpc->imm_data.data + src.reg.index * 4, + sizeof(uint32_t) * 4); + + sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); break; case NVFXSR_CONST: if (!fpc->have_const) { grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; fpc->have_const = 1; } - hw = &fp->insn[fpc->inst_offset]; - if (fpc->consts[src.index].pipe >= 0) { + { struct nvfx_fragment_program_data *fpd; fp->consts = realloc(fp->consts, ++fp->nr_consts * sizeof(*fpd)); fpd = &fp->consts[fp->nr_consts - 1]; fpd->offset = fpc->inst_offset + 4; - fpd->index = fpc->consts[src.index].pipe; + fpd->index = src.reg.index; memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); - } else { - memcpy(&fp->insn[fpc->inst_offset + 4], - fpc->consts[src.index].vals, - sizeof(uint32_t) * 4); } sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); @@ -156,7 +165,7 @@ emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src) } static void -emit_dst(struct nvfx_fpc *fpc, struct nvfx_sreg dst) +emit_dst(struct nvfx_fpc *fpc, struct nvfx_reg dst) { struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw = &fp->insn[fpc->inst_offset]; @@ -184,9 +193,7 @@ emit_dst(struct nvfx_fpc *fpc, struct nvfx_sreg dst) } static void -nvfx_fp_arith(struct nvfx_fpc *fpc, int sat, int op, - struct nvfx_sreg dst, int mask, - struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) +nvfx_fp_emit(struct nvfx_fpc *fpc, struct nvfx_insn insn) { struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw; @@ -197,68 +204,225 @@ nvfx_fp_arith(struct nvfx_fpc *fpc, int sat, int op, hw = &fp->insn[fpc->inst_offset]; memset(hw, 0, sizeof(uint32_t) * 4); - if (op == NVFX_FP_OP_OPCODE_KIL) - fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; - hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT); - hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT); - hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT); + if (insn.op == NVFX_FP_OP_OPCODE_KIL) + fp->fp_control |= NV30_3D_FP_CONTROL_USES_KIL; + hw[0] |= (insn.op << NVFX_FP_OP_OPCODE_SHIFT); + hw[0] |= (insn.mask << NVFX_FP_OP_OUTMASK_SHIFT); + hw[2] |= (insn.scale << NVFX_FP_OP_DST_SCALE_SHIFT); - if (sat) + if (insn.sat) hw[0] |= NVFX_FP_OP_OUT_SAT; - if (dst.cc_update) + if (insn.cc_update) hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; - hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT); - hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | - (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | - (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | - (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); - - emit_dst(fpc, dst); - emit_src(fpc, 0, s0); - emit_src(fpc, 1, s1); - emit_src(fpc, 2, s2); + hw[1] |= (insn.cc_test << NVFX_FP_OP_COND_SHIFT); + hw[1] |= ((insn.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (insn.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | + (insn.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | + (insn.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); + + if(insn.unit >= 0) + { + hw[0] |= (insn.unit << NVFX_FP_OP_TEX_UNIT_SHIFT); + fp->samplers |= (1 << insn.unit); + } + + emit_dst(fpc, insn.dst); + emit_src(fpc, 0, insn.src[0]); + emit_src(fpc, 1, insn.src[1]); + emit_src(fpc, 2, insn.src[2]); } +#define arith(s,o,d,m,s0,s1,s2) \ + nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, -1, \ + (d), (m), (s0), (s1), (s2)) + +#define tex(s,o,u,d,m,s0,s1,s2) \ + nvfx_insn((s), NVFX_FP_OP_OPCODE_##o, (u), \ + (d), (m), (s0), none, none) + +/* IF src.x != 0, as TGSI specifies */ static void -nvfx_fp_tex(struct nvfx_fpc *fpc, int sat, int op, int unit, - struct nvfx_sreg dst, int mask, - struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) +nv40_fp_if(struct nvfx_fpc *fpc, struct nvfx_src src) { - struct nvfx_fragment_program *fp = fpc->fp; + const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); + struct nvfx_insn insn = arith(0, MOV, none.reg, NVFX_FP_MASK_X, src, none, none); + uint32_t *hw; + insn.cc_update = 1; + nvfx_fp_emit(fpc, insn); - nvfx_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + fpc->inst_offset = fpc->fp->insn_len; + grow_insns(fpc, 4); + hw = &fpc->fp->insn[fpc->inst_offset]; + /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ + hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) | + NV40_FP_OP_OUT_NONE | + (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT); + /* Use .xxxx swizzle so that we check only src[0].x*/ + hw[1] = (0 << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (0 << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | + (0 << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | + (0 << NVFX_FP_OP_COND_SWZ_W_SHIFT) | + (NVFX_FP_OP_COND_NE << NVFX_FP_OP_COND_SHIFT); + hw[2] = 0; /* | NV40_FP_OP_OPCODE_IS_BRANCH | else_offset */ + hw[3] = 0; /* | endif_offset */ + util_dynarray_append(&fpc->if_stack, unsigned, fpc->inst_offset); +} - fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT); - fp->samplers |= (1 << unit); +/* IF src.x != 0, as TGSI specifies */ +static void +nv40_fp_cal(struct nvfx_fpc *fpc, unsigned target) +{ + struct nvfx_relocation reloc; + uint32_t *hw; + fpc->inst_offset = fpc->fp->insn_len; + grow_insns(fpc, 4); + hw = &fpc->fp->insn[fpc->inst_offset]; + /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ + hw[0] = (NV40_FP_OP_BRA_OPCODE_CAL << NVFX_FP_OP_OPCODE_SHIFT); + /* Use .xxxx swizzle so that we check only src[0].x*/ + hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) | + (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */ + hw[3] = 0; + reloc.target = target; + reloc.location = fpc->inst_offset + 2; + util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); } -static INLINE struct nvfx_sreg +static void +nv40_fp_ret(struct nvfx_fpc *fpc) +{ + uint32_t *hw; + fpc->inst_offset = fpc->fp->insn_len; + grow_insns(fpc, 4); + hw = &fpc->fp->insn[fpc->inst_offset]; + /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ + hw[0] = (NV40_FP_OP_BRA_OPCODE_RET << NVFX_FP_OP_OPCODE_SHIFT); + /* Use .xxxx swizzle so that we check only src[0].x*/ + hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) | + (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | call_offset */ + hw[3] = 0; +} + +static void +nv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target) +{ + struct nvfx_relocation reloc; + uint32_t *hw; + fpc->inst_offset = fpc->fp->insn_len; + grow_insns(fpc, 4); + hw = &fpc->fp->insn[fpc->inst_offset]; + /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ + hw[0] = (NV40_FP_OP_BRA_OPCODE_REP << NVFX_FP_OP_OPCODE_SHIFT) | + NV40_FP_OP_OUT_NONE | + (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT); + /* Use .xxxx swizzle so that we check only src[0].x*/ + hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_ALL_SHIFT) | + (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | + (count << NV40_FP_OP_REP_COUNT1_SHIFT) | + (count << NV40_FP_OP_REP_COUNT2_SHIFT) | + (count << NV40_FP_OP_REP_COUNT3_SHIFT); + hw[3] = 0; /* | end_offset */ + reloc.target = target; + reloc.location = fpc->inst_offset + 3; + util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); + //util_dynarray_append(&fpc->loop_stack, unsigned, target); +} + +/* warning: this only works forward, and probably only if not inside any IF */ +static void +nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target) +{ + struct nvfx_relocation reloc; + uint32_t *hw; + fpc->inst_offset = fpc->fp->insn_len; + grow_insns(fpc, 4); + hw = &fpc->fp->insn[fpc->inst_offset]; + /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ + hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) | + NV40_FP_OP_OUT_NONE | + (NVFX_FP_PRECISION_FP16 << NVFX_FP_OP_PRECISION_SHIFT); + /* Use .xxxx swizzle so that we check only src[0].x*/ + hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (NVFX_FP_OP_COND_FL << NVFX_FP_OP_COND_SHIFT); + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; /* | else_offset */ + hw[3] = 0; /* | endif_offset */ + reloc.target = target; + reloc.location = fpc->inst_offset + 2; + util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); + reloc.target = target; + reloc.location = fpc->inst_offset + 3; + util_dynarray_append(&fpc->label_relocs, struct nvfx_relocation, reloc); +} + +static void +nv40_fp_brk(struct nvfx_fpc *fpc) +{ + uint32_t *hw; + fpc->inst_offset = fpc->fp->insn_len; + grow_insns(fpc, 4); + hw = &fpc->fp->insn[fpc->inst_offset]; + /* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */ + hw[0] = (NV40_FP_OP_BRA_OPCODE_BRK << NVFX_FP_OP_OPCODE_SHIFT) | + NV40_FP_OP_OUT_NONE; + /* Use .xxxx swizzle so that we check only src[0].x*/ + hw[1] = (NVFX_SWZ_IDENTITY << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (NVFX_FP_OP_COND_TR << NVFX_FP_OP_COND_SHIFT); + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH; + hw[3] = 0; +} + +static INLINE struct nvfx_src tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc) { - struct nvfx_sreg src = { 0 }; + struct nvfx_src src; switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nvfx_sr(NVFXSR_INPUT, - fpc->attrib_map[fsrc->Register.Index]); + if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_POSITION) { + assert(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 0); + src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_POSITION); + } else if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_COLOR) { + if(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 0) + src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL0); + else if(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 1) + src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL1); + else + assert(0); + } else if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG) { + assert(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 0); + src.reg = nvfx_reg(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_FOGC); + } else if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FACE) { + /* TODO: check this has the correct values */ + /* XXX: what do we do for nv30 here (assuming it lacks facing)?! */ + assert(fpc->pfp->info.input_semantic_index[fsrc->Register.Index] == 0); + src.reg = nvfx_reg(NVFXSR_INPUT, NV40_FP_OP_INPUT_SRC_FACING); + } else { + assert(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_GENERIC); + src.reg = nvfx_reg(NVFXSR_RELOCATED, fpc->generic_to_slot[fpc->pfp->info.input_semantic_index[fsrc->Register.Index]]); + } break; case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->Register.Index, NULL); + src.reg = nvfx_reg(NVFXSR_CONST, fsrc->Register.Index); break; case TGSI_FILE_IMMEDIATE: assert(fsrc->Register.Index < fpc->nr_imm); - src = fpc->imm[fsrc->Register.Index]; + src.reg = fpc->r_imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = fpc->r_temp[fsrc->Register.Index]; + src.reg = fpc->r_temp[fsrc->Register.Index]; break; /* NV40 fragprog result regs are just temps, so this is simple */ case TGSI_FILE_OUTPUT: - src = fpc->r_result[fsrc->Register.Index]; + src.reg = fpc->r_result[fsrc->Register.Index]; break; default: NOUVEAU_ERR("bad src file\n"); + src.reg.index = 0; + src.reg.type = 0; break; } @@ -268,10 +432,13 @@ tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc) src.swz[1] = fsrc->Register.SwizzleY; src.swz[2] = fsrc->Register.SwizzleZ; src.swz[3] = fsrc->Register.SwizzleW; + src.indirect = 0; + src.indirect_reg = 0; + src.indirect_swz = 0; return src; } -static INLINE struct nvfx_sreg +static INLINE struct nvfx_reg tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) { switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: @@ -279,10 +446,10 @@ tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) { case TGSI_FILE_TEMPORARY: return fpc->r_temp[fdst->Register.Index]; case TGSI_FILE_NULL: - return nvfx_sr(NVFXSR_NONE, 0); + return nvfx_reg(NVFXSR_NONE, 0); default: NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); - return nvfx_sr(NVFXSR_NONE, 0); + return nvfx_reg(NVFXSR_NONE, 0); } } @@ -302,8 +469,10 @@ static boolean nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, const struct tgsi_full_instruction *finst) { - const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - struct nvfx_sreg src[3], dst, tmp; + const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); + struct nvfx_insn insn; + struct nvfx_src src[3], tmp; + struct nvfx_reg dst; int mask, sat, unit = 0; int ai = -1, ci = -1, ii = -1; int i; @@ -327,13 +496,26 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->Register.Index) { + if(fpc->pfp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG && (0 + || fsrc->Register.SwizzleX == PIPE_SWIZZLE_ALPHA + || fsrc->Register.SwizzleY == PIPE_SWIZZLE_ALPHA + || fsrc->Register.SwizzleZ == PIPE_SWIZZLE_ALPHA + || fsrc->Register.SwizzleW == PIPE_SWIZZLE_ALPHA + )) { + /* hardware puts 0 in fogcoord.w, but GL/Gallium want 1 there */ + struct nvfx_src addend = nvfx_src(nvfx_fp_imm(fpc, 0, 0, 0, 1)); + addend.swz[0] = fsrc->Register.SwizzleX; + addend.swz[1] = fsrc->Register.SwizzleY; + addend.swz[2] = fsrc->Register.SwizzleZ; + addend.swz[3] = fsrc->Register.SwizzleW; + src[i] = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, ADD, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), addend, none)); + } else if (ai == -1 || ai == fsrc->Register.Index) { ai = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL, - tgsi_src(fpc, fsrc), none, none); + src[i] = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none)); } break; case TGSI_FILE_CONSTANT: @@ -342,9 +524,8 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, ci = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL, - tgsi_src(fpc, fsrc), none, none); + src[i] = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none)); } break; case TGSI_FILE_IMMEDIATE: @@ -353,9 +534,8 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, ii = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL, - tgsi_src(fpc, fsrc), none, none); + src[i] = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, MOV, src[i].reg, NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none)); } break; case TGSI_FILE_TEMPORARY: @@ -378,277 +558,377 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: - arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); + nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, abs(src[0]), none, none)); break; case TGSI_OPCODE_ADD: - arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_CMP: - tmp = nvfx_sr(NVFXSR_NONE, 0); - tmp.cc_update = 1; - arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NVFX_COND_GE; - arith(fpc, sat, MOV, dst, mask, src[2], none, none); - dst.cc_test = NVFX_COND_LT; - arith(fpc, sat, MOV, dst, mask, src[1], none, none); + insn = arith(0, MOV, none.reg, mask, src[0], none, none); + insn.cc_update = 1; + nvfx_fp_emit(fpc, insn); + + insn = arith(sat, MOV, dst, mask, src[2], none, none); + insn.cc_test = NVFX_COND_GE; + nvfx_fp_emit(fpc, insn); + + insn = arith(sat, MOV, dst, mask, src[1], none, none); + insn.cc_test = NVFX_COND_LT; + nvfx_fp_emit(fpc, insn); break; case TGSI_OPCODE_COS: - arith(fpc, sat, COS, dst, mask, src[0], none, none); + nvfx_fp_emit(fpc, arith(sat, COS, dst, mask, src[0], none, none)); break; case TGSI_OPCODE_DDX: if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { - tmp = temp(fpc); - arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, - swz(src[0], Z, W, Z, W), none, none); - arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, - swz(tmp, X, Y, X, Y), none, none); - arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], - none, none); - arith(fpc, 0, MOV, dst, mask, tmp, none, none); + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none)); + nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none)); + nvfx_fp_emit(fpc, arith(sat, DDX, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none)); + nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none)); } else { - arith(fpc, sat, DDX, dst, mask, src[0], none, none); + nvfx_fp_emit(fpc, arith(sat, DDX, dst, mask, src[0], none, none)); } break; case TGSI_OPCODE_DDY: if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { - tmp = temp(fpc); - arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, - swz(src[0], Z, W, Z, W), none, none); - arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, - swz(tmp, X, Y, X, Y), none, none); - arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], - none, none); - arith(fpc, 0, MOV, dst, mask, tmp, none, none); + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none)); + nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none)); + nvfx_fp_emit(fpc, arith(sat, DDY, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none)); + nvfx_fp_emit(fpc, arith(0, MOV, dst, mask, tmp, none, none)); } else { - arith(fpc, sat, DDY, dst, mask, src[0], none, none); + nvfx_fp_emit(fpc, arith(sat, DDY, dst, mask, src[0], none, none)); } break; + case TGSI_OPCODE_DP2: + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], src[1], none)); + nvfx_fp_emit(fpc, arith(0, ADD, dst, mask, swz(tmp, X, X, X, X), swz(tmp, Y, Y, Y, Y), none)); + break; case TGSI_OPCODE_DP3: - arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, DP3, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_DP4: - arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_DPH: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[1], none); - arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), - swz(src[1], W, W, W, W), none); + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[1], none)); + nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none)); break; case TGSI_OPCODE_DST: - arith(fpc, sat, DST, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_EX2: - arith(fpc, sat, EX2, dst, mask, src[0], none, none); + nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, src[0], none, none)); break; case TGSI_OPCODE_FLR: - arith(fpc, sat, FLR, dst, mask, src[0], none, none); + nvfx_fp_emit(fpc, arith(sat, FLR, dst, mask, src[0], none, none)); break; case TGSI_OPCODE_FRC: - arith(fpc, sat, FRC, dst, mask, src[0], none, none); + nvfx_fp_emit(fpc, arith(sat, FRC, dst, mask, src[0], none, none)); break; case TGSI_OPCODE_KILP: - arith(fpc, 0, KIL, none, 0, none, none, none); + nvfx_fp_emit(fpc, arith(0, KIL, none.reg, 0, none, none, none)); break; case TGSI_OPCODE_KIL: - dst = nvfx_sr(NVFXSR_NONE, 0); - dst.cc_update = 1; - arith(fpc, 0, MOV, dst, NVFX_FP_MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NVFX_COND_LT; - arith(fpc, 0, KIL, dst, 0, none, none, none); + insn = arith(0, MOV, none.reg, NVFX_FP_MASK_ALL, src[0], none, none); + insn.cc_update = 1; + nvfx_fp_emit(fpc, insn); + + insn = arith(0, KIL, none.reg, 0, none, none, none); + insn.cc_test = NVFX_COND_LT; + nvfx_fp_emit(fpc, insn); break; case TGSI_OPCODE_LG2: - arith(fpc, sat, LG2, dst, mask, src[0], none, none); + nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_LIT: + if(!nvfx->is_nv4x) + nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], src[1], src[2])); + else { + /* we use FLT_MIN, so that log2 never gives -infinity, and thus multiplication by + * specular 0 always gives 0, so that ex2 gives 1, to satisfy the 0^0 = 1 requirement + * + * NOTE: if we start using half precision, we might need an fp16 FLT_MIN here instead + */ + struct nvfx_src maxs = nvfx_src(nvfx_fp_imm(fpc, 0, FLT_MIN, 0, 0)); + tmp = nvfx_src(temp(fpc)); + if (ci>= 0 || ii >= 0) { + nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none)); + maxs = tmp; + } + nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none)); + nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none)); + nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none)); + nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none)); + } break; -// case TGSI_OPCODE_LIT: case TGSI_OPCODE_LRP: if(!nvfx->is_nv4x) - arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]); + nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2])); else { - tmp = temp(fpc); - arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); - arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2])); + nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], tmp)); } break; case TGSI_OPCODE_MAD: - arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); + nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, src[0], src[1], src[2])); break; case TGSI_OPCODE_MAX: - arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, MAX, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_MIN: - arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, MIN, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_MOV: - arith(fpc, sat, MOV, dst, mask, src[0], none, none); + nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, src[0], none, none)); break; case TGSI_OPCODE_MUL: - arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, MUL, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_NOP: break; case TGSI_OPCODE_POW: if(!nvfx->is_nv4x) - arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, POW_NV30, dst, mask, src[0], src[1], none)); else { - tmp = temp(fpc); - arith(fpc, 0, LG2, tmp, NVFX_FP_MASK_X, - swz(src[0], X, X, X, X), none, none); - arith(fpc, 0, MUL, tmp, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(fpc, sat, EX2, dst, mask, - swz(tmp, X, X, X, X), none, none); + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none)); + nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none)); + nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none)); } break; case TGSI_OPCODE_RCP: - arith(fpc, sat, RCP, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_RET: - assert(0); + nvfx_fp_emit(fpc, arith(sat, RCP, dst, mask, src[0], none, none)); break; case TGSI_OPCODE_RFL: if(!nvfx->is_nv4x) - arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(0, RFL_NV30, dst, mask, src[0], src[1], none)); else { - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[0], none); - arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_Y, src[0], src[1], none); - arith(fpc, 0, DIV, scale(tmp, 2X), NVFX_FP_MASK_Z, - swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); - arith(fpc, sat, MAD, dst, mask, - swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[0], none)); + nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_Y, src[0], src[1], none)); + insn = arith(0, DIV, tmp.reg, NVFX_FP_MASK_Z, swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); + insn.scale = NVFX_FP_OP_DST_SCALE_2X; + nvfx_fp_emit(fpc, insn); + nvfx_fp_emit(fpc, arith(sat, MAD, dst, mask, swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]))); } break; case TGSI_OPCODE_RSQ: if(!nvfx->is_nv4x) - arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); + nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none)); else { - tmp = temp(fpc); - arith(fpc, 0, LG2, scale(tmp, INV_2X), NVFX_FP_MASK_X, - abs(swz(src[0], X, X, X, X)), none, none); - arith(fpc, sat, EX2, dst, mask, - neg(swz(tmp, X, X, X, X)), none, none); + tmp = nvfx_src(temp(fpc)); + insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none); + insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X; + nvfx_fp_emit(fpc, insn); + nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none)); } break; case TGSI_OPCODE_SCS: /* avoid overwriting the source */ if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X) { - if (mask & NVFX_FP_MASK_X) { - arith(fpc, sat, COS, dst, NVFX_FP_MASK_X, - swz(src[0], X, X, X, X), none, none); - } - if (mask & NVFX_FP_MASK_Y) { - arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y, - swz(src[0], X, X, X, X), none, none); - } + if (mask & NVFX_FP_MASK_X) + nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none)); + if (mask & NVFX_FP_MASK_Y) + nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none)); } else { - if (mask & NVFX_FP_MASK_Y) { - arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y, - swz(src[0], X, X, X, X), none, none); - } - if (mask & NVFX_FP_MASK_X) { - arith(fpc, sat, COS, dst, NVFX_FP_MASK_X, - swz(src[0], X, X, X, X), none, none); - } + if (mask & NVFX_FP_MASK_Y) + nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none)); + if (mask & NVFX_FP_MASK_X) + nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none)); } break; case TGSI_OPCODE_SEQ: - arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SFL: - arith(fpc, sat, SFL, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, SFL, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SGE: - arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SGT: - arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SIN: - arith(fpc, sat, SIN, dst, mask, src[0], none, none); + nvfx_fp_emit(fpc, arith(sat, SIN, dst, mask, src[0], none, none)); break; case TGSI_OPCODE_SLE: - arith(fpc, sat, SLE, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, SLE, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SLT: - arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, SLT, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SNE: - arith(fpc, sat, SNE, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, SNE, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_SSG: + { + struct nvfx_src minones = swz(nvfx_src(nvfx_fp_imm(fpc, -1, -1, -1, -1)), X, X, X, X); + + insn = arith(sat, MOV, dst, mask, src[0], none, none); + insn.cc_update = 1; + nvfx_fp_emit(fpc, insn); + + insn = arith(0, STR, dst, mask, none, none, none); + insn.cc_test = NVFX_COND_GT; + nvfx_fp_emit(fpc, insn); + + if(!sat) { + insn = arith(0, MOV, dst, mask, minones, none, none); + insn.cc_test = NVFX_COND_LT; + nvfx_fp_emit(fpc, insn); + } break; + } case TGSI_OPCODE_STR: - arith(fpc, sat, STR, dst, mask, src[0], src[1], none); + nvfx_fp_emit(fpc, arith(sat, STR, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SUB: - arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); + nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], neg(src[1]), none)); break; case TGSI_OPCODE_TEX: - tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); + nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none)); break; - case TGSI_OPCODE_TXB: - tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); + case TGSI_OPCODE_TRUNC: + tmp = nvfx_src(temp(fpc)); + insn = arith(0, MOV, none.reg, mask, src[0], none, none); + insn.cc_update = 1; + nvfx_fp_emit(fpc, insn); + + nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, abs(src[0]), none, none)); + nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, tmp, none, none)); + + insn = arith(sat, MOV, dst, mask, neg(tmp), none, none); + insn.cc_test = NVFX_COND_LT; + nvfx_fp_emit(fpc, insn); + break; + case TGSI_OPCODE_TXB: + nvfx_fp_emit(fpc, tex(sat, TXB, unit, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_TXL: + if(nvfx->is_nv4x) + nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none)); + else /* unsupported on nv30, use TEX and hope they like it */ + nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_TXP: + nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_XPD: + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none)); + nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp))); break; - case TGSI_OPCODE_TXP: - tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); + + case TGSI_OPCODE_IF: + // MOVRC0 R31 (TR0.xyzw), R<src>: + // IF (NE.xxxx) ELSE <else> END <end> + if(!nvfx->use_nv4x) + goto nv3x_cflow; + nv40_fp_if(fpc, src[0]); break; - case TGSI_OPCODE_XPD: - tmp = temp(fpc); - arith(fpc, 0, MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(fpc, sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); + + case TGSI_OPCODE_ELSE: + { + uint32_t *hw; + if(!nvfx->use_nv4x) + goto nv3x_cflow; + assert(util_dynarray_contains(&fpc->if_stack, unsigned)); + hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)]; + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len; break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; } - release_temps(fpc); - return TRUE; -} + case TGSI_OPCODE_ENDIF: + { + uint32_t *hw; + if(!nvfx->use_nv4x) + goto nv3x_cflow; + assert(util_dynarray_contains(&fpc->if_stack, unsigned)); + hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)]; + if(!hw[2]) + hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len; + hw[3] = fpc->fp->insn_len; + break; + } -static boolean -nvfx_fragprog_parse_decl_attrib(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; + case TGSI_OPCODE_BRA: + /* This can in limited cases be implemented with an IF with the else and endif labels pointing to the target */ + /* no state tracker uses this, so don't implement this for now */ + assert(0); + nv40_fp_bra(fpc, finst->Label.Label); + break; - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NVFX_FP_OP_INPUT_SRC_POSITION; + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + /* nothing to do here */ break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NVFX_FP_OP_INPUT_SRC_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NVFX_FP_OP_INPUT_SRC_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } + + case TGSI_OPCODE_CAL: + if(!nvfx->use_nv4x) + goto nv3x_cflow; + nv40_fp_cal(fpc, finst->Label.Label); break; - case TGSI_SEMANTIC_FOG: - hw = NVFX_FP_OP_INPUT_SRC_FOGC; + + case TGSI_OPCODE_RET: + if(!nvfx->use_nv4x) + goto nv3x_cflow; + nv40_fp_ret(fpc); break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic. - Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; + + case TGSI_OPCODE_BGNLOOP: + if(!nvfx->use_nv4x) + goto nv3x_cflow; + /* TODO: we should support using two nested REPs to allow a > 255 iteration count */ + nv40_fp_rep(fpc, 255, finst->Label.Label); + break; + + case TGSI_OPCODE_ENDLOOP: + break; + + case TGSI_OPCODE_BRK: + if(!nvfx->use_nv4x) + goto nv3x_cflow; + nv40_fp_brk(fpc); + break; + + case TGSI_OPCODE_CONT: + { + static int warned = 0; + if(!warned) { + NOUVEAU_ERR("Sorry, the continue keyword is not implemented: ignoring it.\n"); + warned = 1; } break; - default: - NOUVEAU_ERR("bad input semantic\n"); + } + + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); return FALSE; } - fpc->attrib_map[fdec->Range.First] = hw; +out: + release_temps(fpc); return TRUE; +nv3x_cflow: + { + static int warned = 0; + if(!warned) { + NOUVEAU_ERR( + "Sorry, control flow instructions are not supported in hardware on nv3x: ignoring them\n" + "If rendering is incorrect, try to disable GLSL support in the application.\n"); + warned = 1; + } + } + goto out; } static boolean @@ -670,7 +950,7 @@ nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, case 2: hw = 3; break; case 3: hw = 4; break; } - if(hw > ((nvfx->is_nv4x) ? 4 : 2)) { + if(hw > ((nvfx->use_nv4x) ? 4 : 2)) { NOUVEAU_ERR("bad rcol index\n"); return FALSE; } @@ -680,8 +960,8 @@ nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, return FALSE; } - fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); - fpc->r_temps |= (1 << hw); + fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw); + fpc->r_temps |= (1ULL << hw); return TRUE; } @@ -690,8 +970,20 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc) { struct tgsi_parse_context p; int high_temp = -1, i; + struct util_semantic_set set; + unsigned num_texcoords = nvfx->use_nv4x ? 10 : 8; - tgsi_parse_init(&p, fpc->fp->pipe.tokens); + fpc->fp->num_slots = util_semantic_set_from_program_file(&set, fpc->pfp->pipe.tokens, TGSI_FILE_INPUT); + if(fpc->fp->num_slots > num_texcoords) + return FALSE; + util_semantic_layout_from_set(fpc->fp->slot_to_generic, &set, 0, num_texcoords); + util_semantic_table_from_layout(fpc->generic_to_slot, fpc->fp->slot_to_generic, 0, num_texcoords); + + memset(fpc->fp->slot_to_fp_input, 0xff, sizeof(fpc->fp->slot_to_fp_input)); + + fpc->r_imm = CALLOC(fpc->pfp->info.immediate_count, sizeof(struct nvfx_reg)); + + tgsi_parse_init(&p, fpc->pfp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&p)) { const union tgsi_full_token *tok = &p.FullToken; @@ -702,10 +994,6 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc) const struct tgsi_full_declaration *fdec; fdec = &p.FullToken.FullDeclaration; switch (fdec->Declaration.File) { - case TGSI_FILE_INPUT: - if (!nvfx_fragprog_parse_decl_attrib(nvfx, fpc, fdec)) - goto out_err; - break; case TGSI_FILE_OUTPUT: if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec)) goto out_err; @@ -724,19 +1012,14 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc) case TGSI_TOKEN_TYPE_IMMEDIATE: { struct tgsi_full_immediate *imm; - float vals[4]; imm = &p.FullToken.FullImmediate; assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(fpc->nr_imm < MAX_IMM); + assert(fpc->nr_imm < fpc->pfp->info.immediate_count); - vals[0] = imm->u[0].Float; - vals[1] = imm->u[1].Float; - vals[2] = imm->u[2].Float; - vals[3] = imm->u[3].Float; - fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); - } + fpc->r_imm[fpc->nr_imm++] = nvfx_fp_imm(fpc, imm->u[0].Float, imm->u[1].Float, imm->u[2].Float, imm->u[3].Float); break; + } default: break; } @@ -744,40 +1027,75 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc) tgsi_parse_free(&p); if (++high_temp) { - fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); + fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg)); for (i = 0; i < high_temp; i++) fpc->r_temp[i] = temp(fpc); - fpc->r_temps_discard = 0; + fpc->r_temps_discard = 0ULL; } return TRUE; out_err: - if (fpc->r_temp) + if (fpc->r_temp) { FREE(fpc->r_temp); + fpc->r_temp = NULL; + } tgsi_parse_free(&p); return FALSE; } -static void +DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", FALSE) + +static struct nvfx_fragment_program* nvfx_fragprog_translate(struct nvfx_context *nvfx, - struct nvfx_fragment_program *fp) + struct nvfx_pipe_fragment_program *pfp, + boolean emulate_sprite_flipping) { struct tgsi_parse_context parse; struct nvfx_fpc *fpc = NULL; + struct util_dynarray insns; + struct nvfx_fragment_program* fp = NULL; + const int min_size = 4096; + + fp = CALLOC_STRUCT(nvfx_fragment_program); + if(!fp) + goto out_err; - fpc = CALLOC(1, sizeof(struct nvfx_fpc)); + fpc = CALLOC_STRUCT(nvfx_fpc); if (!fpc) - return; + goto out_err; + + fpc->max_temps = nvfx->use_nv4x ? 48 : 32; + fpc->pfp = pfp; fpc->fp = fp; fpc->num_regs = 2; - if (!nvfx_fragprog_prepare(nvfx, fpc)) { - FREE(fpc); - return; + for (unsigned i = 0; i < pfp->info.num_properties; ++i) { + if (pfp->info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN) { + if(pfp->info.properties[i].data[0]) + fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED; + } else if (pfp->info.properties[i].name == TGSI_PROPERTY_FS_COORD_PIXEL_CENTER) { + if(pfp->info.properties[i].data[0]) + fp->coord_conventions |= NV30_3D_COORD_CONVENTIONS_CENTER_INTEGER; + } } - tgsi_parse_init(&parse, fp->pipe.tokens); + if (!nvfx_fragprog_prepare(nvfx, fpc)) + goto out_err; + + tgsi_parse_init(&parse, pfp->pipe.tokens); + util_dynarray_init(&insns); + + if(emulate_sprite_flipping) + { + struct nvfx_reg reg = temp(fpc); + struct nvfx_src sprite_input = nvfx_src(nvfx_reg(NVFXSR_RELOCATED, fp->num_slots)); + struct nvfx_src imm = nvfx_src(nvfx_fp_imm(fpc, 1, -1, 0, 0)); + + fpc->sprite_coord_temp = reg.index; + fpc->r_temps_discard = 0ULL; + nvfx_fp_emit(fpc, arith(0, MAD, reg, NVFX_FP_MASK_ALL, sprite_input, swz(imm, X, Y, X, X), swz(imm, Z, X, Z, Z))); + } while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); @@ -787,6 +1105,7 @@ nvfx_fragprog_translate(struct nvfx_context *nvfx, { const struct tgsi_full_instruction *finst; + util_dynarray_append(&insns, unsigned, fp->insn_len); finst = &parse.FullToken.FullInstruction; if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst)) goto out_err; @@ -796,17 +1115,25 @@ nvfx_fragprog_translate(struct nvfx_context *nvfx, break; } } + util_dynarray_append(&insns, unsigned, fp->insn_len); + + for(unsigned i = 0; i < fpc->label_relocs.size; i += sizeof(struct nvfx_relocation)) + { + struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)fpc->label_relocs.data + i); + fp->insn[label_reloc->location] |= ((unsigned*)insns.data)[label_reloc->target]; + } + util_dynarray_fini(&insns); if(!nvfx->is_nv4x) fp->fp_control |= (fpc->num_regs-1)/2; else - fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; + fp->fp_control |= fpc->num_regs << NV40_3D_FP_CONTROL_TEMP_COUNT__SHIFT; /* Terminate final instruction */ if(fp->insn) - fp->insn[fpc->inst_offset] |= 0x00000001; + fp->insn[fpc->inst_offset] |= 0x00000001; - /* Append NOP + END instruction, may or may not be necessary. */ + /* Append NOP + END instruction for branches to the end of the program */ fpc->inst_offset = fp->insn_len; grow_insns(fpc, 4); fp->insn[fpc->inst_offset + 0] = 0x00000001; @@ -814,12 +1141,49 @@ nvfx_fragprog_translate(struct nvfx_context *nvfx, fp->insn[fpc->inst_offset + 2] = 0x00000000; fp->insn[fpc->inst_offset + 3] = 0x00000000; - fp->translated = TRUE; -out_err: + if(debug_get_option_nvfx_dump_fp()) + { + debug_printf("\n"); + tgsi_dump(pfp->pipe.tokens, 0); + + debug_printf("\n%s fragment program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x"); + for (unsigned i = 0; i < fp->insn_len; i += 4) + debug_printf("%3u: %08x %08x %08x %08x\n", i >> 2, fp->insn[i], fp->insn[i + 1], fp->insn[i + 2], fp->insn[i + 3]); + debug_printf("\n"); + } + + fp->prog_size = (fp->insn_len * 4 + 63) & ~63; + + if(fp->prog_size >= min_size) + fp->progs_per_bo = 1; + else + fp->progs_per_bo = min_size / fp->prog_size; + fp->bo_prog_idx = fp->progs_per_bo - 1; + +out: tgsi_parse_free(&parse); - if (fpc->r_temp) - FREE(fpc->r_temp); - FREE(fpc); + if(fpc) + { + if (fpc->r_temp) + FREE(fpc->r_temp); + util_dynarray_fini(&fpc->if_stack); + util_dynarray_fini(&fpc->label_relocs); + util_dynarray_fini(&fpc->imm_data); + //util_dynarray_fini(&fpc->loop_stack); + FREE(fpc); + } + return fp; + +out_err: + _debug_printf("Error: failed to compile this fragment program:\n"); + tgsi_dump(pfp->pipe.tokens, 0); + + if(fp) + { + FREE(fp); + fp = NULL; + } + goto out; } static inline void @@ -836,54 +1200,188 @@ nvfx_fp_memcpy(void* dst, const void* src, size_t len) #endif } +/* The hardware only supports immediate constants inside the fragment program, + * and at least on nv30 doesn't support an indirect linkage table. + * + * Hence, we need to patch the fragment program itself both to update constants + * and update linkage. + * + * Using a single fragment program would entail unacceptable stalls if the GPU is + * already rendering with that fragment program. + * Thus, we instead use a "rotating queue" of buffer objects, each of which is + * packed with multiple versions of the same program. + * + * Whenever we need to patch something, we move to the next program and + * patch it. If all buffer objects are in use by the GPU, we allocate another one, + * expanding the queue. + * + * As an additional optimization, we record when all the programs have the + * current input slot configuration, and at that point we stop patching inputs. + * This happens, for instance, if a given fragment program is always used with + * the same vertex program (i.e. always with GLSL), or if the layouts match + * enough (non-GLSL). + * + * Note that instead of using multiple programs, we could push commands + * on the FIFO to patch a single program: it's not fully clear which option is + * faster, but my guess is that the current way is faster. + * + * We also track the previous slot assignments for each version and don't + * patch if they are the same (this could perhaps be removed). + */ + void nvfx_fragprog_validate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; - struct nvfx_fragment_program *fp = nvfx->fragprog; - int update = 0; - int i; + struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog; + struct nvfx_vertex_program* vp; + + // TODO: the multiplication by point_quad_rasterization is probably superfluous + unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * nvfx->rasterizer->pipe.sprite_coord_enable; + + boolean emulate_sprite_flipping = sprite_coord_enable && nvfx->rasterizer->pipe.sprite_coord_mode; + unsigned key = emulate_sprite_flipping; + struct nvfx_fragment_program* fp; - if (!fp->translated) + fp = pfp->fps[key]; + if (!fp) { - const int min_size = 4096; + fp = nvfx_fragprog_translate(nvfx, pfp, emulate_sprite_flipping); - nvfx_fragprog_translate(nvfx, fp); - if (!fp->translated) { - static unsigned dummy[8] = {1, 0, 0, 0, 1, 0, 0, 0}; - static int warned = 0; - if(!warned) + if(!fp) + { + if(!nvfx->dummy_fs) { - fprintf(stderr, "nvfx: failed to translate fragment program!\n"); - warned = 1; + struct ureg_program *ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); + if (ureg) + { + ureg_END( ureg ); + nvfx->dummy_fs = ureg_create_shader_and_destroy( ureg, &nvfx->pipe ); + } + + if(!nvfx->dummy_fs) + { + _debug_printf("Error: unable to create a dummy fragment shader: aborting."); + abort(); + } } - /* use dummy program: we cannot fail here */ - fp->translated = TRUE; - fp->insn = malloc(sizeof(dummy)); - memcpy(fp->insn, dummy, sizeof(dummy)); - fp->insn_len = sizeof(dummy) / sizeof(dummy[0]); + fp = nvfx_fragprog_translate(nvfx, nvfx->dummy_fs, FALSE); + emulate_sprite_flipping = FALSE; + + if(!fp) + { + _debug_printf("Error: unable to compile even a dummy fragment shader: aborting."); + abort(); + } } - update = TRUE; - fp->prog_size = (fp->insn_len * 4 + 63) & ~63; + pfp->fps[key] = fp; + } + + vp = nvfx->hw_vertprog; + + if (fp->last_vp_id != vp->id || fp->last_sprite_coord_enable != sprite_coord_enable) { + int sprite_real_input = -1; + int sprite_reloc_input; + unsigned i; + fp->last_vp_id = vp->id; + fp->last_sprite_coord_enable = sprite_coord_enable; + + if(sprite_coord_enable) + { + sprite_real_input = vp->sprite_fp_input; + if(sprite_real_input < 0) + { + unsigned used_texcoords = 0; + for(unsigned i = 0; i < fp->num_slots; ++i) { + unsigned generic = fp->slot_to_generic[i]; + if((generic < 32) && !((1 << generic) & sprite_coord_enable)) + { + unsigned char slot_mask = vp->generic_to_fp_input[generic]; + if(slot_mask >= 0xf0) + used_texcoords |= 1 << ((slot_mask & 0xf) - NVFX_FP_OP_INPUT_SRC_TC0); + } + } + + sprite_real_input = NVFX_FP_OP_INPUT_SRC_TC(__builtin_ctz(~used_texcoords)); + } + + fp->point_sprite_control |= (1 << (sprite_real_input - NVFX_FP_OP_INPUT_SRC_TC0 + 8)); + } + else + fp->point_sprite_control = 0; - if(fp->prog_size >= min_size) - fp->progs_per_bo = 1; + if(emulate_sprite_flipping) + sprite_reloc_input = 0; else - fp->progs_per_bo = min_size / fp->prog_size; - fp->bo_prog_idx = fp->progs_per_bo - 1; - } + sprite_reloc_input = sprite_real_input; + + for(i = 0; i < fp->num_slots; ++i) { + unsigned generic = fp->slot_to_generic[i]; + if((generic < 32) && ((1 << generic) & sprite_coord_enable)) + { + if(fp->slot_to_fp_input[i] != sprite_reloc_input) + goto update_slots; + } + else + { + unsigned char slot_mask = vp->generic_to_fp_input[generic]; + if((slot_mask >> 4) & (slot_mask ^ fp->slot_to_fp_input[i])) + goto update_slots; + } + } - /* we must update constants even on "just" fragprog changes, because - we don't check whether the current constant buffer matches the latest - one bound to this fragment program */ - if (nvfx->dirty & (NVFX_NEW_FRAGCONST | NVFX_NEW_FRAGPROG)) - update = TRUE; + if(emulate_sprite_flipping) + { + if(fp->slot_to_fp_input[fp->num_slots] != sprite_real_input) + goto update_slots; + } - if(update) { + if(0) + { +update_slots: + /* optimization: we start updating from the slot we found the first difference in */ + for(; i < fp->num_slots; ++i) + { + unsigned generic = fp->slot_to_generic[i]; + if((generic < 32) && ((1 << generic) & sprite_coord_enable)) + fp->slot_to_fp_input[i] = sprite_reloc_input; + else + fp->slot_to_fp_input[i] = vp->generic_to_fp_input[generic] & 0xf; + } + + fp->slot_to_fp_input[fp->num_slots] = sprite_real_input; + + if(nvfx->is_nv4x) + { + fp->or = 0; + for(i = 0; i <= fp->num_slots; ++i) { + unsigned fp_input = fp->slot_to_fp_input[i]; + if(fp_input == NVFX_FP_OP_INPUT_SRC_TC(8)) + fp->or |= (1 << 12); + else if(fp_input == NVFX_FP_OP_INPUT_SRC_TC(9)) + fp->or |= (1 << 13); + else if(fp_input >= NVFX_FP_OP_INPUT_SRC_TC(0) && fp_input <= NVFX_FP_OP_INPUT_SRC_TC(7)) + fp->or |= (1 << (fp_input - NVFX_FP_OP_INPUT_SRC_TC0 + 14)); + } + } + + fp->progs_left_with_obsolete_slot_assignments = fp->progs; + goto update; + } + } + + /* We must update constants even on "just" fragprog changes, because + * we don't check whether the current constant buffer matches the latest + * one bound to this fragment program. + * Doing such a check would likely be a pessimization. + */ + if ((nvfx->hw_fragprog != fp) || (nvfx->dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST))) { int offset; + uint32_t* fpmap; +update: ++fp->bo_prog_idx; if(fp->bo_prog_idx >= fp->progs_per_bo) { @@ -893,9 +1391,12 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) } else { - struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + fp->prog_size * fp->progs_per_bo, 16); - char *map, *buf; + struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + (fp->prog_size + 8) * fp->progs_per_bo, 16); + uint8_t* map; + uint8_t* buf; + fpbo->slots = (unsigned char*)&fpbo->insn[(fp->prog_size) * fp->progs_per_bo]; + memset(fpbo->slots, 0, 8 * fp->progs_per_bo); if(fp->fpbo) { fpbo->next = fp->fpbo->next; @@ -905,12 +1406,14 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) fpbo->next = fpbo; fp->fpbo = fpbo; fpbo->bo = 0; + fp->progs += fp->progs_per_bo; + fp->progs_left_with_obsolete_slot_assignments += fp->progs_per_bo; nouveau_bo_new(nvfx->screen->base.device, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 64, fp->prog_size * fp->progs_per_bo, &fpbo->bo); nouveau_bo_map(fpbo->bo, NOUVEAU_BO_NOSYNC); map = fpbo->bo->map; - buf = fpbo->insn; - for(int i = 0; i < fp->progs_per_bo; ++i) + buf = (uint8_t*)fpbo->insn; + for(unsigned i = 0; i < fp->progs_per_bo; ++i) { memcpy(buf, fp->insn, fp->insn_len * 4); nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4); @@ -922,15 +1425,14 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) } offset = fp->bo_prog_idx * fp->prog_size; + fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset); if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) { struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT]; - // TODO: avoid using transfers, just directly the buffer - struct pipe_transfer* transfer; - // TODO: does this check make any sense, or should we do this unconditionally? - uint32_t* map = pipe_buffer_map(&nvfx->pipe, constbuf, PIPE_TRANSFER_READ, &transfer); + uint32_t* map = (uint32_t*)nvfx_buffer(constbuf)->data; uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset); uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset); + int i; for (i = 0; i < fp->nr_consts; ++i) { unsigned off = fp->consts[i].offset; unsigned idx = fp->consts[i].index * 4; @@ -941,49 +1443,113 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) nvfx_fp_memcpy(&fpmap[off], &map[idx], 4 * sizeof(uint32_t)); } } - pipe_buffer_unmap(&nvfx->pipe, constbuf, transfer); } - } - if(update || (nvfx->dirty & NVFX_NEW_FRAGPROG)) { - int offset = fp->bo_prog_idx * fp->prog_size; + /* we only do this if we aren't sure that all program versions have the + * current slot assignments, otherwise we just update constants for speed + */ + if(fp->progs_left_with_obsolete_slot_assignments) { + unsigned char* fpbo_slots = &fp->fpbo->slots[fp->bo_prog_idx * 8]; + /* also relocate sprite coord slot, if any */ + for(unsigned i = 0; i <= fp->num_slots; ++i) { + unsigned value = fp->slot_to_fp_input[i];; + if(value != fpbo_slots[i]) { + unsigned* p; + unsigned* begin = (unsigned*)fp->slot_relocations[i].data; + unsigned* end = (unsigned*)((char*)fp->slot_relocations[i].data + fp->slot_relocations[i].size); + //printf("fp %p reloc slot %u/%u: %u -> %u\n", fp, i, fp->num_slots, fpbo_slots[i], value); + if(value == 0) + { + /* was relocated to an input, switch type to temporary */ + for(p = begin; p != end; ++p) { + unsigned off = *p; + unsigned dw = fp->insn[off]; + dw &=~ NVFX_FP_REG_TYPE_MASK; + //printf("reloc_tmp at %x\n", off); + nvfx_fp_memcpy(&fpmap[off], &dw, sizeof(dw)); + } + } else { + if(!fpbo_slots[i]) + { + /* was relocated to a temporary, switch type to input */ + for(p= begin; p != end; ++p) { + unsigned off = *p; + unsigned dw = fp->insn[off]; + //printf("reloc_in at %x\n", off); + dw |= NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT; + nvfx_fp_memcpy(&fpmap[off], &dw, sizeof(dw)); + } + } + + /* set the correct input index */ + for(p = begin; p != end; ++p) { + unsigned off = *p & ~3; + unsigned dw = fp->insn[off]; + //printf("reloc&~3 at %x\n", off); + dw = (dw & ~NVFX_FP_OP_INPUT_SRC_MASK) | (value << NVFX_FP_OP_INPUT_SRC_SHIFT); + nvfx_fp_memcpy(&fpmap[off], &dw, sizeof(dw)); + } + } + fpbo_slots[i] = value; + } + } + --fp->progs_left_with_obsolete_slot_assignments; + } + + nvfx->hw_fragprog = fp; + MARK_RING(chan, 8, 1); - OUT_RING(chan, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1)); + OUT_RING(chan, RING_3D(NV30_3D_FP_ACTIVE_PROGRAM, 1)); OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, - NV34TCL_FP_ACTIVE_PROGRAM_DMA1); - OUT_RING(chan, RING_3D(NV34TCL_FP_CONTROL, 1)); + NOUVEAU_BO_OR, NV30_3D_FP_ACTIVE_PROGRAM_DMA0, + NV30_3D_FP_ACTIVE_PROGRAM_DMA1); + OUT_RING(chan, RING_3D(NV30_3D_FP_CONTROL, 1)); OUT_RING(chan, fp->fp_control); if(!nvfx->is_nv4x) { - OUT_RING(chan, RING_3D(NV34TCL_FP_REG_CONTROL, 1)); + OUT_RING(chan, RING_3D(NV30_3D_FP_REG_CONTROL, 1)); OUT_RING(chan, (1<<16)|0x4); - OUT_RING(chan, RING_3D(NV34TCL_TX_UNITS_ENABLE, 1)); + OUT_RING(chan, RING_3D(NV30_3D_TEX_UNITS_ENABLE, 1)); OUT_RING(chan, fp->samplers); } } + + { + unsigned pointsprite_control = fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization; + if(pointsprite_control != nvfx->hw_pointsprite_control) + { + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV30_3D_POINT_SPRITE, 1)); + OUT_RING(chan, pointsprite_control); + nvfx->hw_pointsprite_control = pointsprite_control; + } + } + + nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGPROG; } void nvfx_fragprog_relocate(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; - struct nvfx_fragment_program *fp = nvfx->fragprog; + struct nvfx_fragment_program *fp = nvfx->hw_fragprog; struct nouveau_bo* bo = fp->fpbo->bo; int offset = fp->bo_prog_idx * fp->prog_size; unsigned fp_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; // TODO: GART? fp_flags |= NOUVEAU_BO_DUMMY; MARK_RING(chan, 2, 2); - OUT_RELOC(chan, bo, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1), fp_flags, 0, 0); + OUT_RELOC(chan, bo, RING_3D(NV30_3D_FP_ACTIVE_PROGRAM, 1), fp_flags, 0, 0); OUT_RELOC(chan, bo, offset, fp_flags | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, - NV34TCL_FP_ACTIVE_PROGRAM_DMA1); + NOUVEAU_BO_OR, NV30_3D_FP_ACTIVE_PROGRAM_DMA0, + NV30_3D_FP_ACTIVE_PROGRAM_DMA1); + nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGPROG; } void nvfx_fragprog_destroy(struct nvfx_context *nvfx, struct nvfx_fragment_program *fp) { + unsigned i; struct nvfx_fragment_program_bo* fpbo = fp->fpbo; if(fpbo) { @@ -992,13 +1558,66 @@ nvfx_fragprog_destroy(struct nvfx_context *nvfx, struct nvfx_fragment_program_bo* next = fpbo->next; nouveau_bo_unmap(fpbo->bo); nouveau_bo_ref(0, &fpbo->bo); - free(fpbo); + os_free_aligned(fpbo); fpbo = next; } while(fpbo != fp->fpbo); } + for(i = 0; i < Elements(fp->slot_relocations); ++i) + util_dynarray_fini(&fp->slot_relocations[i]); + if (fp->insn_len) FREE(fp->insn); } +static void * +nvfx_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nvfx_pipe_fragment_program *pfp; + + pfp = CALLOC(1, sizeof(struct nvfx_pipe_fragment_program)); + pfp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(pfp->pipe.tokens, &pfp->info); + + return (void *)pfp; +} + +static void +nvfx_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->fragprog = hwcso; + nvfx->dirty |= NVFX_NEW_FRAGPROG; +} + +static void +nvfx_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_pipe_fragment_program *pfp = hwcso; + unsigned i; + + for(i = 0; i < Elements(pfp->fps); ++i) + { + if(pfp->fps[i]) + { + nvfx_fragprog_destroy(nvfx, pfp->fps[i]); + FREE(pfp->fps[i]); + } + } + + FREE((void*)pfp->pipe.tokens); + FREE(pfp); +} + +void +nvfx_init_fragprog_functions(struct nvfx_context *nvfx) +{ + nvfx->pipe.create_fs_state = nvfx_fp_state_create; + nvfx->pipe.bind_fs_state = nvfx_fp_state_bind; + nvfx->pipe.delete_fs_state = nvfx_fp_state_delete; +} diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c index 0b4a434fecc..1d6b4e24cbc 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragtex.c +++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c @@ -1,5 +1,177 @@ #include "nvfx_context.h" #include "nvfx_resource.h" +#include "nvfx_tex.h" + +static void * +nvfx_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_sampler_state *ps; + + ps = MALLOC(sizeof(struct nvfx_sampler_state)); + + /* on nv30, we use this as an internal flag */ + ps->fmt = cso->normalized_coords ? 0 : NV40_3D_TEX_FORMAT_RECT; + ps->en = 0; + ps->filt = nvfx_tex_filter(cso) | 0x2000; /*voodoo*/ + ps->wrap = (nvfx_tex_wrap_mode(cso->wrap_s) << NV30_3D_TEX_WRAP_S__SHIFT) | + (nvfx_tex_wrap_mode(cso->wrap_t) << NV30_3D_TEX_WRAP_T__SHIFT) | + (nvfx_tex_wrap_mode(cso->wrap_r) << NV30_3D_TEX_WRAP_R__SHIFT); + ps->compare = FALSE; + + if(cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) + { + ps->wrap |= nvfx_tex_wrap_compare_mode(cso->compare_func); + ps->compare = TRUE; + } + ps->bcol = nvfx_tex_border_color(cso->border_color); + + if(nvfx->is_nv4x) + nv40_sampler_state_init(pipe, ps, cso); + else + nv30_sampler_state_init(pipe, ps, cso); + + return (void *)ps; +} + +static void +nvfx_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvfx_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nvfx->tex_sampler[unit] = sampler[unit]; + nvfx->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nvfx->nr_samplers; unit++) { + nvfx->tex_sampler[unit] = NULL; + nvfx->dirty_samplers |= (1 << unit); + } + + nvfx->nr_samplers = nr; + nvfx->dirty |= NVFX_NEW_SAMPLER; +} + +static struct pipe_sampler_view * +nvfx_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_sampler_view *templ) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_sampler_view *sv = CALLOC_STRUCT(nvfx_sampler_view); + struct nvfx_texture_format *tf = &nvfx_texture_formats[templ->format]; + unsigned txf; + + if (!sv) + return NULL; + + sv->base = *templ; + sv->base.reference.count = 1; + sv->base.texture = NULL; + pipe_resource_reference(&sv->base.texture, pt); + sv->base.context = pipe; + + txf = NV30_3D_TEX_FORMAT_NO_BORDER; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV30_3D_TEX_FORMAT_CUBIC; + /* fall-through */ + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + txf |= NV30_3D_TEX_FORMAT_DIMS_2D; + break; + case PIPE_TEXTURE_3D: + txf |= NV30_3D_TEX_FORMAT_DIMS_3D; + break; + case PIPE_TEXTURE_1D: + txf |= NV30_3D_TEX_FORMAT_DIMS_1D; + break; + default: + assert(0); + } + sv->u.init_fmt = txf; + + sv->swizzle = 0 + | (tf->src[sv->base.swizzle_r] << NV30_3D_TEX_SWIZZLE_S0_Z__SHIFT) + | (tf->src[sv->base.swizzle_g] << NV30_3D_TEX_SWIZZLE_S0_Y__SHIFT) + | (tf->src[sv->base.swizzle_b] << NV30_3D_TEX_SWIZZLE_S0_X__SHIFT) + | (tf->src[sv->base.swizzle_a] << NV30_3D_TEX_SWIZZLE_S0_W__SHIFT) + | (tf->comp[sv->base.swizzle_r] << NV30_3D_TEX_SWIZZLE_S1_Z__SHIFT) + | (tf->comp[sv->base.swizzle_g] << NV30_3D_TEX_SWIZZLE_S1_Y__SHIFT) + | (tf->comp[sv->base.swizzle_b] << NV30_3D_TEX_SWIZZLE_S1_X__SHIFT) + | (tf->comp[sv->base.swizzle_a] << NV30_3D_TEX_SWIZZLE_S1_W__SHIFT); + + sv->filt = tf->sign; + sv->wrap = tf->wrap; + sv->wrap_mask = ~0; + + if (pt->target == PIPE_TEXTURE_CUBE) + { + sv->offset = 0; + sv->npot_size = (pt->width0 << NV30_3D_TEX_NPOT_SIZE_W__SHIFT) | pt->height0; + } + else + { + sv->offset = nvfx_subresource_offset(pt, 0, sv->base.first_level, 0); + sv->npot_size = (u_minify(pt->width0, sv->base.first_level) << NV30_3D_TEX_NPOT_SIZE_W__SHIFT) | u_minify(pt->height0, sv->base.first_level); + + /* apparently, we need to ignore the t coordinate for 1D textures to fix piglit tex1d-2dborder */ + if(pt->target == PIPE_TEXTURE_1D) + { + sv->wrap_mask &=~ NV30_3D_TEX_WRAP_T__MASK; + sv->wrap |= NV30_3D_TEX_WRAP_T_REPEAT; + } + } + + if(nvfx->is_nv4x) + nv40_sampler_view_init(pipe, sv); + else + nv30_sampler_view_init(pipe, sv); + + return &sv->base; +} + +static void +nvfx_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + FREE(view); +} + +static void +nvfx_set_fragment_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit], + views[unit]); + nvfx->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nvfx->nr_textures; unit++) { + pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit], + NULL); + nvfx->dirty_samplers |= (1 << unit); + } + + nvfx->nr_textures = nr; + nvfx->dirty |= NVFX_NEW_SAMPLER; +} void nvfx_fragtex_validate(struct nvfx_context *nvfx) @@ -16,6 +188,10 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx) samplers &= ~(1 << unit); if(nvfx->fragment_sampler_views[unit] && nvfx->tex_sampler[unit]) { + util_dirty_surfaces_use_for_sampling(&nvfx->pipe, + &((struct nvfx_miptree*)nvfx->fragment_sampler_views[unit]->texture)->dirty_surfaces, + nvfx_surface_flush); + if(!nvfx->is_nv4x) nv30_fragtex_set(nvfx, unit); else @@ -23,12 +199,13 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx) } else { WAIT_RING(chan, 2); /* this is OK for nv40 too */ - OUT_RING(chan, RING_3D(NV34TCL_TX_ENABLE(unit), 1)); + OUT_RING(chan, RING_3D(NV30_3D_TEX_ENABLE(unit), 1)); OUT_RING(chan, 0); nvfx->hw_samplers &= ~(1 << unit); } } nvfx->dirty_samplers = 0; + nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGTEX; } void @@ -50,9 +227,133 @@ nvfx_fragtex_relocate(struct nvfx_context *nvfx) bo = mt->base.bo; MARK_RING(chan, 3, 3); - OUT_RELOC(chan, bo, RING_3D(NV34TCL_TX_OFFSET(unit), 2), tex_flags | NOUVEAU_BO_DUMMY, 0, 0); + OUT_RELOC(chan, bo, RING_3D(NV30_3D_TEX_OFFSET(unit), 2), tex_flags | NOUVEAU_BO_DUMMY, 0, 0); OUT_RELOC(chan, bo, 0, tex_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_DUMMY, 0, 0); OUT_RELOC(chan, bo, nvfx->hw_txf[unit], tex_flags | NOUVEAU_BO_OR | NOUVEAU_BO_DUMMY, - NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); + NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1); } + nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGTEX; +} + +void +nvfx_init_sampling_functions(struct nvfx_context *nvfx) +{ + nvfx->pipe.create_sampler_state = nvfx_sampler_state_create; + nvfx->pipe.bind_fragment_sampler_states = nvfx_sampler_state_bind; + nvfx->pipe.delete_sampler_state = nvfx_sampler_state_delete; + nvfx->pipe.set_fragment_sampler_views = nvfx_set_fragment_sampler_views; + nvfx->pipe.create_sampler_view = nvfx_create_sampler_view; + nvfx->pipe.sampler_view_destroy = nvfx_sampler_view_destroy; +} + +#define NV30_3D_TEX_FORMAT_FORMAT_DXT1_RECT NV30_3D_TEX_FORMAT_FORMAT_DXT1 +#define NV30_3D_TEX_FORMAT_FORMAT_DXT3_RECT NV30_3D_TEX_FORMAT_FORMAT_DXT3 +#define NV30_3D_TEX_FORMAT_FORMAT_DXT5_RECT NV30_3D_TEX_FORMAT_FORMAT_DXT5 + +#define NV40_3D_TEX_FORMAT_FORMAT_HILO16 NV40_3D_TEX_FORMAT_FORMAT_A16L16 + +#define NV30_3D_TEX_FORMAT_FORMAT_RGBA16F 0x00004a00 +#define NV30_3D_TEX_FORMAT_FORMAT_RGBA16F_RECT NV30_3D_TEX_FORMAT_FORMAT_RGBA16F +#define NV30_3D_TEX_FORMAT_FORMAT_RGBA32F 0x00004b00 +#define NV30_3D_TEX_FORMAT_FORMAT_RGBA32F_RECT NV30_3D_TEX_FORMAT_FORMAT_RGBA32F +#define NV30_3D_TEX_FORMAT_FORMAT_R32F 0x00004c00 +#define NV30_3D_TEX_FORMAT_FORMAT_R32F_RECT NV30_3D_TEX_FORMAT_FORMAT_R32F + +// TODO: guess! +#define NV40_3D_TEX_FORMAT_FORMAT_R32F 0x00001c00 + +#define SRGB 0x00700000 + +#define __(m,tf,tfc,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sign,wrap) \ +[PIPE_FORMAT_##m] = { \ + {NV30_3D_TEX_FORMAT_FORMAT_##tf, \ + NV30_3D_TEX_FORMAT_FORMAT_##tfc, \ + NV30_3D_TEX_FORMAT_FORMAT_##tf##_RECT, \ + NV30_3D_TEX_FORMAT_FORMAT_##tfc##_RECT, \ + NV40_3D_TEX_FORMAT_FORMAT_##tf, \ + NV40_3D_TEX_FORMAT_FORMAT_##tfc}, \ + sign, wrap, \ + {ts0z, ts0y, ts0x, ts0w, 0, 1}, {ts1z, ts1y, ts1x, ts1w, 0, 0} \ } + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sign, wrap) \ + __(m,tf,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sign, wrap) + +/* Depth formats works by reading the depth value most significant 8/16 bits. + * We are losing precision, but nVidia loses even more by using A8R8G8B8 instead of HILO16 + * There is no 32-bit integer texture support, so other things are infeasible. + * + * TODO: is it possible to read 16 bits for Z16? A16 doesn't seem to work, either due to normalization or endianness issues + */ + +#define T 2 + +#define X 3 +#define Y 2 +#define Z 1 +#define W 0 + +#define SNORM ((NV30_3D_TEX_FILTER_SIGNED_RED) | (NV30_3D_TEX_FILTER_SIGNED_GREEN) | (NV30_3D_TEX_FILTER_SIGNED_BLUE) | (NV30_3D_TEX_FILTER_SIGNED_ALPHA)) +#define UNORM 0 + +struct nvfx_texture_format +nvfx_texture_formats[PIPE_FORMAT_COUNT] = { + [0 ... PIPE_FORMAT_COUNT - 1] = {{-1, -1, -1, -1, -1, -1}}, + _(B8G8R8X8_UNORM, A8R8G8B8, T, T, T, 1, X, Y, Z, W, UNORM, 0), + _(B8G8R8X8_SRGB, A8R8G8B8, T, T, T, 1, X, Y, Z, W, UNORM, SRGB), + _(B8G8R8A8_UNORM, A8R8G8B8, T, T, T, T, X, Y, Z, W, UNORM, 0), + _(B8G8R8A8_SRGB, A8R8G8B8, T, T, T, T, X, Y, Z, W, UNORM, SRGB), + + _(R8G8B8A8_UNORM, A8R8G8B8, T, T, T, T, Z, Y, X, W, UNORM, 0), + _(R8G8B8A8_SRGB, A8R8G8B8, T, T, T, T, Z, Y, X, W, UNORM, SRGB), + _(R8G8B8X8_UNORM, A8R8G8B8, T, T, T, 1, Z, Y, X, W, UNORM, 0), + + _(A8R8G8B8_UNORM, A8R8G8B8, T, T, T, T, W, Z, Y, X, UNORM, 0), + _(A8R8G8B8_SRGB, A8R8G8B8, T, T, T, T, W, Z, Y, X, UNORM, SRGB), + _(A8B8G8R8_UNORM, A8R8G8B8, T, T, T, T, W, X, Y, Z, UNORM, 0), + _(A8B8G8R8_SRGB, A8R8G8B8, T, T, T, T, W, X, Y, Z, UNORM, SRGB), + _(X8R8G8B8_UNORM, A8R8G8B8, T, T, T, 1, W, Z, Y, X, UNORM, 0), + _(X8R8G8B8_SRGB, A8R8G8B8, T, T, T, 1, W, Z, Y, X, UNORM, SRGB), + + _(B5G5R5A1_UNORM, A1R5G5B5, T, T, T, T, X, Y, Z, W, UNORM, 0), + _(B5G5R5X1_UNORM, A1R5G5B5, T, T, T, 1, X, Y, Z, W, UNORM, 0), + + _(B4G4R4A4_UNORM, A4R4G4B4, T, T, T, T, X, Y, Z, W, UNORM, 0), + _(B4G4R4X4_UNORM, A4R4G4B4, T, T, T, 1, X, Y, Z, W, UNORM, 0), + + _(B5G6R5_UNORM, R5G6B5, T, T, T, 1, X, Y, Z, W, UNORM, 0), + + _(R8_UNORM, L8, T, 0, 0, 1, X, X, X, X, UNORM, 0), + _(R8_SNORM, L8, T, 0, 0, 1, X, X, X, X, SNORM, 0), + _(L8_UNORM, L8, T, T, T, 1, X, X, X, X, UNORM, 0), + _(L8_SRGB, L8, T, T, T, 1, X, X, X, X, UNORM, SRGB), + _(A8_UNORM, L8, 0, 0, 0, T, X, X, X, X, UNORM, 0), + _(I8_UNORM, L8, T, T, T, T, X, X, X, X, UNORM, 0), + + _(R8G8_UNORM, A8L8, T, T, T, T, X, X, X, W, UNORM, 0), + _(R8G8_SNORM, A8L8, T, T, T, T, X, X, X, W, SNORM, 0), + _(L8A8_UNORM, A8L8, T, T, T, T, X, X, X, W, UNORM, 0), + _(L8A8_SRGB, A8L8, T, T, T, T, X, X, X, W, UNORM, SRGB), + + _(DXT1_RGB, DXT1, T, T, T, 1, X, Y, Z, W, UNORM, 0), + _(DXT1_SRGB, DXT1, T, T, T, 1, X, Y, Z, W, UNORM, SRGB), + _(DXT1_RGBA, DXT1, T, T, T, T, X, Y, Z, W, UNORM, 0), + _(DXT1_SRGBA, DXT1, T, T, T, T, X, Y, Z, W, UNORM, SRGB), + _(DXT3_RGBA, DXT3, T, T, T, T, X, Y, Z, W, UNORM, 0), + _(DXT3_SRGBA, DXT3, T, T, T, T, X, Y, Z, W, UNORM, SRGB), + _(DXT5_RGBA, DXT5, T, T, T, T, X, Y, Z, W, UNORM, 0), + _(DXT5_SRGBA, DXT5, T, T, T, T, X, Y, Z, W, UNORM, SRGB), + + __(Z16_UNORM, A8L8, Z16, T, T, T, 1, W, W, W, W, UNORM, 0), + __(S8_USCALED_Z24_UNORM,HILO16,Z24, T, T, T, 1, W, W, W, W, UNORM, 0), + __(X8Z24_UNORM, HILO16,Z24, T, T, T, 1, W, W, W, W, UNORM, 0), + + _(R16_UNORM, A16, T, 0, 0, 1, X, X, X, X, UNORM, 0), + _(R16_SNORM, A16, T, 0, 0, 1, X, X, X, X, SNORM, 0), + _(R16G16_UNORM, HILO16, T, T, 0, 1, X, Y, X, X, UNORM, 0), + _(R16G16_SNORM, HILO16, T, T, 0, 1, X, Y, X, X, SNORM, 0), + + _(R16G16B16A16_FLOAT, RGBA16F, T, T, T, T, X, Y, Z, W, UNORM, 0), + _(R32G32B32A32_FLOAT, RGBA32F, T, T, T, T, X, Y, Z, W, UNORM, 0), + _(R32_FLOAT, R32F, T, 0, 0, 1, X, X, X, X, UNORM, 0) +}; diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c index aeb88e9ac96..7677fde40cb 100644 --- a/src/gallium/drivers/nvfx/nvfx_miptree.c +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -2,309 +2,221 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "util/u_memory.h" #include "util/u_math.h" - -#include "nvfx_context.h" +#include "util/u_staging.h" +#include "state_tracker/drm_driver.h" +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_screen.h" +#include "nvfx_screen.h" #include "nvfx_resource.h" -#include "nvfx_transfer.h" -#include "nv04_surface_2d.h" - -/* Currently using separate implementations for buffers and textures, - * even though gallium has a unified abstraction of these objects. - * Eventually these should be combined, and mechanisms like transfers - * be adapted to work for both buffer and texture uploads. - */ static void -nvfx_miptree_layout(struct nvfx_miptree *mt) +nvfx_miptree_choose_format(struct nvfx_miptree *mt) { struct pipe_resource *pt = &mt->base.base; - uint width = pt->width0; - uint offset = 0; - int nr_faces, l, f; - uint wide_pitch = pt->bind & (PIPE_BIND_SAMPLER_VIEW | - PIPE_BIND_DEPTH_STENCIL | - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT); - - if (pt->target == PIPE_TEXTURE_CUBE) { - nr_faces = 6; - } else - if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth0; - } else { - nr_faces = 1; + unsigned uniform_pitch = 0; + static int no_swizzle = -1; + if(no_swizzle < 0) + no_swizzle = debug_get_bool_option("NV40_NO_SWIZZLE", FALSE); /* this will break things on nv30 */ + + if (!util_is_power_of_two(pt->width0) || + !util_is_power_of_two(pt->height0) || + !util_is_power_of_two(pt->depth0) || + (!nvfx_screen(pt->screen)->is_nv4x && pt->target == PIPE_TEXTURE_RECT) + ) + uniform_pitch = 1; + + if ( + (pt->bind & (PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET)) + || (pt->usage & PIPE_USAGE_DYNAMIC) || (pt->usage & PIPE_USAGE_STAGING) + || util_format_is_compressed(pt->format) + || no_swizzle + ) + mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; + + /* non compressed formats with uniform pitch must be linear, and vice versa */ + if(!util_format_is_s3tc(pt->format) + && (uniform_pitch || mt->base.base.flags & NVFX_RESOURCE_FLAG_LINEAR)) + { + mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; + uniform_pitch = 1; } - for (l = 0; l <= pt->last_level; l++) { - if (wide_pitch && (pt->flags & NVFX_RESOURCE_FLAG_LINEAR)) - mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64); - else - mt->level[l].pitch = util_format_get_stride(pt->format, width); + if(uniform_pitch) + { + mt->linear_pitch = util_format_get_stride(pt->format, pt->width0); - mt->level[l].image_offset = - CALLOC(nr_faces, sizeof(unsigned)); + // TODO: this is only a constraint for rendering and not sampling, apparently + // we may also want this unconditionally + if(pt->bind & (PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT)) + mt->linear_pitch = align(mt->linear_pitch, 64); + } + else + mt->linear_pitch = 0; +} + +static unsigned +nvfx_miptree_layout(struct nvfx_miptree *mt) +{ + struct pipe_resource* pt = &mt->base.base; + uint offset = 0; - width = u_minify(width, 1); + if(!nvfx_screen(pt->screen)->is_nv4x) + { + assert(pt->target == PIPE_TEXTURE_RECT + || (util_is_power_of_two(pt->width0) && util_is_power_of_two(pt->height0))); } - for (f = 0; f < nr_faces; f++) { - for (l = 0; l < pt->last_level; l++) { - mt->level[l].image_offset[f] = offset; + for (unsigned l = 0; l <= pt->last_level; l++) + { + unsigned size; + mt->level_offset[l] = offset; - if (!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR) && - u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) - offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64); - else - offset += mt->level[l].pitch * u_minify(pt->height0, l); - } + if(mt->linear_pitch) + size = mt->linear_pitch; + else + size = util_format_get_stride(pt->format, u_minify(pt->width0, l)); + size = util_format_get_2d_size(pt->format, size, u_minify(pt->height0, l)); - mt->level[l].image_offset[f] = offset; - offset += mt->level[l].pitch * u_minify(pt->height0, l); + if(pt->target == PIPE_TEXTURE_3D) + size *= u_minify(pt->depth0, l); + + offset += size; } - mt->total_size = offset; + offset = align(offset, 128); + mt->face_size = offset; + if(mt->base.base.target == PIPE_TEXTURE_CUBE) + offset += 5 * mt->face_size; + return offset; } -static boolean -nvfx_miptree_get_handle(struct pipe_screen *pscreen, - struct pipe_resource *ptexture, - struct winsys_handle *whandle) +static void +nvfx_miptree_surface_final_destroy(struct pipe_surface* ps) { - struct nvfx_miptree* mt = (struct nvfx_miptree*)ptexture; - - if (!mt || !mt->base.bo) - return FALSE; - - return nouveau_screen_bo_get_handle(pscreen, - mt->base.bo, - mt->level[0].pitch, - whandle); + struct nvfx_surface* ns = (struct nvfx_surface*)ps; + pipe_resource_reference(&ps->texture, 0); + pipe_resource_reference((struct pipe_resource**)&ns->temp, 0); + FREE(ps); } - -static void +void nvfx_miptree_destroy(struct pipe_screen *screen, struct pipe_resource *pt) { struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; - int l; - + util_surfaces_destroy(&mt->surfaces, pt, nvfx_miptree_surface_final_destroy); nouveau_screen_bo_release(screen, mt->base.bo); - - for (l = 0; l <= pt->last_level; l++) { - if (mt->level[l].image_offset) - FREE(mt->level[l].image_offset); - } - FREE(mt); } - - - -struct u_resource_vtbl nvfx_miptree_vtbl = +static struct nvfx_miptree* +nvfx_miptree_create_skeleton(struct pipe_screen *pscreen, const struct pipe_resource *pt) { - nvfx_miptree_get_handle, /* get_handle */ - nvfx_miptree_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ - nvfx_miptree_transfer_new, /* get_transfer */ - nvfx_miptree_transfer_del, /* transfer_destroy */ - nvfx_miptree_transfer_map, /* transfer_map */ - u_default_transfer_flush_region, /* transfer_flush_region */ - nvfx_miptree_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ -}; + struct nvfx_miptree *mt; + if(pt->width0 > 4096 || pt->height0 > 4096) + return NULL; + mt = CALLOC_STRUCT(nvfx_miptree); + if (!mt) + return NULL; -struct pipe_resource * -nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt) -{ - struct nvfx_miptree *mt; - static int no_swizzle = -1; - if(no_swizzle < 0) - no_swizzle = debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE); - - mt = CALLOC_STRUCT(nvfx_miptree); - if (!mt) - return NULL; - - mt->base.base = *pt; - mt->base.vtbl = &nvfx_miptree_vtbl; - pipe_reference_init(&mt->base.base.reference, 1); - mt->base.base.screen = pscreen; + mt->base.base = *pt; + util_dirty_surfaces_init(&mt->dirty_surfaces); - /* Swizzled textures must be POT */ - if (pt->width0 & (pt->width0 - 1) || - pt->height0 & (pt->height0 - 1)) - mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; - else - if (pt->bind & (PIPE_BIND_SCANOUT | - PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_DEPTH_STENCIL)) - mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; - else - if (pt->usage == PIPE_USAGE_DYNAMIC) - mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; - else { - switch (pt->format) { - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_I8_UNORM: - /* TODO: we can actually swizzle these formats on nv40, we - are just preserving the pre-unification behavior. - The whole 2D code is going to be rewritten anyway. */ - if(nvfx_screen(pscreen)->is_nv4x) { - mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; - break; - } - /* TODO: Figure out which formats can be swizzled */ - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_R16_SNORM: - { - if (no_swizzle) - mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; - break; - } - default: - mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; - } - } + pipe_reference_init(&mt->base.base.reference, 1); + mt->base.base.screen = pscreen; - /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. - * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. - * This also happens for small mipmaps of large textures. */ - if (pt->bind & PIPE_BIND_RENDER_TARGET && - util_format_get_stride(pt->format, pt->width0) < 64) - mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; + // set this to the actual capabilities, we use it to decide whether to use the 3D engine for copies + // TODO: is this the correct way to use Gallium? + mt->base.base.bind = pt->bind | PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_DEPTH_STENCIL; - nvfx_miptree_layout(mt); + // on our current driver (and the driver too), format support does not depend on geometry, so don't bother computing it + // TODO: may want to revisit this + if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_RENDER_TARGET, 0)) + mt->base.base.bind &=~ PIPE_BIND_RENDER_TARGET; + if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_SAMPLER_VIEW, 0)) + mt->base.base.bind &=~ PIPE_BIND_SAMPLER_VIEW; + if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_DEPTH_STENCIL, 0)) + mt->base.base.bind &=~ PIPE_BIND_DEPTH_STENCIL; - mt->base.bo = nouveau_screen_bo_new(pscreen, 256, - pt->usage, pt->bind, mt->total_size); - if (!mt->base.bo) { - FREE(mt); - return NULL; - } - return &mt->base.base; + return mt; } - - struct pipe_resource * -nvfx_miptree_from_handle(struct pipe_screen *pscreen, - const struct pipe_resource *template, - struct winsys_handle *whandle) +nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt) { - struct nvfx_miptree *mt; - unsigned stride; + struct nvfx_miptree* mt = nvfx_miptree_create_skeleton(pscreen, pt); + unsigned size; + nvfx_miptree_choose_format(mt); - /* Only supports 2D, non-mipmapped textures for the moment */ - if (template->target != PIPE_TEXTURE_2D || - template->last_level != 0 || - template->depth0 != 1) - return NULL; + size = nvfx_miptree_layout(mt); - mt = CALLOC_STRUCT(nvfx_miptree); - if (!mt) - return NULL; + mt->base.bo = nouveau_screen_bo_new(pscreen, 256, pt->usage, pt->bind, size); - mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride); - if (mt->base.bo == NULL) { + if (!mt->base.bo) { FREE(mt); return NULL; } - - mt->base.base = *template; - mt->base.vtbl = &nvfx_miptree_vtbl; - pipe_reference_init(&mt->base.base.reference, 1); - mt->base.base.screen = pscreen; - mt->level[0].pitch = stride; - mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); - - /* Assume whoever created this buffer expects it to be linear for now */ - mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; - - /* XXX: Need to adjust bo refcount?? - */ - /* nouveau_bo_ref(bo, &mt->base.bo); */ return &mt->base.base; } +// TODO: redo this, just calling miptree_layout +struct pipe_resource * +nvfx_miptree_from_handle(struct pipe_screen *pscreen, const struct pipe_resource *template, struct winsys_handle *whandle) +{ + struct nvfx_miptree* mt = nvfx_miptree_create_skeleton(pscreen, template); + unsigned stride; + if(whandle->stride) { + mt->linear_pitch = whandle->stride; + mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; + } else + nvfx_miptree_choose_format(mt); + nvfx_miptree_layout(mt); + mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride); + if (mt->base.bo == NULL) { + FREE(mt); + return NULL; + } + return &mt->base.base; +} - -/* Surface helpers, not strictly required to implement the resource vtbl: - */ struct pipe_surface * nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) { - struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; - struct nv04_surface *ns; - - ns = CALLOC_STRUCT(nv04_surface); - if (!ns) - return NULL; - pipe_resource_reference(&ns->base.texture, pt); - ns->base.format = pt->format; - ns->base.width = u_minify(pt->width0, level); - ns->base.height = u_minify(pt->height0, level); - ns->base.usage = flags; - pipe_reference_init(&ns->base.reference, 1); - ns->base.face = face; - ns->base.level = level; - ns->base.zslice = zslice; - ns->pitch = mt->level[level].pitch; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ns->base.offset = mt->level[level].image_offset[face]; - } else - if (pt->target == PIPE_TEXTURE_3D) { - ns->base.offset = mt->level[level].image_offset[zslice]; - } else { - ns->base.offset = mt->level[level].image_offset[0]; - } - - /* create a linear temporary that we can render into if - * necessary. - * - * Note that ns->pitch is always a multiple of 64 for linear - * surfaces and swizzled surfaces are POT, so ns->pitch & 63 - * is equivalent to (ns->pitch < 64 && swizzled) - */ - - if ((ns->pitch & 63) && - (ns->base.usage & PIPE_BIND_RENDER_TARGET)) - { - struct nv04_surface_2d* eng2d = - ((struct nvfx_screen*)pscreen)->eng2d; - - ns = nv04_surface_wrap_for_render(pscreen, eng2d, ns); + struct nvfx_miptree* mt = (struct nvfx_miptree*)pt; + struct nvfx_surface *ns; + + ns = (struct nvfx_surface*)util_surfaces_get(&mt->surfaces, sizeof(struct nvfx_surface), pscreen, pt, face, level, zslice, flags); + if(ns->base.base.offset == ~0) { + util_dirty_surface_init(&ns->base); + ns->pitch = nvfx_subresource_pitch(pt, level); + ns->base.base.offset = nvfx_subresource_offset(pt, face, level, zslice); } - return &ns->base; + return &ns->base.base; } void nvfx_miptree_surface_del(struct pipe_surface *ps) { - struct nv04_surface* ns = (struct nv04_surface*)ps; - if(ns->backing) + struct nvfx_surface* ns = (struct nvfx_surface*)ps; + + if(!ns->temp) { - struct nvfx_screen* screen = (struct nvfx_screen*)ps->texture->screen; - if(ns->backing->base.usage & PIPE_BIND_BLIT_DESTINATION) - screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); - nvfx_miptree_surface_del(&ns->backing->base); + assert(!util_dirty_surface_is_dirty(&ns->base)); + util_surfaces_detach(&((struct nvfx_miptree*)ps->texture)->surfaces, ps); + pipe_resource_reference(&ps->texture, 0); + FREE(ps); } - - pipe_resource_reference(&ps->texture, NULL); - FREE(ps); } diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c new file mode 100644 index 00000000000..ebf47e6ed30 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_push.c @@ -0,0 +1,414 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_split_prim.h" +#include "translate/translate.h" + +#include "nvfx_context.h" +#include "nvfx_resource.h" + +struct push_context { + struct nouveau_channel* chan; + + void *idxbuf; + int32_t idxbias; + + float edgeflag; + int edgeflag_attr; + + unsigned vertex_length; + unsigned max_vertices_per_packet; + + struct translate* translate; +}; + +static void +emit_edgeflag(void *priv, boolean enabled) +{ + struct push_context* ctx = priv; + struct nouveau_channel *chan = ctx->chan; + + OUT_RING(chan, RING_3D(NV30_3D_EDGEFLAG, 1)); + OUT_RING(chan, enabled ? 1 : 0); +} + +static void +emit_vertices_lookup8(void *priv, unsigned start, unsigned count) +{ + struct push_context *ctx = priv; + uint8_t* elts = (uint8_t*)ctx->idxbuf + start; + + while(count) + { + unsigned push = MIN2(count, ctx->max_vertices_per_packet); + unsigned length = push * ctx->vertex_length; + + OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); + ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur); + ctx->chan->cur += length; + + count -= push; + elts += push; + } +} + +static void +emit_vertices_lookup16(void *priv, unsigned start, unsigned count) +{ + struct push_context *ctx = priv; + uint16_t* elts = (uint16_t*)ctx->idxbuf + start; + + while(count) + { + unsigned push = MIN2(count, ctx->max_vertices_per_packet); + unsigned length = push * ctx->vertex_length; + + OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); + ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur); + ctx->chan->cur += length; + + count -= push; + elts += push; + } +} + +static void +emit_vertices_lookup32(void *priv, unsigned start, unsigned count) +{ + struct push_context *ctx = priv; + uint32_t* elts = (uint32_t*)ctx->idxbuf + start; + + while(count) + { + unsigned push = MIN2(count, ctx->max_vertices_per_packet); + unsigned length = push * ctx->vertex_length; + + OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); + ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur); + ctx->chan->cur += length; + + count -= push; + elts += push; + } +} + +static void +emit_vertices(void *priv, unsigned start, unsigned count) +{ + struct push_context *ctx = priv; + + while(count) + { + unsigned push = MIN2(count, ctx->max_vertices_per_packet); + unsigned length = push * ctx->vertex_length; + + OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length)); + ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur); + ctx->chan->cur += length; + + count -= push; + start += push; + } +} + +static void +emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg) +{ + struct push_context* ctx = priv; + struct nouveau_channel *chan = ctx->chan; + unsigned nr = (vc & 0xff); + if (nr) { + OUT_RING(chan, RING_3D(reg, 1)); + OUT_RING (chan, ((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + OUT_RING(chan, RING_3D_NI(reg, push)); + while (push--) { + OUT_RING(chan, ((0x100 - 1) << 24) | start); + start += 0x100; + } + } +} + +static void +emit_ib_ranges(void* priv, unsigned start, unsigned vc) +{ + emit_ranges(priv, start, vc, NV30_3D_VB_INDEX_BATCH); +} + +static void +emit_vb_ranges(void* priv, unsigned start, unsigned vc) +{ + emit_ranges(priv, start, vc, NV30_3D_VB_VERTEX_BATCH); +} + +static INLINE void +emit_elt8(void* priv, unsigned start, unsigned vc) +{ + struct push_context* ctx = priv; + struct nouveau_channel *chan = ctx->chan; + uint8_t *elts = (uint8_t *)ctx->idxbuf + start; + int idxbias = ctx->idxbias; + + if (vc & 1) { + OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1)); + OUT_RING (chan, elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + unsigned push = MIN2(vc, 2047 * 2); + + OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1)); + for (i = 0; i < push; i+=2) + OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias)); + + vc -= push; + elts += push; + } +} + +static INLINE void +emit_elt16(void* priv, unsigned start, unsigned vc) +{ + struct push_context* ctx = priv; + struct nouveau_channel *chan = ctx->chan; + uint16_t *elts = (uint16_t *)ctx->idxbuf + start; + int idxbias = ctx->idxbias; + + if (vc & 1) { + OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1)); + OUT_RING (chan, elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + unsigned push = MIN2(vc, 2047 * 2); + + OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1)); + for (i = 0; i < push; i+=2) + OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias)); + + vc -= push; + elts += push; + } +} + +static INLINE void +emit_elt32(void* priv, unsigned start, unsigned vc) +{ + struct push_context* ctx = priv; + struct nouveau_channel *chan = ctx->chan; + uint32_t *elts = (uint32_t *)ctx->idxbuf + start; + int idxbias = ctx->idxbias; + + while (vc) { + unsigned push = MIN2(vc, 2047); + + OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U32, push)); + assert(AVAIL_RING(chan) >= push); + if(idxbias) + { + for(unsigned i = 0; i < push; ++i) + OUT_RING(chan, elts[i] + idxbias); + } + else + OUT_RINGp(chan, elts, push); + + vc -= push; + elts += push; + } +} + +void +nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nouveau_channel *chan = nvfx->screen->base.channel; + struct push_context ctx; + struct util_split_prim s; + unsigned instances_left = info->instance_count; + int vtx_value; + unsigned hw_mode = nvgl_primitive(info->mode); + int i; + struct + { + uint8_t* map; + unsigned step; + } per_instance[16]; + unsigned p_overhead = 64 /* magic fix */ + + 4 /* begin/end */ + + 4; /* potential edgeflag enable/disable */ + + ctx.chan = nvfx->screen->base.channel; + ctx.translate = nvfx->vtxelt->translate; + ctx.idxbuf = NULL; + ctx.vertex_length = nvfx->vtxelt->vertex_length; + ctx.max_vertices_per_packet = nvfx->vtxelt->max_vertices_per_packet; + ctx.edgeflag = 0.5f; + // TODO: figure out if we really want to handle this, and do so in that case + ctx.edgeflag_attr = 0xff; // nvfx->vertprog->cfg.edgeflag_in; + + if(!nvfx->use_vertex_buffers) + { + for(i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i) + { + struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index]; + uint8_t* data = nvfx_buffer(vb->buffer)->data + vb->buffer_offset; + if(info->indexed) + data += info->index_bias * vb->stride; + ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); + } + + if(ctx.edgeflag_attr < 16) + vtx_value = -(ctx.vertex_length + 3); /* vertex data and edgeflag header and value */ + else + { + p_overhead += 1; /* initial vertex_data header */ + vtx_value = -ctx.vertex_length; /* vertex data and edgeflag header and value */ + } + + if (info->indexed) { + // XXX: this case and is broken and probably need a new VTX_ATTR push path + if (nvfx->idxbuf.index_size == 1) + s.emit = emit_vertices_lookup8; + else if (nvfx->idxbuf.index_size == 2) + s.emit = emit_vertices_lookup16; + else + s.emit = emit_vertices_lookup32; + } else + s.emit = emit_vertices; + } + else + { + if(!info->indexed || nvfx->use_index_buffer) + { + s.emit = info->indexed ? emit_ib_ranges : emit_vb_ranges; + p_overhead += 3; + vtx_value = 0; + } + else if (nvfx->idxbuf.index_size == 4) + { + s.emit = emit_elt32; + p_overhead += 1; + vtx_value = 8; + } + else + { + s.emit = (nvfx->idxbuf.index_size == 2) ? emit_elt16 : emit_elt8; + p_overhead += 3; + vtx_value = 7; + } + } + + ctx.idxbias = info->index_bias; + if(nvfx->use_vertex_buffers) + ctx.idxbias -= nvfx->base_vertex; + + /* map index buffer, if present */ + if (info->indexed && !nvfx->use_index_buffer) + ctx.idxbuf = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset; + + s.priv = &ctx; + s.edge = emit_edgeflag; + + for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i) + { + struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index]; + float v[4]; + per_instance[i].step = info->start_instance % ve->instance_divisor; + per_instance[i].map = nvfx_buffer(vb->buffer)->data + vb->buffer_offset + ve->base.src_offset; + + nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0); + + WAIT_RING(chan, 5); + nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp); + } + + /* per-instance loop */ + while (instances_left--) { + int max_verts; + boolean done; + + util_split_prim_init(&s, info->mode, info->start, info->count); + nvfx_state_emit(nvfx); + for(;;) { + max_verts = AVAIL_RING(chan); + max_verts -= p_overhead; + + /* if vtx_value < 0, each vertex is -vtx_value words long + * otherwise, each vertex is 2^(vtx_value) / 255 words long (this is an approximation) + */ + if(vtx_value < 0) + { + max_verts /= -vtx_value; + max_verts -= (max_verts >> 10); /* vertex data headers */ + } + else + { + if(max_verts >= (1 << 23)) /* avoid overflow here */ + max_verts = (1 << 23); + max_verts = (max_verts * 255) >> vtx_value; + } + + //printf("avail %u max_verts %u\n", AVAIL_RING(chan), max_verts); + + if(max_verts >= 16) + { + /* XXX: any command a lot of times seems to (mostly) fix corruption that would otherwise happen */ + /* this seems to cause issues on nv3x, and also be unneeded there */ + if(nvfx->is_nv4x) + { + int i; + for(i = 0; i < 32; ++i) + { + OUT_RING(chan, RING_3D(0x1dac, 1)); + OUT_RING(chan, 0); + } + } + + OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + OUT_RING(chan, hw_mode); + done = util_split_prim_next(&s, max_verts); + OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1)); + OUT_RING(chan, 0); + + if(done) + break; + } + + FIRE_RING(chan); + nvfx_state_emit(nvfx); + } + + /* set data for the next instance, if any changed */ + for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i) + { + struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index]; + + if(++per_instance[i].step == ve->instance_divisor) + { + float v[4]; + per_instance[i].map += vb->stride; + per_instance[i].step = 0; + + nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0); + WAIT_RING(chan, 5); + nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp); + } + } + } +} diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c index 1b20b5245d7..3935ffd7f92 100644 --- a/src/gallium/drivers/nvfx/nvfx_query.c +++ b/src/gallium/drivers/nvfx/nvfx_query.c @@ -49,9 +49,10 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nvfx_query *q = nvfx_query(pq); struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; uint64_t tmp; + assert(!nvfx->query); + /* Happens when end_query() is called, then another begin_query() * without querying the result in-between. For now we'll wait for * the existing query to notify completion, but it could be better. @@ -71,33 +72,42 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq) nouveau_notifier_reset(nvfx->screen->query, q->object->start); - BEGIN_RING(chan, eng3d, NV34TCL_QUERY_RESET, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, eng3d, NV34TCL_QUERY_UNK17CC, 1); - OUT_RING (chan, 1); + WAIT_RING(chan, 4); + OUT_RING(chan, RING_3D(NV30_3D_QUERY_RESET, 1)); + OUT_RING(chan, 1); + OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); + OUT_RING(chan, 1); q->ready = FALSE; + + nvfx->query = pq; } static void nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; + struct nouveau_channel *chan = nvfx->screen->base.channel; struct nvfx_query *q = nvfx_query(pq); - BEGIN_RING(chan, eng3d, NV34TCL_QUERY_GET, 1); - OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | - ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); + assert(nvfx->query == pq); + + WAIT_RING(chan, 4); + OUT_RING(chan, RING_3D(NV30_3D_QUERY_GET, 1)); + OUT_RING (chan, (0x01 << NV30_3D_QUERY_GET_UNK24__SHIFT) | + ((q->object->start * 32) << NV30_3D_QUERY_GET_OFFSET__SHIFT)); + OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); + OUT_RING(chan, 0); FIRE_RING(chan); + + nvfx->query = 0; } static boolean nvfx_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64_t *result) + boolean wait, void *vresult) { + uint64_t *result = (uint64_t *)vresult; struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_query *q = nvfx_query(pq); diff --git a/src/gallium/drivers/nvfx/nvfx_resource.c b/src/gallium/drivers/nvfx/nvfx_resource.c index 10cdeed2a37..39ae893f1b3 100644 --- a/src/gallium/drivers/nvfx/nvfx_resource.c +++ b/src/gallium/drivers/nvfx/nvfx_resource.c @@ -1,23 +1,15 @@ #include "pipe/p_context.h" +#include "util/u_staging.h" #include "nvfx_resource.h" #include "nouveau/nouveau_screen.h" - -/* This doesn't look quite right - this query is supposed to ask - * whether the particular context has references to the resource in - * any unflushed rendering command buffer, and hence requires a - * pipe->flush() for serializing some modification to that resource. - * - * This seems to be answering the question of whether the resource is - * currently on hardware. - */ static unsigned int nvfx_resource_is_referenced(struct pipe_context *pipe, - struct pipe_resource *resource, + struct pipe_resource *pr, unsigned face, unsigned level) { - return nouveau_reference_flags(nvfx_resource(resource)->bo); + return !!nouveau_reference_flags(nvfx_resource(pr)->bo); } static struct pipe_resource * @@ -30,6 +22,15 @@ nvfx_resource_create(struct pipe_screen *screen, return nvfx_miptree_create(screen, template); } +static void +nvfx_resource_destroy(struct pipe_screen *screen, struct pipe_resource *pr) +{ + if (pr->target == PIPE_BUFFER) + return nvfx_buffer_destroy(screen, pr); + else + return nvfx_miptree_destroy(screen, pr); +} + static struct pipe_resource * nvfx_resource_from_handle(struct pipe_screen * screen, const struct pipe_resource *template, @@ -41,15 +42,22 @@ nvfx_resource_from_handle(struct pipe_screen * screen, return nvfx_miptree_from_handle(screen, template, whandle); } +static boolean +nvfx_resource_get_handle(struct pipe_screen *pscreen, + struct pipe_resource *pr, + struct winsys_handle *whandle) +{ + struct nvfx_resource* res = (struct nvfx_resource*)pr; + + if (!res || !res->bo) + return FALSE; + + return nouveau_screen_bo_get_handle(pscreen, res->bo, nvfx_subresource_pitch(pr, 0), whandle); +} + void nvfx_init_resource_functions(struct pipe_context *pipe) { - pipe->get_transfer = u_get_transfer_vtbl; - pipe->transfer_map = u_transfer_map_vtbl; - pipe->transfer_flush_region = u_transfer_flush_region_vtbl; - pipe->transfer_unmap = u_transfer_unmap_vtbl; - pipe->transfer_destroy = u_transfer_destroy_vtbl; - pipe->transfer_inline_write = u_transfer_inline_write_vtbl; pipe->is_resource_referenced = nvfx_resource_is_referenced; } @@ -58,10 +66,10 @@ nvfx_screen_init_resource_functions(struct pipe_screen *pscreen) { pscreen->resource_create = nvfx_resource_create; pscreen->resource_from_handle = nvfx_resource_from_handle; - pscreen->resource_get_handle = u_resource_get_handle_vtbl; - pscreen->resource_destroy = u_resource_destroy_vtbl; + pscreen->resource_get_handle = nvfx_resource_get_handle; + pscreen->resource_destroy = nvfx_resource_destroy; pscreen->user_buffer_create = nvfx_user_buffer_create; - + pscreen->get_tex_surface = nvfx_miptree_surface_new; pscreen->tex_surface_destroy = nvfx_miptree_surface_del; } diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h index a68c14cf3fb..583be4de2ae 100644 --- a/src/gallium/drivers/nvfx/nvfx_resource.h +++ b/src/gallium/drivers/nvfx/nvfx_resource.h @@ -1,44 +1,82 @@ - #ifndef NVFX_RESOURCE_H #define NVFX_RESOURCE_H #include "util/u_transfer.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_double_list.h" +#include "util/u_surfaces.h" +#include "util/u_dirty_surfaces.h" +#include <nouveau/nouveau_bo.h> struct pipe_resource; -struct nouveau_bo; - +struct nv04_region; -/* This gets further specialized into either buffer or texture - * structures. In the future we'll want to remove much of that - * distinction, but for now try to keep as close to the existing code - * as possible and use the vtbl struct to choose between the two - * underlying implementations. - */ struct nvfx_resource { struct pipe_resource base; - struct u_resource_vtbl *vtbl; struct nouveau_bo *bo; }; +static INLINE +struct nvfx_resource *nvfx_resource(struct pipe_resource *resource) +{ + return (struct nvfx_resource *)resource; +} + +#define NVFX_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) +#define NVFX_RESOURCE_FLAG_USER (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) + +/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */ +static INLINE boolean +nvfx_resource_mapped_by_gpu(struct pipe_resource *resource) +{ + return nvfx_resource(resource)->bo->handle; +} + +/* is resource in VRAM? */ +static inline int +nvfx_resource_on_gpu(struct pipe_resource* pr) +{ +#if 0 + // a compiler error here means you need to apply libdrm-nouveau-add-domain.patch to libdrm + // TODO: return FALSE if not VRAM and on a PCI-E system + return ((struct nvfx_resource*)pr)->bo->domain & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); +#else + return TRUE; +#endif +} + #define NVFX_MAX_TEXTURE_LEVELS 16 +/* We have the following invariants for render temporaries + * + * 1. Render temporaries are always linear + * 2. Render temporaries are always up to date + * 3. Currently, render temporaries are destroyed when the resource is used for sampling, but kept for any other use + * + * Also, we do NOT flush temporaries on any pipe->flush(). + * This is fine, as long as scanout targets and shared resources never need temps. + * + * TODO: we may want to also support swizzled temporaries to improve performance in some cases. + */ + struct nvfx_miptree { - struct nvfx_resource base; - uint total_size; + struct nvfx_resource base; - struct { - uint pitch; - uint *image_offset; - } level[NVFX_MAX_TEXTURE_LEVELS]; + unsigned linear_pitch; /* for linear textures, 0 for swizzled and compressed textures with level-dependent minimal pitch */ + unsigned face_size; /* 128-byte aligned face/total size */ + unsigned level_offset[NVFX_MAX_TEXTURE_LEVELS]; - unsigned image_nr; + struct util_surfaces surfaces; + struct util_dirty_surfaces dirty_surfaces; }; -static INLINE -struct nvfx_resource *nvfx_resource(struct pipe_resource *resource) -{ - return (struct nvfx_resource *)resource; -} +struct nvfx_surface { + struct util_dirty_surface base; + unsigned pitch; + + struct nvfx_miptree* temp; +}; static INLINE struct nouveau_bo * nvfx_surface_buffer(struct pipe_surface *surf) @@ -48,6 +86,12 @@ nvfx_surface_buffer(struct pipe_surface *surf) return mt->bo; } +static INLINE struct util_dirty_surfaces* +nvfx_surface_get_dirty_surfaces(struct pipe_surface* surf) +{ + struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture; + return &mt->dirty_surfaces; +} void nvfx_init_resource_functions(struct pipe_context *pipe); @@ -62,30 +106,118 @@ nvfx_screen_init_resource_functions(struct pipe_screen *pscreen); struct pipe_resource * nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt); +void +nvfx_miptree_destroy(struct pipe_screen *pscreen, + struct pipe_resource *presource); + struct pipe_resource * nvfx_miptree_from_handle(struct pipe_screen *pscreen, const struct pipe_resource *template, struct winsys_handle *whandle); +void +nvfx_miptree_surface_del(struct pipe_surface *ps); + +struct pipe_surface * +nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags); + +/* only for miptrees, don't use for buffers */ + +/* NOTE: for swizzled 3D textures, this just returns the offset of the mipmap level */ +static inline unsigned +nvfx_subresource_offset(struct pipe_resource* pt, unsigned face, unsigned level, unsigned zslice) +{ + if(pt->target == PIPE_BUFFER) + return 0; + else + { + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; + + unsigned offset = mt->level_offset[level]; + if (pt->target == PIPE_TEXTURE_CUBE) + offset += mt->face_size * face; + else if (pt->target == PIPE_TEXTURE_3D && mt->linear_pitch) + offset += zslice * util_format_get_2d_size(pt->format, (mt->linear_pitch ? mt->linear_pitch : util_format_get_stride(pt->format, u_minify(pt->width0, level))), u_minify(pt->height0, level)); + return offset; + } +} + +static inline unsigned +nvfx_subresource_pitch(struct pipe_resource* pt, unsigned level) +{ + if(pt->target == PIPE_BUFFER) + return ((struct nvfx_resource*)pt)->bo->size; + else + { + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; + + if(mt->linear_pitch) + return mt->linear_pitch; + else + return util_format_get_stride(pt->format, u_minify(pt->width0, level)); + } +} + +void +nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf); + +void +nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf); + +struct nvfx_buffer +{ + struct nvfx_resource base; + uint8_t* data; + unsigned size; + + /* the range of data not yet uploaded to the GPU bo */ + unsigned dirty_begin; + unsigned dirty_end; + + /* whether all transfers were unsynchronized */ + boolean dirty_unsynchronized; + + /* whether it would have been profitable to upload + * the latest updated data to the GPU immediately */ + boolean last_update_static; + + /* how many bytes we need to draw before we deem + * the buffer to be static + */ + long long bytes_to_draw_until_static; +}; + +static inline struct nvfx_buffer* nvfx_buffer(struct pipe_resource* pr) +{ + return (struct nvfx_buffer*)pr; +} + +/* this is an heuristic to determine whether we are better off uploading the + * buffer to the GPU, or just continuing pushing it on the FIFO + */ +static inline boolean nvfx_buffer_seems_static(struct nvfx_buffer* buffer) +{ + return buffer->last_update_static + || buffer->bytes_to_draw_until_static < 0; +} + struct pipe_resource * nvfx_buffer_create(struct pipe_screen *pscreen, const struct pipe_resource *template); +void +nvfx_buffer_destroy(struct pipe_screen *pscreen, + struct pipe_resource *presource); + struct pipe_resource * nvfx_user_buffer_create(struct pipe_screen *screen, void *ptr, unsigned bytes, unsigned usage); - - void -nvfx_miptree_surface_del(struct pipe_surface *ps); - -struct pipe_surface * -nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags); - +nvfx_buffer_upload(struct nvfx_buffer* buffer); #endif diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index a4d5c61b7aa..875b3a96ca6 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -1,59 +1,52 @@ #include "pipe/p_screen.h" #include "pipe/p_state.h" +#include "util/u_format_s3tc.h" #include "util/u_simple_screen.h" #include "nouveau/nouveau_screen.h" - +#include "nouveau/nv_object.xml.h" #include "nvfx_context.h" #include "nvfx_video_context.h" #include "nvfx_screen.h" #include "nvfx_resource.h" +#include "nvfx_tex.h" -#define NV30TCL_CHIPSET_3X_MASK 0x00000003 -#define NV34TCL_CHIPSET_3X_MASK 0x00000010 -#define NV35TCL_CHIPSET_3X_MASK 0x000001e0 - -/* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h -* to get the pointer to the context front buffer, so I copied nouveau_winsys here. -* nv30_screen_surface_format_supported() can then use it to enforce creating fbo -* with same number of bits everywhere. -*/ -struct nouveau_winsys { - struct pipe_winsys base; +#define NV30_3D_CHIPSET_3X_MASK 0x00000003 +#define NV34_3D_CHIPSET_3X_MASK 0x00000010 +#define NV35_3D_CHIPSET_3X_MASK 0x000001e0 - struct pipe_screen *pscreen; - - struct pipe_surface *front; -}; #define NV4X_GRCLASS4097_CHIPSETS 0x00000baf #define NV4X_GRCLASS4497_CHIPSETS 0x00005450 #define NV6X_GRCLASS4497_CHIPSETS 0x00000088 static int -nvfx_screen_get_param(struct pipe_screen *pscreen, int param) +nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { struct nvfx_screen *screen = nvfx_screen(pscreen); switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - /* TODO: check this */ - return screen->is_nv4x ? 16 : 8; + return 16; case PIPE_CAP_NPOT_TEXTURES: - return !!screen->is_nv4x; + return screen->advertise_npot; case PIPE_CAP_TWO_SIDED_STENCIL: return 1; case PIPE_CAP_GLSL: - return 0; + return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: return 1; case PIPE_CAP_MAX_RENDER_TARGETS: - return screen->is_nv4x ? 4 : 2; + return screen->use_nv4x ? 4 : 2; case PIPE_CAP_OCCLUSION_QUERY: return 1; + case PIPE_CAP_TIMER_QUERY: + return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; + case PIPE_CAP_TEXTURE_SWIZZLE: + return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 13; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: @@ -61,15 +54,13 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - return !!screen->is_nv4x; + return !!screen->use_nv4x; case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 1; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: return 0; /* We have 4 on nv40 - but unsupported currently */ - case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return !!screen->is_nv4x; + return screen->advertise_blend_equation_separate; case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 16; case PIPE_CAP_INDEP_BLEND_ENABLE: @@ -78,20 +69,96 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, int param) return 0; case PIPE_CAP_INDEP_BLEND_FUNC: return 0; + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: + return 0; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; + return 1; + case PIPE_CAP_DEPTH_CLAMP: + return 0; // TODO: implement depth clamp default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + NOUVEAU_ERR("Warning: unknown PIPE_CAP %d\n", param); return 0; } } +static int +nvfx_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum pipe_shader_cap param) +{ + struct nvfx_screen *screen = nvfx_screen(pscreen); + + switch(shader) { + case PIPE_SHADER_FRAGMENT: + switch(param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 4096; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + /* FIXME: is it the dynamic (nv30:0/nv40:24) or the static + value (nv30:0/nv40:4) ? */ + return screen->use_nv4x ? 4 : 0; + case PIPE_SHADER_CAP_MAX_INPUTS: + return screen->use_nv4x ? 12 : 10; + case PIPE_SHADER_CAP_MAX_CONSTS: + return screen->use_nv4x ? 224 : 32; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 1; + case PIPE_SHADER_CAP_MAX_TEMPS: + return 32; + case PIPE_SHADER_CAP_MAX_ADDRS: + return screen->use_nv4x ? 1 : 0; + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; /* we could expose these, but nothing uses them */ + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 0; + default: + break; + } + break; + case PIPE_SHADER_VERTEX: + switch(param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + return screen->use_nv4x ? 512 : 256; + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return screen->use_nv4x ? 512 : 0; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + /* FIXME: is it the dynamic (nv30:24/nv40:24) or the static + value (nv30:1/nv40:4) ? */ + return screen->use_nv4x ? 4 : 1; + case PIPE_SHADER_CAP_MAX_INPUTS: + return 16; + case PIPE_SHADER_CAP_MAX_CONSTS: + /* - 6 is for clip planes; Gallium should be fixed to put + * them in the vertex shader itself, so we don't need to reserve these */ + return (screen->use_nv4x ? 468 : 256) - 6; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 1; + case PIPE_SHADER_CAP_MAX_TEMPS: + return screen->use_nv4x ? 32 : 13; + case PIPE_SHADER_CAP_MAX_ADDRS: + return 2; + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; /* we could expose these, but nothing uses them */ + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 1; + default: + break; + } + break; + default: + break; + } + return 0; +} + static float -nvfx_screen_get_paramf(struct pipe_screen *pscreen, int param) +nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param) { struct nvfx_screen *screen = nvfx_screen(pscreen); @@ -103,9 +170,9 @@ nvfx_screen_get_paramf(struct pipe_screen *pscreen, int param) case PIPE_CAP_MAX_POINT_WIDTH_AA: return 64.0; case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return screen->is_nv4x ? 16.0 : 8.0; + return screen->use_nv4x ? 16.0 : 8.0; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return screen->is_nv4x ? 16.0 : 4.0; + return 15.0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0.0; @@ -113,66 +180,87 @@ nvfx_screen_get_paramf(struct pipe_screen *pscreen, int param) } static boolean -nvfx_screen_surface_format_supported(struct pipe_screen *pscreen, +nvfx_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, enum pipe_texture_target target, - unsigned tex_usage, unsigned geom_flags) + unsigned sample_count, + unsigned bind, unsigned geom_flags) { struct nvfx_screen *screen = nvfx_screen(pscreen); - struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front; - if (tex_usage & PIPE_BIND_RENDER_TARGET) { + if (sample_count > 1) + return FALSE; + + if (bind & PIPE_BIND_RENDER_TARGET) { switch (format) { case PIPE_FORMAT_B8G8R8A8_UNORM: case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: case PIPE_FORMAT_B5G6R5_UNORM: - return TRUE; - default: break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + if(!screen->advertise_fp16) + return FALSE; + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + if(!screen->advertise_fp32) + return FALSE; + break; + default: + return FALSE; } - } else - if (tex_usage & PIPE_BIND_DEPTH_STENCIL) { + } + + if (bind & PIPE_BIND_DEPTH_STENCIL) { switch (format) { case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: - return TRUE; case PIPE_FORMAT_Z16_UNORM: - /* TODO: this nv30 limitation probably does not exist */ - if (!screen->is_nv4x && front) - return (front->format == PIPE_FORMAT_B5G6R5_UNORM); - return TRUE; - default: break; - } - } else { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return TRUE; - /* TODO: does nv30 support this? */ - case PIPE_FORMAT_R16_SNORM: - return !!screen->is_nv4x; default: - break; + return FALSE; } } - return FALSE; -} + if (bind & PIPE_BIND_SAMPLER_VIEW) { + struct nvfx_texture_format* tf = &nvfx_texture_formats[format]; + if(util_format_is_s3tc(format) && !util_format_s3tc_enabled) + return FALSE; + if(format == PIPE_FORMAT_R16G16B16A16_FLOAT && !screen->advertise_fp16) + return FALSE; + if(format == PIPE_FORMAT_R32G32B32A32_FLOAT && !screen->advertise_fp32) + return FALSE; + if(screen->use_nv4x) + { + if(tf->fmt[4] < 0) + return FALSE; + } + else + { + if(tf->fmt[0] < 0) + return FALSE; + } + } + + // note that we do actually support everything through translate + if (bind & PIPE_BIND_VERTEX_BUFFER) { + unsigned type = nvfx_vertex_formats[format]; + if(!type) + return FALSE; + } + + if (bind & PIPE_BIND_INDEX_BUFFER) { + // 8-bit indices supported, but not in hardware index buffer + if(format != PIPE_FORMAT_R16_USCALED && format != PIPE_FORMAT_R32_USCALED) + return FALSE; + } + if(bind & PIPE_BIND_STREAM_OUTPUT) + return FALSE; + + return TRUE; +} static void nvfx_screen_destroy(struct pipe_screen *pscreen) @@ -185,7 +273,7 @@ nvfx_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->eng3d); - nv04_surface_2d_takedown(&screen->eng2d); + nvfx_screen_surface_takedown(pscreen); nouveau_screen_fini(&screen->base); @@ -199,9 +287,9 @@ static void nv30_screen_init(struct nvfx_screen *screen) /* TODO: perhaps we should do some of this on nv40 too? */ for (i=1; i<8; i++) { - OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1)); + OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1)); OUT_RING(chan, 0); - OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_VERT(i), 1)); + OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_VERT(i), 1)); OUT_RING(chan, 0); } @@ -237,14 +325,14 @@ static void nv30_screen_init(struct nvfx_screen *screen) OUT_RING(chan, RING_3D(0x1d88, 1)); OUT_RING(chan, 0x00001200); - OUT_RING(chan, RING_3D(NV34TCL_RC_ENABLE, 1)); + OUT_RING(chan, RING_3D(NV30_3D_RC_ENABLE, 1)); OUT_RING(chan, 0); - OUT_RING(chan, RING_3D(NV34TCL_DEPTH_RANGE_NEAR, 2)); + OUT_RING(chan, RING_3D(NV30_3D_DEPTH_RANGE_NEAR, 2)); OUT_RING(chan, fui(0.0)); OUT_RING(chan, fui(1.0)); - OUT_RING(chan, RING_3D(NV34TCL_MULTISAMPLE_CONTROL, 1)); + OUT_RING(chan, RING_3D(NV30_3D_MULTISAMPLE_CONTROL, 1)); OUT_RING(chan, 0xffff0000); /* enables use of vp rather than fixed-function somehow */ @@ -256,10 +344,13 @@ static void nv40_screen_init(struct nvfx_screen *screen) { struct nouveau_channel *chan = screen->base.channel; - OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR2, 2)); + OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 2)); OUT_RING(chan, screen->base.channel->vram->handle); OUT_RING(chan, screen->base.channel->vram->handle); + OUT_RING(chan, RING_3D(0x1450, 1)); + OUT_RING(chan, 0x00000004); + OUT_RING(chan, RING_3D(0x1ea4, 3)); OUT_RING(chan, 0x00000010); OUT_RING(chan, 0x01000100); @@ -270,7 +361,7 @@ static void nv40_screen_init(struct nvfx_screen *screen) OUT_RING(chan, 0x06144321); OUT_RING(chan, RING_3D(0x1fc8, 2)); OUT_RING(chan, 0xedcba987); - OUT_RING(chan, 0x00000021); + OUT_RING(chan, 0x0000006f); OUT_RING(chan, RING_3D(0x1fd0, 1)); OUT_RING(chan, 0x00171615); OUT_RING(chan, RING_3D(0x1fd4, 1)); @@ -279,9 +370,12 @@ static void nv40_screen_init(struct nvfx_screen *screen) OUT_RING(chan, RING_3D(0x1ef8, 1)); OUT_RING(chan, 0x0020ffff); OUT_RING(chan, RING_3D(0x1d64, 1)); - OUT_RING(chan, 0x00d30000); + OUT_RING(chan, 0x01d300d4); OUT_RING(chan, RING_3D(0x1e94, 1)); OUT_RING(chan, 0x00000001); + + OUT_RING(chan, RING_3D(NV40_3D_MIPMAP_ROUNDING, 1)); + OUT_RING(chan, NV40_3D_MIPMAP_ROUNDING_MODE_DOWN); } static unsigned @@ -298,22 +392,17 @@ nvfx_screen_get_vertex_buffer_flags(struct nvfx_screen* screen) vram_hack_default = 1; vram_hack = debug_get_bool_option("NOUVEAU_VTXIDX_IN_VRAM", vram_hack_default); -#ifdef DEBUG - if(!vram_hack) - { - fprintf(stderr, "Some systems may experience graphics corruption due to randomly misplaced vertices.\n" - "If this is happening, export NOUVEAU_VTXIDX_IN_VRAM=1 may reduce or eliminate the problem\n"); - } - else - { - fprintf(stderr, "A performance reducing hack is being used to help avoid graphics corruption.\n" - "You can try export NOUVEAU_VTXIDX_IN_VRAM=0 to disable it.\n"); - } -#endif - return vram_hack ? NOUVEAU_BO_VRAM : NOUVEAU_BO_GART; } +static void nvfx_channel_flush_notify(struct nouveau_channel* chan) +{ + struct nvfx_screen* screen = chan->user_private; + struct nvfx_context* nvfx = screen->cur_ctx; + if(nvfx) + nvfx->relocs_needed = NVFX_RELOCATE_ALL; +} + struct pipe_screen * nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) { @@ -335,34 +424,38 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } chan = screen->base.channel; + screen->cur_ctx = NULL; + chan->user_private = screen; + chan->flush_notify = nvfx_channel_flush_notify; pscreen->winsys = ws; pscreen->destroy = nvfx_screen_destroy; pscreen->get_param = nvfx_screen_get_param; + pscreen->get_shader_param = nvfx_screen_get_shader_param; pscreen->get_paramf = nvfx_screen_get_paramf; - pscreen->is_format_supported = nvfx_screen_surface_format_supported; + pscreen->is_format_supported = nvfx_screen_is_format_supported; pscreen->context_create = nvfx_create; pscreen->video_context_create = nvfx_video_create; switch (dev->chipset & 0xf0) { case 0x30: - if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) - eng3d_class = 0x0397; - else if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) - eng3d_class = 0x0697; - else if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) - eng3d_class = 0x0497; + if (NV30_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) + eng3d_class = NV30_3D; + else if (NV34_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) + eng3d_class = NV34_3D; + else if (NV35_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) + eng3d_class = NV35_3D; break; case 0x40: if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f))) - eng3d_class = NV40TCL; + eng3d_class = NV40_3D; else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) - eng3d_class = NV44TCL; + eng3d_class = NV44_3D; screen->is_nv4x = ~0; break; case 0x60: if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) - eng3d_class = NV44TCL; + eng3d_class = NV44_3D; screen->is_nv4x = ~0; break; } @@ -372,12 +465,37 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE); + screen->advertise_npot = !!screen->is_nv4x; + screen->advertise_blend_equation_separate = !!screen->is_nv4x; + screen->use_nv4x = screen->is_nv4x; + + if(screen->is_nv4x) { + if(debug_get_bool_option("NVFX_SIMULATE_NV30", FALSE)) + screen->use_nv4x = 0; + if(!debug_get_bool_option("NVFX_NPOT", TRUE)) + screen->advertise_npot = 0; + if(!debug_get_bool_option("NVFX_BLEND_EQ_SEP", TRUE)) + screen->advertise_blend_equation_separate = 0; + } + + screen->force_swtnl = debug_get_bool_option("NVFX_SWTNL", FALSE); + screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE); + + screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384); + screen->inline_cost_per_hardware_cost = atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0")); + screen->static_reuse_threshold = atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0")); + + /* We don't advertise these by default because filtering and blending doesn't work as + * it should, due to several restrictions. + * The only exception is fp16 on nv40. + */ + screen->advertise_fp16 = debug_get_bool_option("NVFX_FP16", !!screen->use_nv4x); + screen->advertise_fp32 = debug_get_bool_option("NVFX_FP32", 0); screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen); /* surely both nv3x and nv44 support index buffers too: find out how and test that */ - if(eng3d_class == NV40TCL) + if(eng3d_class == NV40_3D) screen->index_buffer_reloc_flags = screen->vertex_buffer_reloc_flags; if(!screen->force_swtnl && screen->vertex_buffer_reloc_flags == screen->index_buffer_reloc_flags) @@ -392,8 +510,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* 2D engine setup */ - screen->eng2d = nv04_surface_2d_init(&screen->base); - screen->eng2d->buf = nvfx_surface_buffer; + nvfx_screen_surface_init(pscreen); /* Notifier for sync purposes */ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); @@ -427,8 +544,8 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) LIST_INITHEAD(&screen->query_list); /* Vtxprog resources */ - if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->is_nv4x ? 512 : 256) || - nouveau_resource_init(&screen->vp_data_heap, 0, 256)) { + if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->use_nv4x ? 512 : 256) || + nouveau_resource_init(&screen->vp_data_heap, 0, screen->use_nv4x ? 468 : 256)) { nvfx_screen_destroy(pscreen); return NULL; } @@ -437,25 +554,25 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Static eng3d initialisation */ /* note that we just started using the channel, so we must have space in the pushbuffer */ - OUT_RING(chan, RING_3D(NV34TCL_DMA_NOTIFY, 1)); + OUT_RING(chan, RING_3D(NV30_3D_DMA_NOTIFY, 1)); OUT_RING(chan, screen->sync->handle); - OUT_RING(chan, RING_3D(NV34TCL_DMA_TEXTURE0, 2)); + OUT_RING(chan, RING_3D(NV30_3D_DMA_TEXTURE0, 2)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->gart->handle); - OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR1, 1)); + OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1)); OUT_RING(chan, chan->vram->handle); - OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 2)); + OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 2)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->vram->handle); - OUT_RING(chan, RING_3D(NV34TCL_DMA_VTXBUF0, 2)); + OUT_RING(chan, RING_3D(NV30_3D_DMA_VTXBUF0, 2)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->gart->handle); - OUT_RING(chan, RING_3D(NV34TCL_DMA_FENCE, 2)); + OUT_RING(chan, RING_3D(NV30_3D_DMA_FENCE, 2)); OUT_RING(chan, 0); OUT_RING(chan, screen->query->handle); - OUT_RING(chan, RING_3D(NV34TCL_DMA_IN_MEMORY7, 2)); + OUT_RING(chan, RING_3D(NV30_3D_DMA_UNK1AC, 2)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->vram->handle); diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h index 5e1c3945aef..b1f07187c78 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.h +++ b/src/gallium/drivers/nvfx/nvfx_screen.h @@ -1,11 +1,11 @@ #ifndef __NVFX_SCREEN_H__ #define __NVFX_SCREEN_H__ +#include "pipe/p_compiler.h" #include "util/u_double_list.h" #include "nouveau/nouveau_screen.h" -#include "nv04_surface_2d.h" -struct nvfx_context; +struct pipe_screen; struct nvfx_screen { struct nouveau_screen base; @@ -15,12 +15,17 @@ struct nvfx_screen { struct nvfx_context *cur_ctx; unsigned is_nv4x; /* either 0 or ~0 */ + unsigned use_nv4x; /* either 0 or ~0 */ boolean force_swtnl; + boolean trace_draw; unsigned vertex_buffer_reloc_flags; unsigned index_buffer_reloc_flags; + unsigned advertise_fp16; + unsigned advertise_fp32; + unsigned advertise_npot; + unsigned advertise_blend_equation_separate; /* HW graphics objects */ - struct nv04_surface_2d *eng2d; struct nouveau_grobj *eng3d; struct nouveau_notifier *sync; @@ -32,6 +37,20 @@ struct nvfx_screen { /* Vtxprog resources */ struct nouveau_resource *vp_exec_heap; struct nouveau_resource *vp_data_heap; + + struct nv04_2d_context* eng2d; + + /* Once the amount of bytes drawn from the buffer reaches the updated size times this value, + * we will assume that the buffer will be drawn an huge number of times before the + * next modification + */ + float static_reuse_threshold; + + /* Cost of allocating a buffer in terms of the cost of copying a byte to an hardware buffer */ + unsigned buffer_allocation_cost; + + /* inline_cost/hardware_cost conversion ration */ + float inline_cost_per_hardware_cost; }; static INLINE struct nvfx_screen * @@ -40,4 +59,7 @@ nvfx_screen(struct pipe_screen *screen) return (struct nvfx_screen *)screen; } +int nvfx_screen_surface_init(struct pipe_screen *pscreen); +void nvfx_screen_surface_takedown(struct pipe_screen *pscreen); + #endif diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h index 50830b39164..46406b27940 100644 --- a/src/gallium/drivers/nvfx/nvfx_shader.h +++ b/src/gallium/drivers/nvfx/nvfx_shader.h @@ -1,6 +1,12 @@ #ifndef __NVFX_SHADER_H__ #define __NVFX_SHADER_H__ +#include <stdint.h> + +#include "pipe/p_compiler.h" + +#define NVFX_SWZ_IDENTITY ((3 << 6) | (2 << 4) | (1 << 2) | (0 << 0)) + /* this will resolve to either the NV30 or the NV40 version * depending on the current hardware */ /* unusual, but very fast and compact method */ @@ -71,11 +77,58 @@ /* * Each fragment program opcode appears to be comprised of 4 32-bit values. * - * 0 - Opcode, output reg/mask, ATTRIB source - * 1 - Source 0 - * 2 - Source 1 - * 3 - Source 2 + * 0: OPDEST + * 0: program end + * 1-6: destination register + * 7: destination register is fp16?? (use for outputs) + * 8: set condition code + * 9: writemask x + * 10: writemask y + * 11: writemask z + * 12: writemask w + * 13-16: source attribute register number (e.g. COL0) + * 17-20: texture unit number + * 21: expand value on texture operation (x -> 2x - 1) + * 22-23: precision 0 = fp32, 1 = fp16, 2 = s1.10 fixed, 3 = s0.8 fixed (nv40-only)) + * 24-29: opcode + * 30: no destination + * 31: saturate + * 1 - SRC0 + * 0-17: see common source fields + * 18: execute if condition code less + * 19: execute if condition code equal + * 20: execute if condition code greater + * 21-22: condition code swizzle x source component + * 23-24: condition code swizzle y source component + * 25-26: condition code swizzle z source component + * 27-28: condition code swizzle w source component + * 29: source 0 absolute + * 30: always 0 in renouveau tests + * 31: always 0 in renouveau tests + * 2 - SRC1 + * 0-17: see common source fields + * 18: source 1 absolute + * 19-20: input precision 0 = fp32, 1 = fp16, 2 = s1.10 fixed, 3 = ??? + * 21-27: always 0 in renouveau tests + * 28-30: scale (0 = 1x, 1 = 2x, 2 = 4x, 3 = 8x, 4 = ???, 5, = 1/2, 6 = 1/4, 7 = 1/8) + * 31: opcode is branch + * 3 - SRC2 + * 0-17: see common source fields + * 18: source 2 absolute + * 19-29: address register displacement + * 30: use index register + * 31: disable perspective-correct interpolation? * +* Common fields of 0, 1, 2 - SRC + * 0-1: source register type (0 = temp, 1 = input, 2 = immediate, 3 = ???) + * 2-7: source temp register index + * 8: source register is fp16?? + * 9-10: source swizzle x source component + * 11-12: source swizzle y source component + * 13-14: source swizzle z source component + * 15-16: source swizzle w source component + * 17: negate + * There appears to be no special difference between result regs and temp regs. * result.color == R0.xyzw * result.depth == R1.z @@ -210,6 +263,8 @@ /* NV40 only fragment program opcodes */ #define NVFX_FP_OP_OPCODE_TXL_NV40 0x2F +#define NVFX_FP_OP_OPCODE_LITEX2_NV40 0x3C + /* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ #define NV40_FP_OP_BRA_OPCODE_BRK 0x0 #define NV40_FP_OP_BRA_OPCODE_CAL 0x1 @@ -218,10 +273,11 @@ #define NV40_FP_OP_BRA_OPCODE_REP 0x4 #define NV40_FP_OP_BRA_OPCODE_RET 0x5 +#define NV40_FP_OP_OUT_NONE (1 << 30) #define NVFX_FP_OP_OUT_SAT (1 << 31) /* high order bits of SRC0 */ -#define NVFX_FP_OP_OUT_ABS (1 << 29) +#define NVFX_FP_OP_SRC0_ABS (1 << 29) #define NVFX_FP_OP_COND_SWZ_W_SHIFT 27 #define NVFX_FP_OP_COND_SWZ_W_MASK (3 << 27) #define NVFX_FP_OP_COND_SWZ_Z_SHIFT 25 @@ -254,6 +310,7 @@ #define NVFX_FP_OP_DST_SCALE_INV_2X 5 #define NVFX_FP_OP_DST_SCALE_INV_4X 6 #define NVFX_FP_OP_DST_SCALE_INV_8X 7 +#define NVFX_FP_OP_SRC1_ABS (1 << 18) /* SRC1 LOOP */ #define NV40_FP_OP_LOOP_INCR_SHIFT 19 @@ -263,13 +320,13 @@ #define NV40_FP_OP_LOOP_COUNT_SHIFT 2 #define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) -/* SRC1 IF */ -#define NV40_FP_OP_ELSE_ID_SHIFT 2 -#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) +/* SRC1 IF: absolute offset in dwords */ +#define NV40_FP_OP_ELSE_OFFSET_SHIFT 0 +#define NV40_FP_OP_ELSE_OFFSET_MASK (0x7FFFFFFF << 0) /* SRC1 CAL */ -#define NV40_FP_OP_IADDR_SHIFT 2 -#define NV40_FP_OP_IADDR_MASK (0xFF << 2) +#define NV40_FP_OP_SUB_OFFSET_SHIFT 0 +#define NV40_FP_OP_SUB_OFFSET_MASK (0x7FFFFFFF << 0) /* SRC1 REP * I have no idea why there are 3 count values here.. but they @@ -283,9 +340,9 @@ #define NV40_FP_OP_REP_COUNT3_SHIFT 19 #define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) -/* SRC2 REP/IF */ -#define NV40_FP_OP_END_ID_SHIFT 2 -#define NV40_FP_OP_END_ID_MASK (0xFF << 2) +/* SRC2 REP/IF: absolute offset in dwords */ +#define NV40_FP_OP_END_OFFSET_SHIFT 0 +#define NV40_FP_OP_END_OFFSET_MASK (0x7FFFFFFF << 0) /* high order bits of SRC2 */ #define NVFX_FP_OP_INDEX_INPUT (1 << 30) @@ -322,7 +379,9 @@ #define NVFXSR_OUTPUT 1 #define NVFXSR_INPUT 2 #define NVFXSR_TEMP 3 -#define NVFXSR_CONST 4 +#define NVFXSR_CONST 5 +#define NVFXSR_IMM 6 +#define NVFXSR_RELOCATED 7 #define NVFX_COND_FL 0 #define NVFX_COND_LT 1 @@ -352,51 +411,91 @@ #define NVFX_SWZ_Z 2 #define NVFX_SWZ_W 3 -#define swz(s,x,y,z,w) nvfx_sr_swz((s), NVFX_SWZ_##x, NVFX_SWZ_##y, NVFX_SWZ_##z, NVFX_SWZ_##w) -#define neg(s) nvfx_sr_neg((s)) -#define abs(s) nvfx_sr_abs((s)) -#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v) +#define swz(s,x,y,z,w) nvfx_src_swz((s), NVFX_SWZ_##x, NVFX_SWZ_##y, NVFX_SWZ_##z, NVFX_SWZ_##w) +#define neg(s) nvfx_src_neg((s)) +#define abs(s) nvfx_src_abs((s)) -struct nvfx_sreg { - int type; - int index; +struct nvfx_reg { + int8_t type; + uint32_t index; +}; - int dst_scale; +struct nvfx_src { + struct nvfx_reg reg; - int negate; - int abs; - int swz[4]; + uint8_t indirect : 1; + uint8_t indirect_reg : 1; + uint8_t indirect_swz : 2; + uint8_t negate : 1; + uint8_t abs : 1; + uint8_t swz[4]; +}; - int cc_update; - int cc_update_reg; - int cc_test; - int cc_test_reg; - int cc_swz[4]; +struct nvfx_insn +{ + uint8_t op; + char scale; + int8_t unit; + uint8_t mask; + uint8_t cc_swz[4]; + + uint8_t sat : 1; + uint8_t cc_update : 1; + uint8_t cc_update_reg : 1; + uint8_t cc_test : 3; + uint8_t cc_test_reg : 1; + + struct nvfx_reg dst; + struct nvfx_src src[3]; }; -static INLINE struct nvfx_sreg -nvfx_sr(int type, int index) +static INLINE struct nvfx_insn +nvfx_insn(boolean sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask, struct nvfx_src s0, struct nvfx_src s1, struct nvfx_src s2) { - struct nvfx_sreg temp = { - .type = type, - .index = index, - .dst_scale = 0, - .abs = 0, - .negate = 0, - .swz = { 0, 1, 2, 3 }, + struct nvfx_insn insn = { + .op = op, + .scale = 0, + .unit = unit, + .sat = sat, + .mask = mask, .cc_update = 0, .cc_update_reg = 0, .cc_test = NVFX_COND_TR, .cc_test_reg = 0, .cc_swz = { 0, 1, 2, 3 }, + .dst = dst, + .src = {s0, s1, s2} + }; + return insn; +} + +static INLINE struct nvfx_reg +nvfx_reg(int type, int index) +{ + struct nvfx_reg temp = { + .type = type, + .index = index, }; return temp; } -static INLINE struct nvfx_sreg -nvfx_sr_swz(struct nvfx_sreg src, int x, int y, int z, int w) +static INLINE struct nvfx_src +nvfx_src(struct nvfx_reg reg) { - struct nvfx_sreg dst = src; + struct nvfx_src temp = { + .reg = reg, + .abs = 0, + .negate = 0, + .swz = { 0, 1, 2, 3 }, + .indirect = 0, + }; + return temp; +} + +static INLINE struct nvfx_src +nvfx_src_swz(struct nvfx_src src, int x, int y, int z, int w) +{ + struct nvfx_src dst = src; dst.swz[NVFX_SWZ_X] = src.swz[x]; dst.swz[NVFX_SWZ_Y] = src.swz[y]; @@ -405,25 +504,23 @@ nvfx_sr_swz(struct nvfx_sreg src, int x, int y, int z, int w) return dst; } -static INLINE struct nvfx_sreg -nvfx_sr_neg(struct nvfx_sreg src) +static INLINE struct nvfx_src +nvfx_src_neg(struct nvfx_src src) { src.negate = !src.negate; return src; } -static INLINE struct nvfx_sreg -nvfx_sr_abs(struct nvfx_sreg src) +static INLINE struct nvfx_src +nvfx_src_abs(struct nvfx_src src) { src.abs = 1; return src; } -static INLINE struct nvfx_sreg -nvfx_sr_scale(struct nvfx_sreg src, int scale) -{ - src.dst_scale = scale; - return src; -} +struct nvfx_relocation { + unsigned location; + unsigned target; +}; #endif diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index 315de492dab..54619037d82 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_framebuffer.h" #include "draw/draw_context.h" @@ -19,26 +20,26 @@ nvfx_blend_state_create(struct pipe_context *pipe, struct nouveau_statebuf_builder sb = sb_init(bso->sb); if (cso->rt[0].blend_enable) { - sb_method(sb, NV34TCL_BLEND_FUNC_ENABLE, 3); + sb_method(sb, NV30_3D_BLEND_FUNC_ENABLE, 3); sb_data(sb, 1); sb_data(sb, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | nvgl_blend_func(cso->rt[0].rgb_src_factor)); sb_data(sb, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | nvgl_blend_func(cso->rt[0].rgb_dst_factor)); if(nvfx->screen->base.device->chipset < 0x40) { - sb_method(sb, NV34TCL_BLEND_EQUATION, 1); + sb_method(sb, NV30_3D_BLEND_EQUATION, 1); sb_data(sb, nvgl_blend_eqn(cso->rt[0].rgb_func)); } else { - sb_method(sb, NV40TCL_BLEND_EQUATION, 1); + sb_method(sb, NV40_3D_BLEND_EQUATION, 1); sb_data(sb, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 | nvgl_blend_eqn(cso->rt[0].rgb_func)); } } else { - sb_method(sb, NV34TCL_BLEND_FUNC_ENABLE, 1); + sb_method(sb, NV30_3D_BLEND_FUNC_ENABLE, 1); sb_data(sb, 0); } - sb_method(sb, NV34TCL_COLOR_MASK, 1); + sb_method(sb, NV30_3D_COLOR_MASK, 1); sb_data(sb, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | @@ -47,15 +48,15 @@ nvfx_blend_state_create(struct pipe_context *pipe, /* TODO: add NV40 MRT color mask */ if (cso->logicop_enable) { - sb_method(sb, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); + sb_method(sb, NV30_3D_COLOR_LOGIC_OP_ENABLE, 2); sb_data(sb, 1); sb_data(sb, nvgl_logicop_func(cso->logicop_func)); } else { - sb_method(sb, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); + sb_method(sb, NV30_3D_COLOR_LOGIC_OP_ENABLE, 1); sb_data(sb, 0); } - sb_method(sb, NV34TCL_DITHER_ENABLE, 1); + sb_method(sb, NV30_3D_DITHER_ENABLE, 1); sb_data(sb, cso->dither ? 1 : 0); bso->sb_len = sb_len(sb, bso->sb); @@ -81,111 +82,6 @@ nvfx_blend_state_delete(struct pipe_context *pipe, void *hwcso) } static void * -nvfx_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_sampler_state *ps; - - ps = MALLOC(sizeof(struct nvfx_sampler_state)); - - /* on nv30, we use this as an internal flag */ - ps->fmt = cso->normalized_coords ? 0 : NV40TCL_TEX_FORMAT_RECT; - ps->en = 0; - ps->filt = nvfx_tex_filter(cso); - ps->wrap = (nvfx_tex_wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | - (nvfx_tex_wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | - (nvfx_tex_wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT) | - nvfx_tex_wrap_compare_mode(cso); - ps->bcol = nvfx_tex_border_color(cso->border_color); - - if(nvfx->is_nv4x) - nv40_sampler_state_init(pipe, ps, cso); - else - nv30_sampler_state_init(pipe, ps, cso); - - return (void *)ps; -} - -static void -nvfx_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - nvfx->tex_sampler[unit] = sampler[unit]; - nvfx->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nvfx->nr_samplers; unit++) { - nvfx->tex_sampler[unit] = NULL; - nvfx->dirty_samplers |= (1 << unit); - } - - nvfx->nr_samplers = nr; - nvfx->dirty |= NVFX_NEW_SAMPLER; -} - -static void -nvfx_sampler_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nvfx_set_fragment_sampler_views(struct pipe_context *pipe, - unsigned nr, - struct pipe_sampler_view **views) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit], - views[unit]); - nvfx->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nvfx->nr_textures; unit++) { - pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit], - NULL); - nvfx->dirty_samplers |= (1 << unit); - } - - nvfx->nr_textures = nr; - nvfx->dirty |= NVFX_NEW_SAMPLER; -} - - -static struct pipe_sampler_view * -nvfx_create_sampler_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ) -{ - struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); - - if (view) { - *view = *templ; - view->reference.count = 1; - view->texture = NULL; - pipe_resource_reference(&view->texture, texture); - view->context = pipe; - } - - return view; -} - - -static void -nvfx_sampler_view_destroy(struct pipe_context *pipe, - struct pipe_sampler_view *view) -{ - pipe_resource_reference(&view->texture, NULL); - FREE(view); -} - -static void * nvfx_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { @@ -195,105 +91,68 @@ nvfx_rasterizer_state_create(struct pipe_context *pipe, /*XXX: ignored: * point_smooth -nohw * multisample + * sprite_coord_origin */ - sb_method(sb, NV34TCL_SHADE_MODEL, 1); - sb_data(sb, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : - NV34TCL_SHADE_MODEL_SMOOTH); + sb_method(sb, NV30_3D_SHADE_MODEL, 1); + sb_data(sb, cso->flatshade ? NV30_3D_SHADE_MODEL_FLAT : + NV30_3D_SHADE_MODEL_SMOOTH); - sb_method(sb, NV34TCL_VERTEX_TWO_SIDE_ENABLE, 1); + sb_method(sb, NV30_3D_VERTEX_TWO_SIDE_ENABLE, 1); sb_data(sb, cso->light_twoside); - sb_method(sb, NV34TCL_LINE_WIDTH, 2); + sb_method(sb, NV30_3D_LINE_WIDTH, 2); sb_data(sb, (unsigned char)(cso->line_width * 8.0) & 0xff); sb_data(sb, cso->line_smooth ? 1 : 0); - sb_method(sb, NV34TCL_LINE_STIPPLE_ENABLE, 2); + sb_method(sb, NV30_3D_LINE_STIPPLE_ENABLE, 2); sb_data(sb, cso->line_stipple_enable ? 1 : 0); sb_data(sb, (cso->line_stipple_pattern << 16) | cso->line_stipple_factor); - sb_method(sb, NV34TCL_POINT_SIZE, 1); + sb_method(sb, NV30_3D_POINT_SIZE, 1); sb_data(sb, fui(cso->point_size)); - sb_method(sb, NV34TCL_POLYGON_MODE_FRONT, 6); - if (cso->front_winding == PIPE_WINDING_CCW) { - sb_data(sb, nvgl_polygon_mode(cso->fill_ccw)); - sb_data(sb, nvgl_polygon_mode(cso->fill_cw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - sb_data(sb, NV34TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_CW: - sb_data(sb, NV34TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_BOTH: - sb_data(sb, NV34TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - sb_data(sb, NV34TCL_CULL_FACE_BACK); - break; - } - sb_data(sb, NV34TCL_FRONT_FACE_CCW); + sb_method(sb, NV30_3D_POLYGON_MODE_FRONT, 6); + sb_data(sb, nvgl_polygon_mode(cso->fill_front)); + sb_data(sb, nvgl_polygon_mode(cso->fill_back)); + switch (cso->cull_face) { + case PIPE_FACE_FRONT: + sb_data(sb, NV30_3D_CULL_FACE_FRONT); + break; + case PIPE_FACE_BACK: + sb_data(sb, NV30_3D_CULL_FACE_BACK); + break; + case PIPE_FACE_FRONT_AND_BACK: + sb_data(sb, NV30_3D_CULL_FACE_FRONT_AND_BACK); + break; + default: + sb_data(sb, NV30_3D_CULL_FACE_BACK); + break; + } + if (cso->front_ccw) { + sb_data(sb, NV30_3D_FRONT_FACE_CCW); } else { - sb_data(sb, nvgl_polygon_mode(cso->fill_cw)); - sb_data(sb, nvgl_polygon_mode(cso->fill_ccw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - sb_data(sb, NV34TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_CW: - sb_data(sb, NV34TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_BOTH: - sb_data(sb, NV34TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - sb_data(sb, NV34TCL_CULL_FACE_BACK); - break; - } - sb_data(sb, NV34TCL_FRONT_FACE_CW); + sb_data(sb, NV30_3D_FRONT_FACE_CW); } sb_data(sb, cso->poly_smooth ? 1 : 0); - sb_data(sb, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + sb_data(sb, (cso->cull_face != PIPE_FACE_NONE) ? 1 : 0); - sb_method(sb, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + sb_method(sb, NV30_3D_POLYGON_STIPPLE_ENABLE, 1); sb_data(sb, cso->poly_stipple_enable ? 1 : 0); - sb_method(sb, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) - sb_data(sb, 1); - else - sb_data(sb, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) - sb_data(sb, 1); - else - sb_data(sb, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) - sb_data(sb, 1); - else - sb_data(sb, 0); - if (cso->offset_cw || cso->offset_ccw) { - sb_method(sb, NV34TCL_POLYGON_OFFSET_FACTOR, 2); + sb_method(sb, NV30_3D_POLYGON_OFFSET_POINT_ENABLE, 3); + sb_data(sb, cso->offset_point); + sb_data(sb, cso->offset_line); + sb_data(sb, cso->offset_tri); + + if (cso->offset_point || cso->offset_line || cso->offset_tri) { + sb_method(sb, NV30_3D_POLYGON_OFFSET_FACTOR, 2); sb_data(sb, fui(cso->offset_scale)); sb_data(sb, fui(cso->offset_units * 2)); } - sb_method(sb, NV34TCL_POINT_SPRITE, 1); - if (cso->point_quad_rasterization) { - unsigned psctl = (1 << 0), i; - - for (i = 0; i < 8; i++) { - if ((cso->sprite_coord_enable >> i) & 1) - psctl |= (1 << (8 + i)); - } - - sb_data(sb, psctl); - } else { - sb_data(sb, 0); - } + sb_method(sb, NV30_3D_FLATSHADE_FIRST, 1); + sb_data(sb, cso->flatshade_first); rsso->pipe = *cso; rsso->sb_len = sb_len(sb, rsso->sb); @@ -314,11 +173,11 @@ nvfx_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) nvfx->draw_dirty |= NVFX_NEW_SCISSOR; } - if(((struct nvfx_rasterizer_state*)hwcso)->pipe.poly_stipple_enable - != nvfx->rasterizer->pipe.poly_stipple_enable) + if(((struct nvfx_rasterizer_state*)hwcso)->pipe.point_quad_rasterization != nvfx->rasterizer->pipe.point_quad_rasterization + || ((struct nvfx_rasterizer_state*)hwcso)->pipe.sprite_coord_enable != nvfx->rasterizer->pipe.sprite_coord_enable + || ((struct nvfx_rasterizer_state*)hwcso)->pipe.sprite_coord_mode != nvfx->rasterizer->pipe.sprite_coord_mode) { - nvfx->dirty |= NVFX_NEW_STIPPLE; - nvfx->draw_dirty |= NVFX_NEW_STIPPLE; + nvfx->dirty |= NVFX_NEW_SPRITE; } } @@ -342,43 +201,41 @@ nvfx_depth_stencil_alpha_state_create(struct pipe_context *pipe, struct nvfx_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); struct nouveau_statebuf_builder sb = sb_init(zsaso->sb); - sb_method(sb, NV34TCL_DEPTH_FUNC, 3); + sb_method(sb, NV30_3D_DEPTH_FUNC, 1); sb_data (sb, nvgl_comparison_op(cso->depth.func)); - sb_data (sb, cso->depth.writemask ? 1 : 0); - sb_data (sb, cso->depth.enabled ? 1 : 0); - sb_method(sb, NV34TCL_ALPHA_FUNC_ENABLE, 3); + sb_method(sb, NV30_3D_ALPHA_FUNC_ENABLE, 3); sb_data (sb, cso->alpha.enabled ? 1 : 0); sb_data (sb, nvgl_comparison_op(cso->alpha.func)); sb_data (sb, float_to_ubyte(cso->alpha.ref_value)); if (cso->stencil[0].enabled) { - sb_method(sb, NV34TCL_STENCIL_FRONT_ENABLE, 3); + sb_method(sb, NV30_3D_STENCIL_ENABLE(0), 3); sb_data (sb, cso->stencil[0].enabled ? 1 : 0); sb_data (sb, cso->stencil[0].writemask); sb_data (sb, nvgl_comparison_op(cso->stencil[0].func)); - sb_method(sb, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4); + sb_method(sb, NV30_3D_STENCIL_FUNC_MASK(0), 4); sb_data (sb, cso->stencil[0].valuemask); sb_data (sb, nvgl_stencil_op(cso->stencil[0].fail_op)); sb_data (sb, nvgl_stencil_op(cso->stencil[0].zfail_op)); sb_data (sb, nvgl_stencil_op(cso->stencil[0].zpass_op)); } else { - sb_method(sb, NV34TCL_STENCIL_FRONT_ENABLE, 1); + sb_method(sb, NV30_3D_STENCIL_ENABLE(0), 1); sb_data (sb, 0); } if (cso->stencil[1].enabled) { - sb_method(sb, NV34TCL_STENCIL_BACK_ENABLE, 3); + sb_method(sb, NV30_3D_STENCIL_ENABLE(1), 3); sb_data (sb, cso->stencil[1].enabled ? 1 : 0); sb_data (sb, cso->stencil[1].writemask); sb_data (sb, nvgl_comparison_op(cso->stencil[1].func)); - sb_method(sb, NV34TCL_STENCIL_BACK_FUNC_MASK, 4); + sb_method(sb, NV30_3D_STENCIL_FUNC_MASK(1), 4); sb_data (sb, cso->stencil[1].valuemask); sb_data (sb, nvgl_stencil_op(cso->stencil[1].fail_op)); sb_data (sb, nvgl_stencil_op(cso->stencil[1].zfail_op)); sb_data (sb, nvgl_stencil_op(cso->stencil[1].zpass_op)); } else { - sb_method(sb, NV34TCL_STENCIL_BACK_ENABLE, 1); + sb_method(sb, NV30_3D_STENCIL_ENABLE(1), 1); sb_data (sb, 0); } @@ -404,76 +261,6 @@ nvfx_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) FREE(zsaso); } -static void * -nvfx_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_vertex_program *vp; - - vp = CALLOC(1, sizeof(struct nvfx_vertex_program)); - vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe); - - return (void *)vp; -} - -static void -nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->vertprog = hwcso; - nvfx->dirty |= NVFX_NEW_VERTPROG; - nvfx->draw_dirty |= NVFX_NEW_VERTPROG; -} - -static void -nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_vertex_program *vp = hwcso; - - draw_delete_vertex_shader(nvfx->draw, vp->draw); - nvfx_vertprog_destroy(nvfx, vp); - FREE((void*)vp->pipe.tokens); - FREE(vp); -} - -static void * -nvfx_fp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nvfx_fragment_program *fp; - - fp = CALLOC(1, sizeof(struct nvfx_fragment_program)); - fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - - tgsi_scan_shader(fp->pipe.tokens, &fp->info); - - return (void *)fp; -} - -static void -nvfx_fp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->fragprog = hwcso; - nvfx->dirty |= NVFX_NEW_FRAGPROG; -} - -static void -nvfx_fp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_fragment_program *fp = hwcso; - - nvfx_fragprog_destroy(nvfx, fp); - FREE((void*)fp->pipe.tokens); - FREE(fp); -} - static void nvfx_set_blend_color(struct pipe_context *pipe, const struct pipe_blend_color *bcol) @@ -506,13 +293,19 @@ nvfx_set_clip_state(struct pipe_context *pipe, } static void +nvfx_set_sample_mask(struct pipe_context *pipe, + unsigned sample_mask) +{ +} + +static void nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct pipe_resource *buf ) { struct nvfx_context *nvfx = nvfx_context(pipe); nvfx->constbuf[shader] = buf; - nvfx->constbuf_nr[shader] = buf->width0 / (4 * sizeof(float)); + nvfx->constbuf_nr[shader] = buf ? (buf->width0 / (4 * sizeof(float))) : 0; if (shader == PIPE_SHADER_VERTEX) { nvfx->dirty |= NVFX_NEW_VERTCONST; @@ -528,7 +321,10 @@ nvfx_set_framebuffer_state(struct pipe_context *pipe, { struct nvfx_context *nvfx = nvfx_context(pipe); - nvfx->framebuffer = *fb; + if(fb) + util_copy_framebuffer_state(&nvfx->framebuffer, fb); + else + util_unreference_framebuffer_state(&nvfx->framebuffer); nvfx->dirty |= NVFX_NEW_FB; } @@ -563,51 +359,6 @@ nvfx_set_viewport_state(struct pipe_context *pipe, nvfx->draw_dirty |= NVFX_NEW_VIEWPORT; } -static void -nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_buffer *vb) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - memcpy(nvfx->vtxbuf, vb, sizeof(*vb) * count); - nvfx->vtxbuf_nr = count; - - nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->draw_dirty |= NVFX_NEW_ARRAYS; -} - -static void * -nvfx_vtxelts_state_create(struct pipe_context *pipe, - unsigned num_elements, - const struct pipe_vertex_element *elements) -{ - struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state); - - assert(num_elements < 16); /* not doing fallbacks yet */ - cso->num_elements = num_elements; - memcpy(cso->pipe, elements, num_elements * sizeof(*elements)); - -/* nvfx_vtxelt_construct(cso);*/ - - return (void *)cso; -} - -static void -nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->vtxelt = hwcso; - nvfx->dirty |= NVFX_NEW_ARRAYS; - /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/ -} - void nvfx_init_state_functions(struct nvfx_context *nvfx) { @@ -615,13 +366,6 @@ nvfx_init_state_functions(struct nvfx_context *nvfx) nvfx->pipe.bind_blend_state = nvfx_blend_state_bind; nvfx->pipe.delete_blend_state = nvfx_blend_state_delete; - nvfx->pipe.create_sampler_state = nvfx_sampler_state_create; - nvfx->pipe.bind_fragment_sampler_states = nvfx_sampler_state_bind; - nvfx->pipe.delete_sampler_state = nvfx_sampler_state_delete; - nvfx->pipe.set_fragment_sampler_views = nvfx_set_fragment_sampler_views; - nvfx->pipe.create_sampler_view = nvfx_create_sampler_view; - nvfx->pipe.sampler_view_destroy = nvfx_sampler_view_destroy; - nvfx->pipe.create_rasterizer_state = nvfx_rasterizer_state_create; nvfx->pipe.bind_rasterizer_state = nvfx_rasterizer_state_bind; nvfx->pipe.delete_rasterizer_state = nvfx_rasterizer_state_delete; @@ -633,26 +377,13 @@ nvfx_init_state_functions(struct nvfx_context *nvfx) nvfx->pipe.delete_depth_stencil_alpha_state = nvfx_depth_stencil_alpha_state_delete; - nvfx->pipe.create_vs_state = nvfx_vp_state_create; - nvfx->pipe.bind_vs_state = nvfx_vp_state_bind; - nvfx->pipe.delete_vs_state = nvfx_vp_state_delete; - - nvfx->pipe.create_fs_state = nvfx_fp_state_create; - nvfx->pipe.bind_fs_state = nvfx_fp_state_bind; - nvfx->pipe.delete_fs_state = nvfx_fp_state_delete; - nvfx->pipe.set_blend_color = nvfx_set_blend_color; nvfx->pipe.set_stencil_ref = nvfx_set_stencil_ref; nvfx->pipe.set_clip_state = nvfx_set_clip_state; + nvfx->pipe.set_sample_mask = nvfx_set_sample_mask; nvfx->pipe.set_constant_buffer = nvfx_set_constant_buffer; nvfx->pipe.set_framebuffer_state = nvfx_set_framebuffer_state; nvfx->pipe.set_polygon_stipple = nvfx_set_polygon_stipple; nvfx->pipe.set_scissor_state = nvfx_set_scissor_state; nvfx->pipe.set_viewport_state = nvfx_set_viewport_state; - - nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; - nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; - nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; - - nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; } diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h index 676d4fd34bb..15e1cbb1986 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.h +++ b/src/gallium/drivers/nvfx/nvfx_state.h @@ -5,11 +5,11 @@ #include "pipe/p_video_state.h" #include "tgsi/tgsi_scan.h" #include "nouveau/nouveau_statebuf.h" +#include "util/u_dynarray.h" +#include "util/u_linkage.h" struct nvfx_vertex_program_exec { uint32_t data[4]; - boolean has_branch_offset; - int const_index; }; struct nvfx_vertex_program_data { @@ -18,19 +18,16 @@ struct nvfx_vertex_program_data { }; struct nvfx_vertex_program { - struct pipe_shader_state pipe; - - struct draw_vertex_shader *draw; - - boolean translated; - - struct pipe_clip_state ucp; + unsigned long long id; struct nvfx_vertex_program_exec *insns; unsigned nr_insns; struct nvfx_vertex_program_data *consts; unsigned nr_consts; + char generic_to_fp_input[256]; + int sprite_fp_input; + struct nouveau_resource *exec; unsigned exec_start; struct nouveau_resource *data; @@ -39,7 +36,24 @@ struct nvfx_vertex_program { uint32_t ir; uint32_t or; - uint32_t clip_ctrl; + int clip_nr; + + struct util_dynarray branch_relocs; + struct util_dynarray const_relocs; +}; + +#define NVFX_VP_FAILED ((struct nvfx_vertex_program*)-1) + +struct nvfx_pipe_vertex_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + unsigned draw_elements; + boolean draw_no_elements; + struct draw_vertex_shader *draw_vs; + struct nvfx_vertex_program* draw_vp; + + struct nvfx_vertex_program* vp; }; struct nvfx_fragment_program_data { @@ -50,15 +64,15 @@ struct nvfx_fragment_program_data { struct nvfx_fragment_program_bo { struct nvfx_fragment_program_bo* next; struct nouveau_bo* bo; + unsigned char* slots; char insn[] __attribute__((aligned(16))); }; struct nvfx_fragment_program { - struct pipe_shader_state pipe; - struct tgsi_shader_info info; - - boolean translated; unsigned samplers; + unsigned point_sprite_control; + unsigned or; + unsigned coord_conventions; uint32_t *insn; int insn_len; @@ -66,13 +80,36 @@ struct nvfx_fragment_program { struct nvfx_fragment_program_data *consts; unsigned nr_consts; + /* the slot at num_slots is for the sprite coordinate, if any */ + unsigned num_slots; /* how many input semantics? */ + unsigned char slot_to_generic[10]; /* semantics */ + unsigned char slot_to_fp_input[11]; /* current assignment of slots for each used semantic */ + struct util_dynarray slot_relocations[11]; + + /* This is reset to progs on any relocation update, and decreases every time we + * move to a new prog due to a constant update + * When this is the same as progs, applying relocations is no longer necessary. + */ + unsigned progs_left_with_obsolete_slot_assignments; + + unsigned long long last_vp_id; + unsigned last_sprite_coord_enable; + uint32_t fp_control; unsigned bo_prog_idx; unsigned prog_size; unsigned progs_per_bo; + unsigned progs; + struct nvfx_fragment_program_bo* fpbo; }; +struct nvfx_pipe_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + struct nvfx_fragment_program* fps[2]; +}; #endif diff --git a/src/gallium/drivers/nvfx/nvfx_state_blend.c b/src/gallium/drivers/nvfx/nvfx_state_blend.c deleted file mode 100644 index fe34e98364c..00000000000 --- a/src/gallium/drivers/nvfx/nvfx_state_blend.c +++ /dev/null @@ -1,22 +0,0 @@ -#include "nvfx_context.h" - -void -nvfx_state_blend_validate(struct nvfx_context *nvfx) -{ - struct nouveau_channel* chan = nvfx->screen->base.channel; - sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len); -} - -void -nvfx_state_blend_colour_validate(struct nvfx_context *nvfx) -{ - struct nouveau_channel* chan = nvfx->screen->base.channel; - struct pipe_blend_color *bcol = &nvfx->blend_colour; - - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV34TCL_BLEND_COLOR, 1)); - OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0))); -} diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index f91ae19ecd3..501fdd4430c 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -1,43 +1,263 @@ #include "nvfx_context.h" #include "nvfx_state.h" +#include "nvfx_resource.h" #include "draw/draw_context.h" +void +nvfx_state_viewport_validate(struct nvfx_context *nvfx) +{ + struct nouveau_channel *chan = nvfx->screen->base.channel; + struct pipe_viewport_state *vpt = &nvfx->viewport; + + WAIT_RING(chan, 11); + if(nvfx->render_mode == HW) { + OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8)); + OUT_RINGf(chan, vpt->translate[0]); + OUT_RINGf(chan, vpt->translate[1]); + OUT_RINGf(chan, vpt->translate[2]); + OUT_RINGf(chan, vpt->translate[3]); + OUT_RINGf(chan, vpt->scale[0]); + OUT_RINGf(chan, vpt->scale[1]); + OUT_RINGf(chan, vpt->scale[2]); + OUT_RINGf(chan, vpt->scale[3]); + OUT_RING(chan, RING_3D(0x1d78, 1)); + OUT_RING(chan, 1); + } else { + OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8)); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 1.0f); + OUT_RINGf(chan, 1.0f); + OUT_RINGf(chan, 1.0f); + OUT_RINGf(chan, 1.0f); + OUT_RING(chan, RING_3D(0x1d78, 1)); + OUT_RING(chan, nvfx->is_nv4x ? 0x110 : 1); + } +} + +void +nvfx_state_scissor_validate(struct nvfx_context *nvfx) +{ + struct nouveau_channel *chan = nvfx->screen->base.channel; + struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; + struct pipe_scissor_state *s = &nvfx->scissor; + + if ((rast->scissor == 0 && nvfx->state.scissor_enabled == 0)) + return; + nvfx->state.scissor_enabled = rast->scissor; + + WAIT_RING(chan, 3); + OUT_RING(chan, RING_3D(NV30_3D_SCISSOR_HORIZ, 2)); + if (nvfx->state.scissor_enabled) { + OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx); + OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny); + } else { + OUT_RING(chan, 4096 << 16); + OUT_RING(chan, 4096 << 16); + } +} + +void +nvfx_state_sr_validate(struct nvfx_context *nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + struct pipe_stencil_ref *sr = &nvfx->stencil_ref; + + WAIT_RING(chan, 4); + OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(0), 1)); + OUT_RING(chan, sr->ref_value[0]); + OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(1), 1)); + OUT_RING(chan, sr->ref_value[1]); +} + +void +nvfx_state_blend_colour_validate(struct nvfx_context *nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + struct pipe_blend_color *bcol = &nvfx->blend_colour; + + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV30_3D_BLEND_COLOR, 1)); + OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); +} + +void +nvfx_state_stipple_validate(struct nvfx_context *nvfx) +{ + struct nouveau_channel *chan = nvfx->screen->base.channel; + + WAIT_RING(chan, 33); + OUT_RING(chan, RING_3D(NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32)); + OUT_RINGp(chan, nvfx->stipple, 32); +} + +static void +nvfx_coord_conventions_validate(struct nvfx_context* nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned value = nvfx->hw_fragprog->coord_conventions; + if(value & NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED) + value |= nvfx->framebuffer.height << NV30_3D_COORD_CONVENTIONS_HEIGHT__SHIFT; + + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV30_3D_COORD_CONVENTIONS, 1)); + OUT_RING(chan, value); +} + +static void +nvfx_ucp_validate(struct nvfx_context* nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned enables[7] = + { + 0, + NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0, + NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE1, + NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE1 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE2, + NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE1 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE2 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE3, + NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE1 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE2 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE3 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE4, + NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE1 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE2 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE3 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE4 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE5, + }; + + if(!nvfx->use_vp_clipping) + { + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1)); + OUT_RING(chan, 0); + + WAIT_RING(chan, 6 * 4 + 1); + OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANE(0, 0), nvfx->clip.nr * 4)); + OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4); + } + + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1)); + OUT_RING(chan, enables[nvfx->clip.nr]); +} + +static void +nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned i; + struct nvfx_vertex_program* vp = nvfx->hw_vertprog; + if(nvfx->clip.nr != vp->clip_nr) + { + unsigned idx; + WAIT_RING(chan, 14); + + /* remove last instruction bit */ + if(vp->clip_nr >= 0) + { + idx = vp->nr_insns - 7 + vp->clip_nr; + OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1)); + OUT_RING(chan, vp->exec->start + idx); + OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4)); + OUT_RINGp (chan, vp->insns[idx].data, 4); + } + + /* set last instruction bit */ + idx = vp->nr_insns - 7 + nvfx->clip.nr; + OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1)); + OUT_RING(chan, vp->exec->start + idx); + OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4)); + OUT_RINGp(chan, vp->insns[idx].data, 3); + OUT_RING(chan, vp->insns[idx].data[3] | 1); + vp->clip_nr = nvfx->clip.nr; + } + + // TODO: only do this for the ones changed + WAIT_RING(chan, 6 * 6); + for(i = 0; i < nvfx->clip.nr; ++i) + { + OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5)); + OUT_RING(chan, vp->data->start + i); + OUT_RINGp (chan, nvfx->clip.ucp[i], 4); + } +} + static boolean nvfx_state_validate_common(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; - unsigned dirty = nvfx->dirty; + unsigned dirty; + unsigned still_dirty = 0; + int new_fb_mode = -1; /* 1 = all swizzled, 0 = make all linear */ + boolean flush_tex_cache = FALSE; + unsigned render_temps; if(nvfx != nvfx->screen->cur_ctx) - dirty = ~0; + { + nvfx->dirty = ~0; + nvfx->hw_vtxelt_nr = 16; + nvfx->hw_pointsprite_control = -1; + nvfx->hw_vp_output = -1; + nvfx->screen->cur_ctx = nvfx; + nvfx->relocs_needed = NVFX_RELOCATE_ALL; + } + + /* These can trigger use the of 3D engine to copy temporaries. + * That will recurse here and thus dirty all 3D state, so we need to this before anything else, and in a loop.. + * This converges to having clean temps, then binding both fragtexes and framebuffers. + */ + while(nvfx->dirty & (NVFX_NEW_FB | NVFX_NEW_SAMPLER)) + { + if(nvfx->dirty & NVFX_NEW_SAMPLER) + { + nvfx->dirty &=~ NVFX_NEW_SAMPLER; + nvfx_fragtex_validate(nvfx); + + // TODO: only set this if really necessary + flush_tex_cache = TRUE; + } + + if(nvfx->dirty & NVFX_NEW_FB) + { + nvfx->dirty &=~ NVFX_NEW_FB; + new_fb_mode = nvfx_framebuffer_prepare(nvfx); + + // TODO: make sure this doesn't happen, i.e. fbs have matching formats + assert(new_fb_mode >= 0); + } + } + + dirty = nvfx->dirty; if(nvfx->render_mode == HW) { - if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_VERTCONST | NVFX_NEW_UCP)) + if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_VERTCONST)) { if(!nvfx_vertprog_validate(nvfx)) return FALSE; } - if(dirty & (NVFX_NEW_ARRAYS)) + if(dirty & NVFX_NEW_ARRAYS) { if(!nvfx_vbo_validate(nvfx)) return FALSE; } + + if(dirty & NVFX_NEW_INDEX) + { + if(nvfx->use_index_buffer) + nvfx_idxbuf_validate(nvfx); + else + still_dirty = NVFX_NEW_INDEX; + } } else { - /* TODO: this looks a bit misdesigned */ - if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP)) - nvfx_vertprog_validate(nvfx); - - if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG)) - nvfx_vtxfmt_validate(nvfx); + if(dirty & NVFX_NEW_VERTPROG) { + assert(nvfx_vertprog_validate(nvfx)); + nvfx_vbo_swtnl_validate(nvfx); + } } - if(dirty & NVFX_NEW_FB) - nvfx_state_framebuffer_validate(nvfx); - if(dirty & NVFX_NEW_RAST) sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len); @@ -47,11 +267,35 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & NVFX_NEW_STIPPLE) nvfx_state_stipple_validate(nvfx); - if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST)) + if(nvfx->dirty & NVFX_NEW_UCP) + nvfx_ucp_validate(nvfx); + + if(nvfx->use_vp_clipping && (nvfx->dirty & (NVFX_NEW_UCP | NVFX_NEW_VERTPROG))) + nvfx_vertprog_ucp_validate(nvfx); + + if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST | NVFX_NEW_VERTPROG | NVFX_NEW_SPRITE)) + { nvfx_fragprog_validate(nvfx); + if(dirty & NVFX_NEW_FRAGPROG) + flush_tex_cache = TRUE; // TODO: do we need this? + } - if(dirty & NVFX_NEW_SAMPLER) - nvfx_fragtex_validate(nvfx); + if(nvfx->is_nv4x) + { + unsigned vp_output = nvfx->hw_vertprog->or | nvfx->hw_fragprog->or; + vp_output |= (1 << (nvfx->clip.nr + 6)) - (1 << 6); + + if(vp_output != nvfx->hw_vp_output) + { + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV40_3D_VP_RESULT_EN, 1)); + OUT_RING(chan, vp_output); + nvfx->hw_vp_output = vp_output; + } + } + + if(new_fb_mode >= 0) + nvfx_framebuffer_validate(nvfx, new_fb_mode); if(dirty & NVFX_NEW_BLEND) sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len); @@ -65,31 +309,69 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & NVFX_NEW_SR) nvfx_state_sr_validate(nvfx); -/* Having this depend on FB looks wrong, but it seems - necessary to make this work on nv3x +/* All these dependencies are wrong, but otherwise + etracer, neverball, foobillard, glest totally misrender TODO: find the right fix */ - if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_FB)) + if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_RAST | NVFX_NEW_ZSA) || (new_fb_mode >= 0)) + { nvfx_state_viewport_validate(nvfx); + } - /* TODO: could nv30 need this or something similar too? */ - if((dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SAMPLER)) && nvfx->is_nv4x) { - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); - OUT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); - OUT_RING(chan, 1); + if(dirty & NVFX_NEW_ZSA || (new_fb_mode >= 0)) + { + WAIT_RING(chan, 3); + OUT_RING(chan, RING_3D(NV30_3D_DEPTH_WRITE_ENABLE, 2)); + OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask); + OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled); + } + + if((new_fb_mode >= 0) || (dirty & NVFX_NEW_FRAGPROG)) + nvfx_coord_conventions_validate(nvfx); + + if(flush_tex_cache) + { + // TODO: what about nv30? + if(nvfx->is_nv4x) + { + WAIT_RING(chan, 4); + OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1)); + OUT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1)); + OUT_RING(chan, 1); + } + } + + nvfx->dirty = dirty & still_dirty; + + render_temps = nvfx->state.render_temps; + if(render_temps) + { + for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i) + { + if(render_temps & (1 << i)) { + assert(((struct nvfx_surface*)nvfx->framebuffer.cbufs[i])->temp); + util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]), + (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]); + } + } + + if(render_temps & 0x80) { + assert(((struct nvfx_surface*)nvfx->framebuffer.zsbuf)->temp); + util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf), + (struct util_dirty_surface*)nvfx->framebuffer.zsbuf); + } } - nvfx->dirty = 0; + return TRUE; } -void -nvfx_state_emit(struct nvfx_context *nvfx) +inline void +nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs) { struct nouveau_channel* chan = nvfx->screen->base.channel; /* we need to ensure there is enough space to output relocations in one go */ - unsigned max_relocs = 0 + const unsigned max_relocs = 0 + 16 /* vertex buffers, incl. dma flag */ + 2 /* index buffer plus format+dma flag */ + 2 * 5 /* 4 cbufs + zsbuf, plus dma objects */ @@ -97,25 +379,24 @@ nvfx_state_emit(struct nvfx_context *nvfx) + 2 * 4 /* vertex textures plus format+dma flag */ + 1 /* fragprog incl dma flag */ ; + MARK_RING(chan, max_relocs * 2, max_relocs * 2); - nvfx_state_relocate(nvfx); -} -void -nvfx_state_relocate(struct nvfx_context *nvfx) -{ - nvfx_framebuffer_relocate(nvfx); - nvfx_fragtex_relocate(nvfx); - nvfx_fragprog_relocate(nvfx); - if (nvfx->render_mode == HW) + if(relocs & NVFX_RELOCATE_FRAMEBUFFER) + nvfx_framebuffer_relocate(nvfx); + if(relocs & NVFX_RELOCATE_FRAGTEX) + nvfx_fragtex_relocate(nvfx); + if(relocs & NVFX_RELOCATE_FRAGPROG) + nvfx_fragprog_relocate(nvfx); + if(relocs & NVFX_RELOCATE_VTXBUF) nvfx_vbo_relocate(nvfx); + if(relocs & NVFX_RELOCATE_IDXBUF) + nvfx_idxbuf_relocate(nvfx); } boolean nvfx_state_validate(struct nvfx_context *nvfx) { - boolean was_sw = nvfx->fallback_swtnl ? TRUE : FALSE; - if (nvfx->render_mode != HW) { /* Don't even bother trying to go back to hw if none * of the states that caused swtnl previously have changed. @@ -134,9 +415,6 @@ nvfx_state_validate(struct nvfx_context *nvfx) if(!nvfx_state_validate_common(nvfx)) return FALSE; - if (was_sw) - NOUVEAU_ERR("swtnl->hw\n"); - return TRUE; } @@ -147,7 +425,11 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx) /* Setup for swtnl */ if (nvfx->render_mode == HW) { - NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl); + static boolean warned = FALSE; + if(!warned) { + NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl); + warned = TRUE; + } nvfx->pipe.flush(&nvfx->pipe, 0, NULL); nvfx->dirty |= (NVFX_NEW_VIEWPORT | NVFX_NEW_VERTPROG | @@ -155,8 +437,11 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx) nvfx->render_mode = SWTNL; } - if (nvfx->draw_dirty & NVFX_NEW_VERTPROG) - draw_bind_vertex_shader(draw, nvfx->vertprog->draw); + if (nvfx->draw_dirty & NVFX_NEW_VERTPROG) { + if(!nvfx->vertprog->draw_vs) + nvfx->vertprog->draw_vs = draw_create_vertex_shader(draw, &nvfx->vertprog->pipe); + draw_bind_vertex_shader(draw, nvfx->vertprog->draw_vs); + } if (nvfx->draw_dirty & NVFX_NEW_RAST) draw_set_rasterizer_state(draw, &nvfx->rasterizer->pipe, @@ -173,6 +458,9 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx) draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe); } + if (nvfx->draw_dirty & NVFX_NEW_INDEX) + draw_set_index_buffer(draw, &nvfx->idxbuf); + nvfx_state_validate_common(nvfx); nvfx->draw_dirty = 0; diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c index 360e569f77c..30e48c80735 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_fb.c +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -1,21 +1,57 @@ #include "nvfx_context.h" #include "nvfx_resource.h" -#include "nouveau/nouveau_util.h" +#include "util/u_format.h" +static inline boolean +nvfx_surface_linear_renderable(struct pipe_surface* surf) +{ + /* TODO: precompute this in nvfx_surface creation */ + return (surf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR) + && !(surf->offset & 63) + && !(((struct nvfx_surface*)surf)->pitch & 63); +} +static inline boolean +nvfx_surface_swizzled_renderable(struct pipe_framebuffer_state* fb, struct pipe_surface* surf) +{ + /* TODO: precompute this in nvfx_surface creation */ + return !((struct nvfx_miptree*)surf->texture)->linear_pitch + && (surf->texture->target != PIPE_TEXTURE_3D || u_minify(surf->texture->depth0, surf->level) <= 1) + && !(surf->offset & 127) + && (surf->width == fb->width) + && (surf->height == fb->height) + && !((struct nvfx_surface*)surf)->temp + && (surf->format == PIPE_FORMAT_B8G8R8A8_UNORM || surf->format == PIPE_FORMAT_B8G8R8X8_UNORM || surf->format == PIPE_FORMAT_B5G6R5_UNORM); +} -void -nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) +static boolean +nvfx_surface_get_render_target(struct pipe_surface* surf, int all_swizzled, struct nvfx_render_target* target) +{ + struct nvfx_surface* ns = (struct nvfx_surface*)surf; + if(!ns->temp) + { + target->bo = ((struct nvfx_miptree*)surf->texture)->base.bo; + target->offset = surf->offset; + target->pitch = align(ns->pitch, 64); + assert(target->pitch); + return FALSE; + } + else + { + target->offset = 0; + target->pitch = ns->temp->linear_pitch; + target->bo = ns->temp->base.bo; + assert(target->pitch); + return TRUE; + } +} + +int +nvfx_framebuffer_prepare(struct nvfx_context *nvfx) { struct pipe_framebuffer_state *fb = &nvfx->framebuffer; - struct nouveau_channel *chan = nvfx->screen->base.channel; - uint32_t rt_enable = 0, rt_format = 0; - int i, colour_format = 0, zeta_format = 0; - int depth_only = 0; - unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; - unsigned w = fb->width; - unsigned h = fb->height; - int colour_bits = 32, zeta_bits = 32; + int i, color_format = 0, zeta_format = 0; + int all_swizzled = 1; if(!nvfx->is_nv4x) assert(fb->nr_cbufs <= 2); @@ -23,130 +59,166 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) assert(fb->nr_cbufs <= 4); for (i = 0; i < fb->nr_cbufs; i++) { - if (colour_format) - assert(colour_format == fb->cbufs[i]->format); - else - colour_format = fb->cbufs[i]->format; - - rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); - nvfx->hw_rt[i].bo = nvfx_surface_buffer(fb->cbufs[i]); - nvfx->hw_rt[i].offset = fb->cbufs[i]->offset; - nvfx->hw_rt[i].pitch = ((struct nv04_surface *)fb->cbufs[i])->pitch; + if (color_format) { + if(color_format != fb->cbufs[i]->format) + return -1; + } else + color_format = fb->cbufs[i]->format; + + if(!nvfx_surface_swizzled_renderable(fb, fb->cbufs[i])) + all_swizzled = 0; + } + + if (fb->zsbuf) { + /* TODO: return FALSE if we have a format not supporting a depth buffer (e.g. r8); currently those are not supported at all */ + if(!nvfx_surface_swizzled_renderable(fb, fb->zsbuf)) + all_swizzled = 0; + + if(all_swizzled && util_format_get_blocksize(color_format) != util_format_get_blocksize(zeta_format)) + all_swizzled = 0; + } + + for (i = 0; i < fb->nr_cbufs; i++) { + if(!((struct nvfx_surface*)fb->cbufs[i])->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->cbufs[i])) + nvfx_surface_create_temp(&nvfx->pipe, fb->cbufs[i]); } + + if(fb->zsbuf) { + if(!((struct nvfx_surface*)fb->zsbuf)->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->zsbuf)) + nvfx_surface_create_temp(&nvfx->pipe, fb->zsbuf); + } + + return all_swizzled; +} + +void +nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) +{ + struct pipe_framebuffer_state *fb = &nvfx->framebuffer; + struct nouveau_channel *chan = nvfx->screen->base.channel; + uint32_t rt_enable, rt_format; + int i; + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + unsigned w = fb->width; + unsigned h = fb->height; + + rt_enable = (NV30_3D_RT_ENABLE_COLOR0 << fb->nr_cbufs) - 1; + if (rt_enable & (NV30_3D_RT_ENABLE_COLOR1 | + NV40_3D_RT_ENABLE_COLOR2 | NV40_3D_RT_ENABLE_COLOR3)) + rt_enable |= NV30_3D_RT_ENABLE_MRT; + + nvfx->state.render_temps = 0; + + for (i = 0; i < fb->nr_cbufs; i++) + nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->cbufs[i], prepare_result, &nvfx->hw_rt[i]) << i; + for(; i < 4; ++i) nvfx->hw_rt[i].bo = 0; - if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | - NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) - rt_enable |= NV34TCL_RT_ENABLE_MRT; - if (fb->zsbuf) { - zeta_format = fb->zsbuf->format; - nvfx->hw_zeta.bo = nvfx_surface_buffer(fb->zsbuf); - nvfx->hw_zeta.offset = fb->zsbuf->offset; - nvfx->hw_zeta.pitch = ((struct nv04_surface *)fb->zsbuf)->pitch; + nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->zsbuf, prepare_result, &nvfx->hw_zeta) << 7; + + assert(util_format_get_stride(fb->zsbuf->format, fb->width) <= nvfx->hw_zeta.pitch); + assert(nvfx->hw_zeta.offset + nvfx->hw_zeta.pitch * fb->height <= nvfx->hw_zeta.bo->size); } - else - nvfx->hw_zeta.bo = 0; - - if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 | - NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) { - /* Render to at least a colour buffer */ - if (!(fb->cbufs[0]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - for (i = 1; i < fb->nr_cbufs; i++) - assert(!(fb->cbufs[i]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)); - - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(fb->cbufs[0]->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(fb->cbufs[0]->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + + if (prepare_result) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + + rt_format = NV30_3D_RT_FORMAT_TYPE_SWIZZLED | + (util_logbase2(fb->width) << NV30_3D_RT_FORMAT_LOG2_WIDTH__SHIFT) | + (util_logbase2(fb->height) << NV30_3D_RT_FORMAT_LOG2_HEIGHT__SHIFT); + } else + rt_format = NV30_3D_RT_FORMAT_TYPE_LINEAR; + + if(fb->nr_cbufs > 0) { + switch (fb->cbufs[0]->format) { + case PIPE_FORMAT_B8G8R8X8_UNORM: + rt_format |= NV30_3D_RT_FORMAT_COLOR_X8R8G8B8; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + case 0: + rt_format |= NV30_3D_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R8G8B8X8_UNORM: + rt_format |= NV30_3D_RT_FORMAT_COLOR_X8B8G8R8; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + rt_format |= NV30_3D_RT_FORMAT_COLOR_A8B8G8R8; + break; + case PIPE_FORMAT_B5G6R5_UNORM: + rt_format |= NV30_3D_RT_FORMAT_COLOR_R5G6B5; + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + rt_format |= NV30_3D_RT_FORMAT_COLOR_A32B32G32R32_FLOAT; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + rt_format |= NV30_3D_RT_FORMAT_COLOR_A16B16G16R16_FLOAT; + break; + default: + assert(0); } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - } else if (fb->zsbuf) { - depth_only = 1; - - /* Render to depth buffer only */ - if (!(fb->zsbuf->texture->usage & NVFX_RESOURCE_FLAG_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(fb->zsbuf->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(fb->zsbuf->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + } else if(fb->zsbuf && util_format_get_blocksize(fb->zsbuf->format) == 2) + rt_format |= NV30_3D_RT_FORMAT_COLOR_R5G6B5; + else + rt_format |= NV30_3D_RT_FORMAT_COLOR_A8R8G8B8; + + if(fb->zsbuf) { + switch (fb->zsbuf->format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV30_3D_RT_FORMAT_ZETA_Z16; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case 0: + rt_format |= NV30_3D_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - } else { - return; - } + } else if(fb->nr_cbufs && util_format_get_blocksize(fb->cbufs[0]->format) == 2) + rt_format |= NV30_3D_RT_FORMAT_ZETA_Z16; + else + rt_format |= NV30_3D_RT_FORMAT_ZETA_Z24S8; - switch (colour_format) { - case PIPE_FORMAT_B8G8R8X8_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_B5G6R5_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; - colour_bits = 16; - break; - default: - assert(0); - } + MARK_RING(chan, 42, 10); - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; - zeta_bits = 16; - break; - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } + if ((rt_enable & NV30_3D_RT_ENABLE_COLOR0) || fb->zsbuf) { + struct nvfx_render_target *rt0 = &nvfx->hw_rt[0]; + uint32_t pitch; - if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) { - /* TODO: does this limitation really exist? - TODO: can it be worked around somehow? */ - assert(0); - } + if(!(rt_enable & NV30_3D_RT_ENABLE_COLOR0)) + rt0 = &nvfx->hw_zeta; - if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) - || ((!nvfx->is_nv4x) && depth_only)) { - struct nvfx_render_target *rt0 = (depth_only ? &nvfx->hw_zeta : &nvfx->hw_rt[0]); - uint32_t pitch = rt0->pitch; + pitch = rt0->pitch; if(!nvfx->is_nv4x) { - if (nvfx->hw_zeta.bo) { + if (nvfx->hw_zeta.bo) pitch |= (nvfx->hw_zeta.pitch << 16); - } else { + else pitch |= (pitch << 16); - } } - OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 1)); + //printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch); + + OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 1)); OUT_RELOC(chan, rt0->bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV34TCL_COLOR0_PITCH, 2)); + OUT_RING(chan, RING_3D(NV30_3D_COLOR0_PITCH, 2)); OUT_RING(chan, pitch); OUT_RELOC(chan, rt0->bo, rt0->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); } - if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { - OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR1, 1)); + if (rt_enable & NV30_3D_RT_ENABLE_COLOR1) { + OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1)); OUT_RELOC(chan, nvfx->hw_rt[1].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV34TCL_COLOR1_OFFSET, 2)); + OUT_RING(chan, RING_3D(NV30_3D_COLOR1_OFFSET, 2)); OUT_RELOC(chan, nvfx->hw_rt[1].bo, nvfx->hw_rt[1].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); @@ -155,69 +227,72 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) if(nvfx->is_nv4x) { - if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { - OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR2, 1)); + if (rt_enable & NV40_3D_RT_ENABLE_COLOR2) { + OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR2, 1)); OUT_RELOC(chan, nvfx->hw_rt[2].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV40TCL_COLOR2_OFFSET, 1)); + OUT_RING(chan, RING_3D(NV40_3D_COLOR2_OFFSET, 1)); OUT_RELOC(chan, nvfx->hw_rt[2].bo, nvfx->hw_rt[2].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RING(chan, RING_3D(NV40TCL_COLOR2_PITCH, 1)); + OUT_RING(chan, RING_3D(NV40_3D_COLOR2_PITCH, 1)); OUT_RING(chan, nvfx->hw_rt[2].pitch); } - if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { - OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR3, 1)); + if (rt_enable & NV40_3D_RT_ENABLE_COLOR3) { + OUT_RING(chan, RING_3D(NV40_3D_DMA_COLOR3, 1)); OUT_RELOC(chan, nvfx->hw_rt[3].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV40TCL_COLOR3_OFFSET, 1)); + OUT_RING(chan, RING_3D(NV40_3D_COLOR3_OFFSET, 1)); OUT_RELOC(chan, nvfx->hw_rt[3].bo, nvfx->hw_rt[3].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RING(chan, RING_3D(NV40TCL_COLOR3_PITCH, 1)); + OUT_RING(chan, RING_3D(NV40_3D_COLOR3_PITCH, 1)); OUT_RING(chan, nvfx->hw_rt[3].pitch); } } - if (zeta_format) { - OUT_RING(chan, RING_3D(NV34TCL_DMA_ZETA, 1)); + if (fb->zsbuf) { + OUT_RING(chan, RING_3D(NV30_3D_DMA_ZETA, 1)); OUT_RELOC(chan, nvfx->hw_zeta.bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - OUT_RING(chan, RING_3D(NV34TCL_ZETA_OFFSET, 1)); + OUT_RING(chan, RING_3D(NV30_3D_ZETA_OFFSET, 1)); /* TODO: reverse engineer LMA */ OUT_RELOC(chan, nvfx->hw_zeta.bo, nvfx->hw_zeta.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); if(nvfx->is_nv4x) { - OUT_RING(chan, RING_3D(NV40TCL_ZETA_PITCH, 1)); + OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1)); OUT_RING(chan, nvfx->hw_zeta.pitch); } } + else if(nvfx->is_nv4x) { + OUT_RING(chan, RING_3D(NV40_3D_ZETA_PITCH, 1)); + OUT_RING(chan, 64); + } - OUT_RING(chan, RING_3D(NV34TCL_RT_ENABLE, 1)); + OUT_RING(chan, RING_3D(NV30_3D_RT_ENABLE, 1)); OUT_RING(chan, rt_enable); - OUT_RING(chan, RING_3D(NV34TCL_RT_HORIZ, 3)); + OUT_RING(chan, RING_3D(NV30_3D_RT_HORIZ, 3)); OUT_RING(chan, (w << 16) | 0); OUT_RING(chan, (h << 16) | 0); OUT_RING(chan, rt_format); - OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_HORIZ, 2)); + OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_HORIZ, 2)); OUT_RING(chan, (w << 16) | 0); OUT_RING(chan, (h << 16) | 0); - OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2)); + OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_CLIP_HORIZ(0), 2)); OUT_RING(chan, ((w - 1) << 16) | 0); OUT_RING(chan, ((h - 1) << 16) | 0); - OUT_RING(chan, RING_3D(0x1d88, 1)); - OUT_RING(chan, (1 << 12) | h); if(!nvfx->is_nv4x) { /* Wonder why this is needed, context should all be set to zero on init */ /* TODO: we can most likely remove this, after putting it in context init */ - OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TX_ORIGIN, 1)); + OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TX_ORIGIN, 1)); OUT_RING(chan, 0); } + nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER; } void @@ -230,21 +305,22 @@ nvfx_framebuffer_relocate(struct nvfx_context *nvfx) #define DO_(var, pfx, name) \ if(var.bo) { \ - OUT_RELOC(chan, var.bo, RING_3D(pfx##TCL_DMA_##name, 1), rt_flags, 0, 0); \ + OUT_RELOC(chan, var.bo, RING_3D(pfx##_3D_DMA_##name, 1), rt_flags, 0, 0); \ OUT_RELOC(chan, var.bo, 0, \ rt_flags | NOUVEAU_BO_OR, \ chan->vram->handle, chan->gart->handle); \ - OUT_RELOC(chan, var.bo, RING_3D(pfx##TCL_##name##_OFFSET, 1), rt_flags, 0, 0); \ + OUT_RELOC(chan, var.bo, RING_3D(pfx##_3D_##name##_OFFSET, 1), rt_flags, 0, 0); \ OUT_RELOC(chan, var.bo, \ var.offset, rt_flags | NOUVEAU_BO_LOW, \ 0, 0); \ } #define DO(pfx, num) DO_(nvfx->hw_rt[num], pfx, COLOR##num) - DO(NV34, 0); - DO(NV34, 1); + DO(NV30, 0); + DO(NV30, 1); DO(NV40, 2); DO(NV40, 3); - DO_(nvfx->hw_zeta, NV34, ZETA); + DO_(nvfx->hw_zeta, NV30, ZETA); + nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER; } diff --git a/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c b/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c deleted file mode 100644 index 7f14ae85d5a..00000000000 --- a/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c +++ /dev/null @@ -1,9 +0,0 @@ -#include "nvfx_context.h" - -void -nvfx_state_rasterizer_validate(struct nvfx_context *nvfx) -{ - struct nouveau_channel* chan = nvfx->screen->base.channel; - sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len); -} - diff --git a/src/gallium/drivers/nvfx/nvfx_state_scissor.c b/src/gallium/drivers/nvfx/nvfx_state_scissor.c deleted file mode 100644 index 9077266120a..00000000000 --- a/src/gallium/drivers/nvfx/nvfx_state_scissor.c +++ /dev/null @@ -1,23 +0,0 @@ -#include "nvfx_context.h" - -void -nvfx_state_scissor_validate(struct nvfx_context *nvfx) -{ - struct nouveau_channel *chan = nvfx->screen->base.channel; - struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; - struct pipe_scissor_state *s = &nvfx->scissor; - - if ((rast->scissor == 0 && nvfx->state.scissor_enabled == 0)) - return; - nvfx->state.scissor_enabled = rast->scissor; - - WAIT_RING(chan, 3); - OUT_RING(chan, RING_3D(NV34TCL_SCISSOR_HORIZ, 2)); - if (nvfx->state.scissor_enabled) { - OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx); - OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny); - } else { - OUT_RING(chan, 4096 << 16); - OUT_RING(chan, 4096 << 16); - } -} diff --git a/src/gallium/drivers/nvfx/nvfx_state_stipple.c b/src/gallium/drivers/nvfx/nvfx_state_stipple.c deleted file mode 100644 index 4da968f093f..00000000000 --- a/src/gallium/drivers/nvfx/nvfx_state_stipple.c +++ /dev/null @@ -1,26 +0,0 @@ -#include "nvfx_context.h" - -void -nvfx_state_stipple_validate(struct nvfx_context *nvfx) -{ - struct nouveau_channel *chan = nvfx->screen->base.channel; - struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; - - if ((rast->poly_stipple_enable == 0 && nvfx->state.stipple_enabled == 0)) - return; - - if (rast->poly_stipple_enable) { - unsigned i; - - WAIT_RING(chan, 35); - OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_ENABLE, 1)); - OUT_RING(chan, 1); - OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32)); - for (i = 0; i < 32; i++) - OUT_RING(chan, nvfx->stipple[i]); - } else { - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_ENABLE, 1)); - OUT_RING(chan, 0); - } -} diff --git a/src/gallium/drivers/nvfx/nvfx_state_viewport.c b/src/gallium/drivers/nvfx/nvfx_state_viewport.c deleted file mode 100644 index e983b16f321..00000000000 --- a/src/gallium/drivers/nvfx/nvfx_state_viewport.c +++ /dev/null @@ -1,35 +0,0 @@ -#include "nvfx_context.h" - -void -nvfx_state_viewport_validate(struct nvfx_context *nvfx) -{ - struct nouveau_channel *chan = nvfx->screen->base.channel; - struct pipe_viewport_state *vpt = &nvfx->viewport; - - WAIT_RING(chan, 11); - if(nvfx->render_mode == HW) { - OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TRANSLATE_X, 8)); - OUT_RINGf(chan, vpt->translate[0]); - OUT_RINGf(chan, vpt->translate[1]); - OUT_RINGf(chan, vpt->translate[2]); - OUT_RINGf(chan, vpt->translate[3]); - OUT_RINGf(chan, vpt->scale[0]); - OUT_RINGf(chan, vpt->scale[1]); - OUT_RINGf(chan, vpt->scale[2]); - OUT_RINGf(chan, vpt->scale[3]); - OUT_RING(chan, RING_3D(0x1d78, 1)); - OUT_RING(chan, 1); - } else { - OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TRANSLATE_X, 8)); - OUT_RINGf(chan, 0.0f); - OUT_RINGf(chan, 0.0f); - OUT_RINGf(chan, 0.0f); - OUT_RINGf(chan, 0.0f); - OUT_RINGf(chan, 1.0f); - OUT_RINGf(chan, 1.0f); - OUT_RINGf(chan, 1.0f); - OUT_RINGf(chan, 1.0f); - OUT_RING(chan, RING_3D(0x1d78, 1)); - OUT_RING(chan, nvfx->is_nv4x ? 0x110 : 1); - } -} diff --git a/src/gallium/drivers/nvfx/nvfx_state_zsa.c b/src/gallium/drivers/nvfx/nvfx_state_zsa.c deleted file mode 100644 index 608605d32bd..00000000000 --- a/src/gallium/drivers/nvfx/nvfx_state_zsa.c +++ /dev/null @@ -1,21 +0,0 @@ -#include "nvfx_context.h" - -void -nvfx_state_zsa_validate(struct nvfx_context *nvfx) -{ - struct nouveau_channel* chan = nvfx->screen->base.channel; - sb_emit(chan, nvfx->zsa->sb, nvfx->zsa->sb_len); -} - -void -nvfx_state_sr_validate(struct nvfx_context *nvfx) -{ - struct nouveau_channel* chan = nvfx->screen->base.channel; - struct pipe_stencil_ref *sr = &nvfx->stencil_ref; - - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(NV34TCL_STENCIL_FRONT_FUNC_REF, 1)); - OUT_RING(chan, sr->ref_value[0]); - OUT_RING(chan, RING_3D(NV34TCL_STENCIL_BACK_FUNC_REF, 1)); - OUT_RING(chan, sr->ref_value[1]); -} diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index 2e115650aeb..70adebc1be5 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -26,36 +26,485 @@ * **************************************************************************/ +#include "pipe/p_context.h" +#include "pipe/p_format.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" +#include "util/u_blitter.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_screen.h" #include "nvfx_context.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" +#include "nvfx_screen.h" +#include "nvfx_resource.h" +#include "nv04_2d.h" + +#include <nouveau/nouveau_bo.h> + +static INLINE void +nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format) +{ + unsigned bits = util_format_get_blocksizebits(format); + unsigned shift = 0; + rgn->one_bits = 0; + + switch(bits) + { + case 8: + rgn->bpps = 0; + break; + case 16: + rgn->bpps = 1; + if(format == PIPE_FORMAT_B5G5R5X1_UNORM) + rgn->one_bits = 1; + break; + case 32: + rgn->bpps = 2; + if(format == PIPE_FORMAT_R8G8B8X8_UNORM || format == PIPE_FORMAT_B8G8R8X8_UNORM) + rgn->one_bits = 8; + break; + case 64: + rgn->bpps = 2; + shift = 1; + break; + case 128: + rgn->bpps = 2; + shift = 2; + break; + } + + if(shift) { + rgn->x = util_format_get_nblocksx(format, rgn->x) << shift; + rgn->y = util_format_get_nblocksy(format, rgn->y); + rgn->w <<= shift; + } +} + +static INLINE void +nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, bool for_write) +{ + rgn->x = x; + rgn->y = y; + rgn->z = 0; + + if(surf->temp) + { + rgn->bo = surf->temp->base.bo; + rgn->offset = 0; + rgn->pitch = surf->temp->linear_pitch; + + if(for_write) + util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(&surf->base.base), &surf->base); + } else { + rgn->bo = ((struct nvfx_resource*)surf->base.base.texture)->bo; + rgn->offset = surf->base.base.offset; + + if(surf->base.base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR) + rgn->pitch = surf->pitch; + else + { + rgn->pitch = 0; + rgn->z = surf->base.base.zslice; + rgn->w = surf->base.base.width; + rgn->h = surf->base.base.height; + rgn->d = u_minify(surf->base.base.texture->depth0, surf->base.base.level); + } + } + + nvfx_region_set_format(rgn, surf->base.base.format); + if(!rgn->pitch) + nv04_region_try_to_linearize(rgn); +} + +static INLINE void +nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z, bool for_write) +{ + if(pt->target != PIPE_BUFFER) + { + struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, sub.face, sub.level, z); + if(ns && util_dirty_surface_is_dirty(&ns->base)) + { + nvfx_region_init_for_surface(rgn, ns, x, y, for_write); + return; + } + } + + rgn->bo = ((struct nvfx_resource*)pt)->bo; + rgn->offset = nvfx_subresource_offset(pt, sub.face, sub.level, z); + rgn->x = x; + rgn->y = y; + + if(pt->flags & NVFX_RESOURCE_FLAG_LINEAR) + { + rgn->pitch = nvfx_subresource_pitch(pt, sub.level); + rgn->z = 0; + } + else + { + rgn->pitch = 0; + rgn->z = z; + rgn->w = u_minify(pt->width0, sub.level); + rgn->h = u_minify(pt->height0, sub.level); + rgn->d = u_minify(pt->depth0, sub.level); + } + + nvfx_region_set_format(rgn, pt->format); + if(!rgn->pitch) + nv04_region_try_to_linearize(rgn); +} + +// don't save index buffer because blitter doesn't setit +static struct blitter_context* +nvfx_get_blitter(struct pipe_context* pipe, int copy) +{ + struct nvfx_context* nvfx = nvfx_context(pipe); + struct blitter_context** pblitter; + struct blitter_context* blitter; + + assert(nvfx->blitters_in_use < Elements(nvfx->blitter)); + + if(nvfx->query && !nvfx->blitters_in_use) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); + OUT_RING(chan, 0); + } + + pblitter = &nvfx->blitter[nvfx->blitters_in_use++]; + if(!*pblitter) + *pblitter = util_blitter_create(pipe); + blitter = *pblitter; + + util_blitter_save_blend(blitter, nvfx->blend); + util_blitter_save_depth_stencil_alpha(blitter, nvfx->zsa); + util_blitter_save_stencil_ref(blitter, &nvfx->stencil_ref); + util_blitter_save_rasterizer(blitter, nvfx->rasterizer); + util_blitter_save_fragment_shader(blitter, nvfx->fragprog); + util_blitter_save_vertex_shader(blitter, nvfx->vertprog); + util_blitter_save_viewport(blitter, &nvfx->viewport); + util_blitter_save_framebuffer(blitter, &nvfx->framebuffer); + util_blitter_save_clip(blitter, &nvfx->clip); + util_blitter_save_vertex_elements(blitter, nvfx->vtxelt); + util_blitter_save_vertex_buffers(blitter, nvfx->vtxbuf_nr, nvfx->vtxbuf); + + if(copy) + { + util_blitter_save_fragment_sampler_states(blitter, nvfx->nr_samplers, (void**)nvfx->tex_sampler); + util_blitter_save_fragment_sampler_views(blitter, nvfx->nr_textures, nvfx->fragment_sampler_views); + } + + return blitter; +} + +static inline void +nvfx_put_blitter(struct pipe_context* pipe, struct blitter_context* blitter) +{ + struct nvfx_context* nvfx = nvfx_context(pipe); + --nvfx->blitters_in_use; + assert(nvfx->blitters_in_use >= 0); + + if(nvfx->query && !nvfx->blitters_in_use) + { + struct nouveau_channel* chan = nvfx->screen->base.channel; + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV30_3D_QUERY_ENABLE, 1)); + OUT_RING(chan, 1); + } +} + +static unsigned +nvfx_region_clone(struct nv04_2d_context* ctx, struct nv04_region* rgn, unsigned w, unsigned h, boolean for_read) +{ + unsigned begin = nv04_region_begin(rgn, w, h); + unsigned end = nv04_region_end(rgn, w, h); + unsigned size = end - begin; + struct nouveau_bo* bo = 0; + nouveau_bo_new(rgn->bo->device, NOUVEAU_BO_MAP | NOUVEAU_BO_GART, 256, size, &bo); + + if(for_read || (size > ((w * h) << rgn->bpps))) + nv04_memcpy(ctx, bo, 0, rgn->bo, rgn->offset + begin, size); + + rgn->bo = bo; + rgn->offset = -begin; + return begin; +} + +static void +nvfx_resource_copy_region(struct pipe_context *pipe, + struct pipe_resource *dstr, struct pipe_subresource subdst, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *srcr, struct pipe_subresource subsrc, + unsigned srcx, unsigned srcy, unsigned srcz, + unsigned w, unsigned h) +{ + static int copy_threshold = -1; + struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d; + struct nv04_region dst, src; + int dst_to_gpu; + int src_on_gpu; + boolean small; + int ret; + + if(!w || !h) + return; + + if(copy_threshold < 0) + copy_threshold = debug_get_num_option("NOUVEAU_COPY_THRESHOLD", 4); + + dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING; + src_on_gpu = nvfx_resource_on_gpu(srcr); + + nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz, TRUE); + nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz, FALSE); + w = util_format_get_stride(dstr->format, w) >> dst.bpps; + h = util_format_get_nblocksy(dstr->format, h); + + small = (w * h <= copy_threshold); + if((!dst_to_gpu || !src_on_gpu) && small) + ret = -1; /* use the CPU */ + else + ret = nv04_region_copy_2d(ctx, &dst, &src, w, h, dst_to_gpu, src_on_gpu); + if(!ret) + {} + else if(ret > 0 + && dstr->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL) + && srcr->bind & PIPE_BIND_SAMPLER_VIEW) + { + /* this currently works because we hack the bind flags on resource creation to be + * the maximum set that the resource type actually supports + * + * TODO: perhaps support reinterpreting the formats + */ + struct blitter_context* blitter = nvfx_get_blitter(pipe, 1); + util_blitter_copy_region(blitter, dstr, subdst, dstx, dsty, dstz, srcr, subsrc, srcx, srcy, srcz, w, h, TRUE); + nvfx_put_blitter(pipe, blitter); + } + else + { + struct nv04_region dstt = dst; + struct nv04_region srct = src; + unsigned dstbegin = 0; + + if(!small) + { + if(src_on_gpu) + nvfx_region_clone(ctx, &srct, w, h, TRUE); + + if(dst_to_gpu) + dstbegin = nvfx_region_clone(ctx, &dstt, w, h, FALSE); + } + + nv04_region_copy_cpu(&dstt, &srct, w, h); + + if(srct.bo != src.bo) + nouveau_screen_bo_release(pipe->screen, srct.bo); + + if(dstt.bo != dst.bo) + { + nv04_memcpy(ctx, dst.bo, dst.offset + dstbegin, dstt.bo, 0, dstt.bo->size); + nouveau_screen_bo_release(pipe->screen, dstt.bo); + } + } +} + +static int +nvfx_surface_fill(struct pipe_context* pipe, struct pipe_surface *dsts, + unsigned dx, unsigned dy, unsigned w, unsigned h, unsigned value) +{ + struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d; + struct nv04_region dst; + int ret; + /* Always try to use the GPU right now, if possible + * If the user wanted the surface data on the CPU, he would have cleared with memset (hopefully) */ + + // we don't care about interior pixel order since we set all them to the same value + nvfx_region_init_for_surface(&dst, (struct nvfx_surface*)dsts, dx, dy, TRUE); + + w = util_format_get_stride(dsts->format, w) >> dst.bpps; + h = util_format_get_nblocksy(dsts->format, h); + + ret = nv04_region_fill_2d(ctx, &dst, w, h, value); + if(ret > 0 && dsts->texture->bind & PIPE_BIND_RENDER_TARGET) + return 1; + else if(ret) + { + struct nv04_region dstt = dst; + unsigned dstbegin = 0; + + if(nvfx_resource_on_gpu(dsts->texture)) + dstbegin = nvfx_region_clone(ctx, &dstt, w, h, FALSE); + + nv04_region_fill_cpu(&dstt, w, h, value); + + if(dstt.bo != dst.bo) + { + nv04_memcpy(ctx, dst.bo, dst.offset + dstbegin, dstt.bo, 0, dstt.bo->size); + nouveau_screen_bo_release(pipe->screen, dstt.bo); + } + } + + return 0; +} + + +void +nvfx_screen_surface_takedown(struct pipe_screen *pscreen) +{ + nv04_2d_context_takedown(nvfx_screen(pscreen)->eng2d); + nvfx_screen(pscreen)->eng2d = 0; +} + +int +nvfx_screen_surface_init(struct pipe_screen *pscreen) +{ + struct nv04_2d_context* ctx = nv04_2d_context_init(nouveau_screen(pscreen)->channel); + if(!ctx) + return -1; + nvfx_screen(pscreen)->eng2d = ctx; + return 0; +} static void -nvfx_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dest, unsigned destx, unsigned desty, - struct pipe_surface *src, unsigned srcx, unsigned srcy, - unsigned width, unsigned height) +nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int to_temp) +{ + struct nvfx_surface* ns = (struct nvfx_surface*)surf; + struct pipe_subresource tempsr, surfsr; + struct nvfx_context* nvfx = nvfx_context(pipe); + struct nvfx_miptree* temp; + unsigned use_vertex_buffers; + boolean use_index_buffer; + unsigned base_vertex; + + /* temporarily detach the temp, so it isn't used in place of the actual resource */ + temp = ns->temp; + ns->temp = 0; + + // TODO: we really should do this validation before setting these variable in draw calls + use_vertex_buffers = nvfx->use_vertex_buffers; + use_index_buffer = nvfx->use_index_buffer; + base_vertex = nvfx->base_vertex; + + tempsr.face = 0; + tempsr.level = 0; + surfsr.face = surf->face; + surfsr.level = surf->level; + + if(to_temp) + nvfx_resource_copy_region(pipe, &temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height); + else + nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height); + + /* If this triggers, it probably means we attempted to use the blitter + * but failed due to non-renderability of the target. + * Obviously, this would lead to infinite recursion if supported. */ + assert(!ns->temp); + + ns->temp = temp; + + nvfx->use_vertex_buffers = use_vertex_buffers; + nvfx->use_index_buffer = use_index_buffer; + nvfx->base_vertex = base_vertex; + + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; +} + +void +nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf) +{ + struct nvfx_surface* ns = (struct nvfx_surface*)surf; + struct pipe_resource template; + memset(&template, 0, sizeof(struct pipe_resource)); + template.target = PIPE_TEXTURE_2D; + template.format = surf->format; + template.width0 = surf->width; + template.height0 = surf->height; + template.depth0 = 1; + template.nr_samples = surf->texture->nr_samples; + template.flags = NVFX_RESOURCE_FLAG_LINEAR; + + assert(!ns->temp && !util_dirty_surface_is_dirty(&ns->base)); + + ns->temp = (struct nvfx_miptree*)nvfx_miptree_create(pipe->screen, &template); + nvfx_surface_copy_temp(pipe, surf, 1); +} + +void +nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf) { - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; + struct nvfx_context* nvfx = (struct nvfx_context*)pipe; + struct nvfx_surface* ns = (struct nvfx_surface*)surf; + boolean bound = FALSE; + + nvfx_surface_copy_temp(pipe, surf, 0); + + util_dirty_surface_set_clean(nvfx_surface_get_dirty_surfaces(surf), &ns->base); - eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); + if(nvfx->framebuffer.zsbuf == surf) + bound = TRUE; + else + { + for(unsigned i = 0; i < nvfx->framebuffer.nr_cbufs; ++i) + { + if(nvfx->framebuffer.cbufs[i] == surf) + { + bound = TRUE; + break; + } + } + } + + if(!bound) + pipe_resource_reference((struct pipe_resource**)&ns->temp, 0); } static void -nvfx_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, unsigned width, - unsigned height, unsigned value) +nvfx_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) { - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; + union util_color uc; + util_pack_color(rgba, dst->format, &uc); + + if(util_format_get_blocksizebits(dst->format) > 32 + || nvfx_surface_fill(pipe, dst, dstx, dsty, width, height, uc.ui)) + { + // TODO: probably should use hardware clear here instead if possible + struct blitter_context* blitter = nvfx_get_blitter(pipe, 0); + util_blitter_clear_render_target(blitter, dst, rgba, dstx, dsty, width, height); + nvfx_put_blitter(pipe, blitter); + } +} - eng2d->fill(eng2d, dest, destx, desty, width, height, value); +static void +nvfx_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + if(util_format_get_blocksizebits(dst->format) > 32 + || nvfx_surface_fill(pipe, dst, dstx, dsty, width, height, util_pack_z_stencil(dst->format, depth, stencil))) + { + // TODO: probably should use hardware clear here instead if possible + struct blitter_context* blitter = nvfx_get_blitter(pipe, 0); + util_blitter_clear_depth_stencil(blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); + nvfx_put_blitter(pipe, blitter); + } } + void nvfx_init_surface_functions(struct nvfx_context *nvfx) { - nvfx->pipe.surface_copy = nvfx_surface_copy; - nvfx->pipe.surface_fill = nvfx_surface_fill; + nvfx->pipe.resource_copy_region = nvfx_resource_copy_region; + nvfx->pipe.clear_render_target = nvfx_clear_render_target; + nvfx->pipe.clear_depth_stencil = nvfx_clear_depth_stencil; } diff --git a/src/gallium/drivers/nvfx/nvfx_surface.h b/src/gallium/drivers/nvfx/nvfx_surface.h new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_surface.h diff --git a/src/gallium/drivers/nvfx/nvfx_tex.h b/src/gallium/drivers/nvfx/nvfx_tex.h index 69187a79e79..2f2d7378085 100644 --- a/src/gallium/drivers/nvfx/nvfx_tex.h +++ b/src/gallium/drivers/nvfx/nvfx_tex.h @@ -1,70 +1,73 @@ #ifndef NVFX_TEX_H_ #define NVFX_TEX_H_ +#include "util/u_math.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + + static inline unsigned nvfx_tex_wrap_mode(unsigned wrap) { unsigned ret; switch (wrap) { case PIPE_TEX_WRAP_REPEAT: - ret = NV34TCL_TX_WRAP_S_REPEAT; + ret = NV30_3D_TEX_WRAP_S_REPEAT; break; case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; + ret = NV30_3D_TEX_WRAP_S_MIRRORED_REPEAT; break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; + ret = NV30_3D_TEX_WRAP_S_CLAMP_TO_EDGE; break; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; + ret = NV30_3D_TEX_WRAP_S_CLAMP_TO_BORDER; break; case PIPE_TEX_WRAP_CLAMP: - ret = NV34TCL_TX_WRAP_S_CLAMP; + ret = NV30_3D_TEX_WRAP_S_CLAMP; break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; + ret = NV40_3D_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; + ret = NV40_3D_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; break; case PIPE_TEX_WRAP_MIRROR_CLAMP: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; + ret = NV40_3D_TEX_WRAP_S_MIRROR_CLAMP; break; default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV34TCL_TX_WRAP_S_REPEAT; + assert(0); + ret = NV30_3D_TEX_WRAP_S_REPEAT; break; } - return ret >> NV34TCL_TX_WRAP_S_SHIFT; + return ret >> NV30_3D_TEX_WRAP_S__SHIFT; } static inline unsigned -nvfx_tex_wrap_compare_mode(const struct pipe_sampler_state* cso) +nvfx_tex_wrap_compare_mode(unsigned func) { - if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - switch (cso->compare_func) { - case PIPE_FUNC_NEVER: - return NV34TCL_TX_WRAP_RCOMP_NEVER; - case PIPE_FUNC_GREATER: - return NV34TCL_TX_WRAP_RCOMP_GREATER; - case PIPE_FUNC_EQUAL: - return NV34TCL_TX_WRAP_RCOMP_EQUAL; - case PIPE_FUNC_GEQUAL: - return NV34TCL_TX_WRAP_RCOMP_GEQUAL; - case PIPE_FUNC_LESS: - return NV34TCL_TX_WRAP_RCOMP_LESS; - case PIPE_FUNC_NOTEQUAL: - return NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; - case PIPE_FUNC_LEQUAL: - return NV34TCL_TX_WRAP_RCOMP_LEQUAL; - case PIPE_FUNC_ALWAYS: - return NV34TCL_TX_WRAP_RCOMP_ALWAYS; - default: - break; - } + switch (func) { + case PIPE_FUNC_NEVER: + return NV30_3D_TEX_WRAP_RCOMP_NEVER; + case PIPE_FUNC_GREATER: + return NV30_3D_TEX_WRAP_RCOMP_GREATER; + case PIPE_FUNC_EQUAL: + return NV30_3D_TEX_WRAP_RCOMP_EQUAL; + case PIPE_FUNC_GEQUAL: + return NV30_3D_TEX_WRAP_RCOMP_GEQUAL; + case PIPE_FUNC_LESS: + return NV30_3D_TEX_WRAP_RCOMP_LESS; + case PIPE_FUNC_NOTEQUAL: + return NV30_3D_TEX_WRAP_RCOMP_NOTEQUAL; + case PIPE_FUNC_LEQUAL: + return NV30_3D_TEX_WRAP_RCOMP_LEQUAL; + case PIPE_FUNC_ALWAYS: + return NV30_3D_TEX_WRAP_RCOMP_ALWAYS; + default: + assert(0); + return 0; } - return 0; } static inline unsigned nvfx_tex_filter(const struct pipe_sampler_state* cso) @@ -72,11 +75,11 @@ static inline unsigned nvfx_tex_filter(const struct pipe_sampler_state* cso) unsigned filter = 0; switch (cso->mag_img_filter) { case PIPE_TEX_FILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; + filter |= NV30_3D_TEX_FILTER_MAG_LINEAR; break; case PIPE_TEX_FILTER_NEAREST: default: - filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; + filter |= NV30_3D_TEX_FILTER_MAG_NEAREST; break; } @@ -84,14 +87,14 @@ static inline unsigned nvfx_tex_filter(const struct pipe_sampler_state* cso) case PIPE_TEX_FILTER_LINEAR: switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + filter |= NV30_3D_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST; break; case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + filter |= NV30_3D_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR; break; case PIPE_TEX_MIPFILTER_NONE: default: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; + filter |= NV30_3D_TEX_FILTER_MIN_LINEAR; break; } break; @@ -99,14 +102,14 @@ static inline unsigned nvfx_tex_filter(const struct pipe_sampler_state* cso) default: switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + filter |= NV30_3D_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST; break; case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + filter |= NV30_3D_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR; break; case PIPE_TEX_MIPFILTER_NONE: default: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; + filter |= NV30_3D_TEX_FILTER_MIN_NEAREST; break; } break; @@ -128,6 +131,45 @@ struct nvfx_sampler_state { uint32_t en; uint32_t filt; uint32_t bcol; + uint32_t min_lod; + uint32_t max_lod; + boolean compare; }; +struct nvfx_sampler_view { + struct pipe_sampler_view base; + int offset; + uint32_t swizzle; + uint32_t npot_size; + uint32_t filt; + uint32_t wrap_mask; + uint32_t wrap; + uint32_t lod_offset; + uint32_t max_lod_limit; + union + { + struct + { + uint32_t fmt[4]; /* nv30 has 4 entries, nv40 one */ + int rect; + } nv30; + struct + { + uint32_t fmt[2]; /* nv30 has 4 entries, nv40 one */ + uint32_t npot_size2; /* nv40 only */ + } nv40; + uint32_t init_fmt; + } u; +}; + +struct nvfx_texture_format { + int fmt[6]; + unsigned sign; + unsigned wrap; + unsigned char src[6]; + unsigned char comp[6]; +}; + +extern struct nvfx_texture_format nvfx_texture_formats[PIPE_FORMAT_COUNT]; + #endif /* NVFX_TEX_H_ */ diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c index b2ef27cf579..7cb47a20f64 100644 --- a/src/gallium/drivers/nvfx/nvfx_transfer.c +++ b/src/gallium/drivers/nvfx/nvfx_transfer.c @@ -4,202 +4,218 @@ #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_math.h" -#include "nouveau/nouveau_winsys.h" +#include "util/u_staging.h" #include "nvfx_context.h" #include "nvfx_screen.h" #include "nvfx_state.h" #include "nvfx_resource.h" #include "nvfx_transfer.h" -struct nvfx_transfer { - struct pipe_transfer base; - struct pipe_surface *surface; - boolean direct; -}; - -static void -nvfx_compatible_transfer_tex(struct pipe_resource *pt, unsigned width, unsigned height, - unsigned bind, - struct pipe_resource *template) -{ - memset(template, 0, sizeof(struct pipe_resource)); - template->target = pt->target; - template->format = pt->format; - template->width0 = width; - template->height0 = height; - template->depth0 = 1; - template->last_level = 0; - template->nr_samples = pt->nr_samples; - template->bind = bind; - template->usage = PIPE_USAGE_DYNAMIC; - template->flags = NVFX_RESOURCE_FLAG_LINEAR; -} - - -static unsigned nvfx_transfer_bind_flags( unsigned transfer_usage ) +struct nvfx_staging_transfer { - unsigned bind = 0; + struct util_staging_transfer base; - if (transfer_usage & PIPE_TRANSFER_WRITE) - bind |= PIPE_BIND_BLIT_SOURCE; - - if (transfer_usage & PIPE_TRANSFER_READ) - bind |= PIPE_BIND_BLIT_DESTINATION; - - return bind; -} + unsigned offset; + unsigned map_count; +}; struct pipe_transfer * -nvfx_miptree_transfer_new(struct pipe_context *pipe, +nvfx_transfer_new(struct pipe_context *pipe, struct pipe_resource *pt, struct pipe_subresource sr, unsigned usage, const struct pipe_box *box) { - struct pipe_screen *pscreen = pipe->screen; - struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; - struct nvfx_transfer *tx; - struct pipe_resource tx_tex_template, *tx_tex; - static int no_transfer = -1; - unsigned bind = nvfx_transfer_bind_flags(usage); - if(no_transfer < 0) - no_transfer = debug_get_bool_option("NOUVEAU_NO_TRANSFER", FALSE); - - - tx = CALLOC_STRUCT(nvfx_transfer); - if (!tx) - return NULL; - - /* Don't handle 3D transfers yet. - */ - assert(box->depth == 1); - - pipe_resource_reference(&tx->base.resource, pt); - tx->base.sr = sr; - tx->base.usage = usage; - tx->base.box = *box; - tx->base.stride = mt->level[sr.level].pitch; - - /* Direct access to texture */ - if ((pt->usage == PIPE_USAGE_DYNAMIC || - no_transfer) && - pt->flags & NVFX_RESOURCE_FLAG_LINEAR) + if((usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_DONTBLOCK)) == PIPE_TRANSFER_DONTBLOCK) + { + struct nouveau_bo* bo = ((struct nvfx_resource*)pt)->bo; + if(bo && nouveau_bo_busy(bo, NOUVEAU_BO_WR)) + return NULL; + } + + if(pt->target == PIPE_BUFFER) { - tx->direct = true; - - /* XXX: just call the internal nvfx function. - */ - tx->surface = pscreen->get_tex_surface(pscreen, pt, - sr.face, sr.level, - box->z, - bind); - return &tx->base; - } + // it would be nice if we could avoid all this ridiculous overhead... + struct pipe_transfer* tx; + struct nvfx_buffer* buffer = nvfx_buffer(pt); + + tx = CALLOC_STRUCT(pipe_transfer); + if (!tx) + return NULL; - tx->direct = false; + pipe_resource_reference(&tx->resource, pt); + tx->sr = sr; + tx->usage = usage; + tx->box = *box; - nvfx_compatible_transfer_tex(pt, box->width, box->height, bind, &tx_tex_template); + tx->slice_stride = tx->stride = util_format_get_stride(pt->format, box->width); + tx->data = buffer->data + util_format_get_stride(pt->format, box->x); - tx_tex = pscreen->resource_create(pscreen, &tx_tex_template); - if (!tx_tex) + return tx; + } + else { - FREE(tx); - return NULL; + struct nvfx_staging_transfer* tx; + bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR; + + tx = CALLOC_STRUCT(nvfx_staging_transfer); + if(!tx) + return NULL; + + util_staging_transfer_init(pipe, pt, sr, usage, box, direct, &tx->base); + + if(direct) + { + tx->base.base.stride = nvfx_subresource_pitch(pt, sr.level); + tx->base.base.slice_stride = tx->base.base.stride * u_minify(pt->height0, sr.level); + tx->offset = nvfx_subresource_offset(pt, sr.face, sr.level, box->z) + + util_format_get_2d_size(pt->format, tx->base.base.stride, box->y) + + util_format_get_stride(pt->format, box->x); + } + else + { + tx->base.base.stride = nvfx_subresource_pitch(tx->base.staging_resource, 0); + tx->base.base.slice_stride = tx->base.base.stride * tx->base.staging_resource->height0; + tx->offset = 0; + } + + assert(tx->base.base.stride); + + return &tx->base.base; } +} - tx->base.stride = ((struct nvfx_miptree*)tx_tex)->level[0].pitch; - - tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, - 0, 0, 0, - bind); - - pipe_resource_reference(&tx_tex, NULL); - - if (!tx->surface) +static void nvfx_buffer_dirty_interval(struct nvfx_buffer* buffer, unsigned begin, unsigned size, boolean unsynchronized) +{ + struct nvfx_screen* screen = nvfx_screen(buffer->base.base.screen); + buffer->last_update_static = buffer->bytes_to_draw_until_static < 0; + if(buffer->dirty_begin == buffer->dirty_end) { - pipe_surface_reference(&tx->surface, NULL); - FREE(tx); - return NULL; + buffer->dirty_begin = begin; + buffer->dirty_end = begin + size; + buffer->dirty_unsynchronized = unsynchronized; + } + else + { + buffer->dirty_begin = MIN2(buffer->dirty_begin, begin); + buffer->dirty_end = MAX2(buffer->dirty_end, begin + size); + buffer->dirty_unsynchronized &= unsynchronized; } - if (usage & PIPE_TRANSFER_READ) { - struct nvfx_screen *nvscreen = nvfx_screen(pscreen); - struct pipe_surface *src; + if(unsynchronized) + { + // TODO: revisit this, it doesn't seem quite right + //printf("UNSYNC UPDATE %p %u %u\n", buffer, begin, size); + buffer->bytes_to_draw_until_static += size * screen->static_reuse_threshold; + } + else + buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold; +} - src = pscreen->get_tex_surface(pscreen, pt, - sr.face, sr.level, box->z, - PIPE_BIND_BLIT_SOURCE); +static void nvfx_transfer_flush_region( struct pipe_context *pipe, + struct pipe_transfer *ptx, + const struct pipe_box *box) +{ + if(ptx->resource->target == PIPE_BUFFER && (ptx->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) + { + struct nvfx_buffer* buffer = nvfx_buffer(ptx->resource); + nvfx_buffer_dirty_interval(buffer, + (uint8_t*)ptx->data - buffer->data + util_format_get_stride(buffer->base.base.format, box->x), + util_format_get_stride(buffer->base.base.format, box->width), + !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED)); + } +} - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - /* TODO: Check if SIFM can un-swizzle */ - nvscreen->eng2d->copy(nvscreen->eng2d, - tx->surface, 0, 0, - src, - box->x, box->y, - box->width, box->height); +static void +nvfx_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx) +{ + if(ptx->resource->target == PIPE_BUFFER) + { + struct nvfx_buffer* buffer = nvfx_buffer(ptx->resource); + if((ptx->usage & (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT)) == PIPE_TRANSFER_WRITE) + nvfx_buffer_dirty_interval(buffer, + (uint8_t*)ptx->data - buffer->data, + ptx->stride, + !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED)); + pipe_resource_reference(&ptx->resource, 0); + FREE(ptx); + } + else + { + struct nouveau_channel* chan = nvfx_context(pipe)->screen->base.channel; + util_staging_transfer_destroy(pipe, ptx); - pipe_surface_reference(&src, NULL); + FIRE_RING(chan); } +} - return &tx->base; +void * +nvfx_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx) +{ + if(ptx->resource->target == PIPE_BUFFER) + return ptx->data; + else + { + struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx; + if(!ptx->data) + { + struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource; + uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo, nouveau_screen_transfer_flags(ptx->usage)); + ptx->data = map + tx->offset; + } + + ++tx->map_count; + return ptx->data; + } } void -nvfx_miptree_transfer_del(struct pipe_context *pipe, - struct pipe_transfer *ptx) +nvfx_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx) { - struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; - - if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { - struct pipe_screen *pscreen = pipe->screen; - struct nvfx_screen *nvscreen = nvfx_screen(pscreen); - struct pipe_surface *dst; - - dst = pscreen->get_tex_surface(pscreen, - ptx->resource, - ptx->sr.face, - ptx->sr.level, - ptx->box.z, - PIPE_BIND_BLIT_DESTINATION); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - nvscreen->eng2d->copy(nvscreen->eng2d, - dst, ptx->box.x, ptx->box.y, - tx->surface, 0, 0, - ptx->box.width, ptx->box.height); - - pipe_surface_reference(&dst, NULL); + if(ptx->resource->target != PIPE_BUFFER) + { + struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx; + struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource; + + if(!--tx->map_count) + { + nouveau_screen_bo_unmap(pipe->screen, mt->base.bo); + ptx->data = 0; + } } - - pipe_surface_reference(&tx->surface, NULL); - pipe_resource_reference(&ptx->resource, NULL); - FREE(ptx); } -void * -nvfx_miptree_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx) +static void nvfx_transfer_inline_write( struct pipe_context *pipe, + struct pipe_resource *pr, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned slice_stride) { - struct pipe_screen *pscreen = pipe->screen; - struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; - struct nv04_surface *ns = (struct nv04_surface *)tx->surface; - struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture; - uint8_t *map = nouveau_screen_bo_map(pscreen, mt->base.bo, - nouveau_screen_transfer_flags(ptx->usage)); - - if(!tx->direct) - return map + ns->base.offset; + if(pr->target != PIPE_BUFFER) + { + u_default_transfer_inline_write(pipe, pr, sr, usage, box, data, stride, slice_stride); + } else - return (map + ns->base.offset + - ptx->box.y * ns->pitch + - ptx->box.x * util_format_get_blocksize(ptx->resource->format)); + { + struct nvfx_buffer* buffer = nvfx_buffer(pr); + unsigned begin = util_format_get_stride(pr->format, box->x); + unsigned size = util_format_get_stride(pr->format, box->width); + memcpy(buffer->data + begin, data, size); + nvfx_buffer_dirty_interval(buffer, begin, size, + !!(pr->flags & PIPE_TRANSFER_UNSYNCHRONIZED)); + } } void -nvfx_miptree_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx) +nvfx_init_transfer_functions(struct pipe_context *pipe) { - struct pipe_screen *pscreen = pipe->screen; - struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; - struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture; - - nouveau_screen_bo_unmap(pscreen, mt->base.bo); + pipe->get_transfer = nvfx_transfer_new; + pipe->transfer_map = nvfx_transfer_map; + pipe->transfer_flush_region = nvfx_transfer_flush_region; + pipe->transfer_unmap = nvfx_transfer_unmap; + pipe->transfer_destroy = nvfx_transfer_destroy; + pipe->transfer_inline_write = nvfx_transfer_inline_write; } diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.h b/src/gallium/drivers/nvfx/nvfx_transfer.h index 3e3317b2c7b..20f20d5b0b8 100644 --- a/src/gallium/drivers/nvfx/nvfx_transfer.h +++ b/src/gallium/drivers/nvfx/nvfx_transfer.h @@ -7,19 +7,17 @@ struct pipe_transfer * -nvfx_miptree_transfer_new(struct pipe_context *pcontext, +nvfx_transfer_new(struct pipe_context *pcontext, struct pipe_resource *pt, struct pipe_subresource sr, unsigned usage, const struct pipe_box *box); -void -nvfx_miptree_transfer_del(struct pipe_context *pcontext, - struct pipe_transfer *ptx); + void * -nvfx_miptree_transfer_map(struct pipe_context *pcontext, +nvfx_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx); void -nvfx_miptree_transfer_unmap(struct pipe_context *pcontext, +nvfx_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx); diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 520bae5aed2..597664e7716 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -2,627 +2,642 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "translate/translate.h" #include "nvfx_context.h" #include "nvfx_state.h" #include "nvfx_resource.h" #include "nouveau/nouveau_channel.h" -#include "nouveau/nouveau_class.h" + #include "nouveau/nouveau_pushbuf.h" -#include "nouveau/nouveau_util.h" -static INLINE int -nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +static inline unsigned +util_guess_unique_indices_count(unsigned mode, unsigned indices) { - switch (pipe) { - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - *fmt = NV34TCL_VTXFMT_TYPE_FLOAT; - break; - case PIPE_FORMAT_R16_FLOAT: - case PIPE_FORMAT_R16G16_FLOAT: - case PIPE_FORMAT_R16G16B16_FLOAT: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - *fmt = NV34TCL_VTXFMT_TYPE_HALF; - break; - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - *fmt = NV34TCL_VTXFMT_TYPE_UBYTE; - break; - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *fmt = NV34TCL_VTXFMT_TYPE_USHORT; - break; - default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; + /* Euler's formula gives V = + * = E - F + 2 = + * = F * (polygon_edges / 2 - 1) + 2 = + * = F * (polygon_edges - 2) / 2 + 2 = + * = indices * (polygon_edges - 2) / (2 * indices_per_face) + 2 + * = indices * (1 / 2 - 1 / polygon_edges) + 2 + */ + switch(mode) + { + case PIPE_PRIM_LINES: + return indices >> 1; + case PIPE_PRIM_TRIANGLES: + { + // avoid an expensive division by 3 using the multiplicative inverse mod 2^32 + unsigned q; + unsigned inv3 = 2863311531; + indices >>= 1; + q = indices * inv3; + if(unlikely(q >= indices)) + { + q += inv3; + if(q >= indices) + q += inv3; + } + return indices + 2; + //return indices / 6 + 2; } - - switch (pipe) { - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R16_FLOAT: - case PIPE_FORMAT_R16_SSCALED: - *ncomp = 1; - break; - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R16G16_FLOAT: - case PIPE_FORMAT_R16G16_SSCALED: - *ncomp = 2; - break; - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R16G16B16_FLOAT: - case PIPE_FORMAT_R16G16B16_SSCALED: - *ncomp = 3; - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *ncomp = 4; - break; + // guess that indexed quads are created by successive connections, since a closed mesh seems unlikely + case PIPE_PRIM_QUADS: + return (indices >> 1) + 2; + // return (indices >> 2) + 2; // if it is a closed mesh default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; + return indices; } - - return 0; } -static boolean -nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib, - unsigned ib_size) +static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info) { - unsigned type; - - if (!ib) { - nvfx->idxbuf = NULL; - nvfx->idxbuf_format = 0xdeadbeef; - return FALSE; + struct nvfx_context* nvfx = nvfx_context(pipe); + unsigned hardware_cost = 0; + unsigned inline_cost = 0; + unsigned unique_vertices; + unsigned upload_mode; + float best_index_cost_for_hardware_vertices_as_inline_cost; + boolean prefer_hardware_indices; + unsigned index_inline_cost; + unsigned index_hardware_cost; + if (info->indexed) + unique_vertices = util_guess_unique_indices_count(info->mode, info->count); + else + unique_vertices = info->count; + + /* Here we try to figure out if we are better off writing vertex data directly on the FIFO, + * or create hardware buffer objects and pointing the hardware to them. + * + * This is done by computing the total memcpy cost of each option, ignoring uploads + * if we think that the buffer is static and thus the upload cost will be amortized over + * future draw calls. + * + * For instance, if everything looks static, we will always create buffer objects, while if + * everything is a user buffer and we are not doing indexed drawing, we never do. + * + * Other interesting cases are where a small user vertex buffer, but a huge user index buffer, + * where we will upload the vertex buffer, so that we can use hardware index lookup, and + * the opposite case, where we instead do index lookup in software to avoid uploading + * a huge amount of vertex data that is not going to be used. + * + * Otherwise, we generally move to the GPU the after it has been pushed + * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times to the GPU without having + * been updated with a transfer (or just the buffer having been destroyed). + * + * There is no special handling for user buffers, since applications can use + * OpenGL VBOs in a one-shot fashion. OpenGL 3/4 core profile forces this + * by the way. + * + * Note that currently we don't support only putting some data on the FIFO, and + * some on vertex buffers (constant and instanced data is independent from this). + * + * nVidia doesn't seem to do this either, even though it should be at least + * doable with VTX_ATTR and possibly with VERTEX_DATA too if not indexed. + */ + + for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++) + { + struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index]; + struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer); + buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices; + if (!nvfx_buffer_seems_static(buffer)) + { + hardware_cost += buffer->dirty_end - buffer->dirty_begin; + if (!buffer->base.bo) + hardware_cost += nvfx->screen->buffer_allocation_cost; + } + inline_cost += vbi->per_vertex_size * info->count; } - if (!nvfx->screen->index_buffer_reloc_flags || ib_size == 1) - return FALSE; + best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f; + prefer_hardware_indices = FALSE; + index_inline_cost = 0; + index_hardware_cost = 0; - switch (ib_size) { - case 2: - type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; - break; - case 4: - type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; - break; - default: - return FALSE; - } + if (info->indexed) + { + index_inline_cost = nvfx->idxbuf.index_size * info->count; + if (nvfx->screen->index_buffer_reloc_flags + && (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4) + && !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1))) + { + struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer); + buffer->bytes_to_draw_until_static -= index_inline_cost; - if (ib != nvfx->idxbuf || - type != nvfx->idxbuf_format) { - nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->idxbuf = ib; - nvfx->idxbuf_format = type; - } + prefer_hardware_indices = TRUE; - return TRUE; -} + if (!nvfx_buffer_seems_static(buffer)) + { + index_hardware_cost = buffer->dirty_end - buffer->dirty_begin; + if (!buffer->base.bo) + index_hardware_cost += nvfx->screen->buffer_allocation_cost; + } -// type must be floating point -static inline void -nvfx_vbo_static_attrib(struct nvfx_context *nvfx, - int attrib, struct pipe_vertex_element *ve, - struct pipe_vertex_buffer *vb, unsigned ncomp) -{ - struct pipe_transfer *transfer; - struct nouveau_channel* chan = nvfx->screen->base.channel; - void *map; - float *v; - - map = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer); - map = (uint8_t *) map + vb->buffer_offset + ve->src_offset; - - v = map; - - switch (ncomp) { - case 4: - OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4)); - OUT_RING(chan, fui(v[0])); - OUT_RING(chan, fui(v[1])); - OUT_RING(chan, fui(v[2])); - OUT_RING(chan, fui(v[3])); - break; - case 3: - OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3)); - OUT_RING(chan, fui(v[0])); - OUT_RING(chan, fui(v[1])); - OUT_RING(chan, fui(v[2])); - break; - case 2: - OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2)); - OUT_RING(chan, fui(v[0])); - OUT_RING(chan, fui(v[1])); - break; - case 1: - OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1)); - OUT_RING(chan, fui(v[0])); - break; + if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost) + { + best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost; + } + else + { + best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost; + prefer_hardware_indices = TRUE; + } + } } - pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer); + /* let's finally figure out which of the 3 paths we want to take */ + if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost)) + upload_mode = 1 + prefer_hardware_indices; + else + upload_mode = 0; + +#ifdef DEBUG + if (unlikely(nvfx->screen->trace_draw)) + { + fprintf(stderr, "DRAW"); + if (info->indexed) + { + fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size); + if (info->index_bias) + fprintf(stderr, " biased %u", info->index_bias); + fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index); + } + if (info->instance_count > 1) + fprintf(stderr, " %u instances from %u", info->instance_count, info->indexed); + fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode); + if (!upload_mode) + fprintf(stderr, " -> inline vertex data"); + else if (upload_mode == 2 || !info->indexed) + fprintf(stderr, " -> buffer range"); + else + fprintf(stderr, " -> inline indices"); + fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost); + for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i) + { + struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index]; + struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer); + if (i) + fprintf(stderr, ", "); + fprintf(stderr, "%p%s left %Li", buffer, buffer->last_update_static ? " static" : "", buffer->bytes_to_draw_until_static); + } + fprintf(stderr, ">\n"); + } +#endif + + return upload_mode; } -void -nvfx_draw_arrays(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) +void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - unsigned restart = 0; - - nvfx_vbo_set_idxbuf(nvfx, NULL, 0); - if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) { - nvfx_draw_elements_swtnl(pipe, NULL, 0, 0, - mode, start, count); - return; - } + unsigned upload_mode = 0; - while (count) { - unsigned vc, nr, avail; + if (!nvfx->vtxelt->needs_translate) + upload_mode = nvfx_decide_upload_mode(pipe, info); - nvfx_state_emit(nvfx); + nvfx->use_index_buffer = upload_mode > 1; - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + if ((upload_mode > 0) != nvfx->use_vertex_buffers) + { + nvfx->use_vertex_buffers = (upload_mode > 0); + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; + } - vc = nouveau_vbuf_split(avail, 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; + if (upload_mode > 0) + { + for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++) + { + struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index]; + nvfx_buffer_upload(nvfx_buffer(vb->buffer)); } - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + if (upload_mode > 1) + { + nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer)); - nr = (vc & 0xff); - if (nr) { - OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1)); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; + if (unlikely(info->index_bias != nvfx->base_vertex)) + { + nvfx->base_vertex = info->index_bias; + nvfx->dirty |= NVFX_NEW_ARRAYS; + } } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push)); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; + else + { + if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex)) + { + nvfx->base_vertex = 0; + nvfx->dirty |= NVFX_NEW_ARRAYS; } } - - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); - - count -= vc; - start = restart; } - pipe->flush(pipe, 0, NULL); + if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) + nvfx_draw_vbo_swtnl(pipe, info); + else + nvfx_push_vbo(pipe, info); } -static INLINE void -nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) +boolean +nvfx_vbo_validate(struct nvfx_context *nvfx) { - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; + struct nouveau_channel* chan = nvfx->screen->base.channel; + int i; + int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); + unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD; - while (count) { - uint8_t *elts = (uint8_t *)ib + start; - unsigned vc, push, restart = 0, avail; + if (!elements) + return TRUE; - nvfx_state_emit(nvfx); + MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2); + for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i) + { + struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer); + float v[4]; + ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0); + nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp); + } - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ - vc = nouveau_vbuf_split(avail, 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; + OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements)); + if(nvfx->use_vertex_buffers) + { + unsigned idx = 0; + for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) { + struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + if(idx != ve->idx) + { + assert(idx < ve->idx); + OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx); + idx = ve->idx; + } - if (vc & 1) { - OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1)); - OUT_RING (chan, elts[0]); - elts++; vc--; + OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV30_3D_VTXFMT_STRIDE__SHIFT)); + ++idx; } + if(idx != nvfx->vtxelt->num_elements) + OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx); + } + else + OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements); - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1)); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); + for(i = nvfx->vtxelt->num_elements; i < elements; ++i) + OUT_RING(chan, NV30_3D_VTXFMT_TYPE_V32_FLOAT); - vc -= push; - elts += push; + if(nvfx->is_nv4x) { + unsigned i; + /* seems to be some kind of cache flushing */ + for(i = 0; i < 3; ++i) { + OUT_RING(chan, RING_3D(0x1718, 1)); + OUT_RING(chan, 0); } - - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); - - start = restart; } -} -static INLINE void -nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - - while (count) { - uint16_t *elts = (uint16_t *)ib + start; - unsigned vc, push, restart = 0, avail; - - nvfx_state_emit(nvfx); - - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ - - vc = nouveau_vbuf_split(avail, 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; + OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements)); + if(nvfx->use_vertex_buffers) + { + unsigned idx = 0; + for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) { + struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + for(; idx < ve->idx; ++idx) + OUT_RING(chan, 0); - if (vc & 1) { - OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1)); - OUT_RING (chan, elts[0]); - elts++; vc--; + OUT_RELOC(chan, bo, + vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV30_3D_VTXBUF_DMA1); + ++idx; } - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1)); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); - - vc -= push; - elts += push; - } + for(; idx < elements; ++idx) + OUT_RING(chan, 0); + } + else + { + for (i = 0; i < elements; i++) + OUT_RING(chan, 0); + } - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); + OUT_RING(chan, RING_3D(0x1710, 1)); + OUT_RING(chan, 0); - start = restart; - } + nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements; + nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF; + return TRUE; } -static INLINE void -nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) +void +nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx) { - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned num_outputs = nvfx->vertprog->draw_elements; + int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr); - while (count) { - uint32_t *elts = (uint32_t *)ib + start; - unsigned vc, push, restart = 0, avail; + if (!elements) + return; - nvfx_state_emit(nvfx); + WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2); - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements)); + for(unsigned i = 0; i < num_outputs; ++i) + OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT); + for(unsigned i = num_outputs; i < elements; ++i) + OUT_RING(chan, NV30_3D_VTXFMT_TYPE_V32_FLOAT); - vc = nouveau_vbuf_split(avail, 5, 1, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; + if(nvfx->is_nv4x) { + unsigned i; + /* seems to be some kind of cache flushing */ + for(i = 0; i < 3; ++i) { + OUT_RING(chan, RING_3D(0x1718, 1)); + OUT_RING(chan, 0); } - count -= vc; + } - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements)); + for (unsigned i = 0; i < elements; i++) + OUT_RING(chan, 0); - while (vc) { - push = MIN2(vc, 2047); + OUT_RING(chan, RING_3D(0x1710, 1)); + OUT_RING(chan, 0); - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push)); - OUT_RINGp (chan, elts, push); + nvfx->hw_vtxelt_nr = num_outputs; + nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF; +} - vc -= push; - elts += push; - } +void +nvfx_vbo_relocate(struct nvfx_context *nvfx) +{ + struct nouveau_channel* chan; + unsigned vb_flags; + int i; - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); + if(!nvfx->use_vertex_buffers) + return; + + chan = nvfx->screen->base.channel; + vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; - start = restart; + MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3); + for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) { + struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; + + OUT_RELOC(chan, bo, RING_3D(NV30_3D_VTXBUF(ve->idx), 1), + vb_flags, 0, 0); + OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV30_3D_VTXBUF_DMA1); } + nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF; } static void -nvfx_draw_elements_inline(struct pipe_context *pipe, - struct pipe_resource *ib, - unsigned ib_size, int ib_bias, - unsigned mode, unsigned start, unsigned count) +nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags) { - struct nvfx_context *nvfx = nvfx_context(pipe); - struct pipe_transfer *transfer; - void *map; - - map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer); - if (!ib) { - NOUVEAU_ERR("failed mapping ib\n"); - return; - } - - assert(ib_bias == 0); - - switch (ib_size) { - case 1: - nvfx_draw_elements_u08(nvfx, map, mode, start, count); - break; - case 2: - nvfx_draw_elements_u16(nvfx, map, mode, start, count); - break; - case 4: - nvfx_draw_elements_u32(nvfx, map, mode, start, count); - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - break; - } - - pipe_buffer_unmap(pipe, ib, transfer); + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV30_3D_IDXBUF_FORMAT_TYPE_U16 : NV30_3D_IDXBUF_FORMAT_TYPE_U32; + struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo; + ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD; + + assert(nvfx->screen->index_buffer_reloc_flags); + + MARK_RING(chan, 3, 3); + if(ib_flags & NOUVEAU_BO_DUMMY) + OUT_RELOC(chan, bo, RING_3D(NV30_3D_IDXBUF_OFFSET, 2), ib_flags, 0, 0); + else + OUT_RING(chan, RING_3D(NV30_3D_IDXBUF_OFFSET, 2)); + OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR, + 0, NV30_3D_IDXBUF_FORMAT_DMA1); + nvfx->relocs_needed &=~ NVFX_RELOCATE_IDXBUF; } -static void -nvfx_draw_elements_vbo(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) +void +nvfx_idxbuf_validate(struct nvfx_context* nvfx) { - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - unsigned restart = 0; + nvfx_idxbuf_emit(nvfx, 0); +} - while (count) { - unsigned nr, vc, avail; +void +nvfx_idxbuf_relocate(struct nvfx_context* nvfx) +{ + nvfx_idxbuf_emit(nvfx, NOUVEAU_BO_DUMMY); +} - nvfx_state_emit(nvfx); +unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] = +{ + [PIPE_FORMAT_R32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT, + [PIPE_FORMAT_R32G32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT, + [PIPE_FORMAT_R32G32B32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT, + [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV30_3D_VTXFMT_TYPE_V32_FLOAT, + [PIPE_FORMAT_R16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT, + [PIPE_FORMAT_R16G16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT, + [PIPE_FORMAT_R16G16B16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV30_3D_VTXFMT_TYPE_V16_FLOAT, + [PIPE_FORMAT_R8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM, + [PIPE_FORMAT_R8G8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM, + [PIPE_FORMAT_R8G8B8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM, + [PIPE_FORMAT_R8G8B8A8_UNORM] = NV30_3D_VTXFMT_TYPE_U8_UNORM, + [PIPE_FORMAT_R8G8B8A8_USCALED] = NV30_3D_VTXFMT_TYPE_U8_USCALED, + [PIPE_FORMAT_R16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM, + [PIPE_FORMAT_R16G16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM, + [PIPE_FORMAT_R16G16B16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM, + [PIPE_FORMAT_R16G16B16A16_SNORM] = NV30_3D_VTXFMT_TYPE_V16_SNORM, + [PIPE_FORMAT_R16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED, + [PIPE_FORMAT_R16G16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED, + [PIPE_FORMAT_R16G16B16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED, + [PIPE_FORMAT_R16G16B16A16_SSCALED] = NV30_3D_VTXFMT_TYPE_V16_SSCALED, +}; + +static void * +nvfx_vtxelts_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state); + struct translate_key transkey; + unsigned per_vertex_size[16]; + unsigned vb_compacted_index[16]; - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + if(num_elements > 16) + { + _debug_printf("Error: application attempted to use %u vertex elements, but only 16 are supported: ignoring the rest\n", num_elements); + num_elements = 16; + } - vc = nouveau_vbuf_split(avail, 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; - } + memset(per_vertex_size, 0, sizeof(per_vertex_size)); + memcpy(cso->pipe, elements, num_elements * sizeof(elements[0])); + cso->num_elements = num_elements; + cso->needs_translate = FALSE; + + transkey.nr_elements = 0; + transkey.output_stride = 0; + + for(unsigned i = 0; i < num_elements; ++i) + { + const struct pipe_vertex_element* ve = &elements[i]; + if(!ve->instance_divisor) + per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1); + } + + for(unsigned i = 0; i < 16; ++i) + { + if(per_vertex_size[i]) + { + unsigned idx = cso->num_per_vertex_buffer_infos++; + cso->per_vertex_buffer_info[idx].vertex_buffer_index = i; + cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i]; + vb_compacted_index[i] = idx; + } + } + + for(unsigned i = 0; i < num_elements; ++i) + { + const struct pipe_vertex_element* ve = &elements[i]; + unsigned type = nvfx_vertex_formats[ve->src_format]; + unsigned ncomp = util_format_get_nr_components(ve->src_format); - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + //if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX) + if(ve->instance_divisor) + { + struct nvfx_low_frequency_element* lfve; + cso->vtxfmt[i] = NV30_3D_VTXFMT_TYPE_V32_FLOAT; + + //if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT) + if(0) + lfve = &cso->constant[cso->num_constant++]; + else + { + lfve = &cso->per_instance[cso->num_per_instance++].base; + ((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor; + } - nr = (vc & 0xff); - if (nr) { - OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1)); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; + lfve->idx = i; + lfve->vertex_buffer_index = ve->vertex_buffer_index; + lfve->src_offset = ve->src_offset; + lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float; + lfve->ncomp = ncomp; } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push)); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; + else + { + unsigned idx; + + idx = cso->num_per_vertex++; + cso->per_vertex[idx].idx = i; + cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index; + cso->per_vertex[idx].src_offset = ve->src_offset; + + idx = transkey.nr_elements++; + transkey.element[idx].input_format = ve->src_format; + transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index]; + transkey.element[idx].input_offset = ve->src_offset; + transkey.element[idx].instance_divisor = 0; + transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL; + if(type) + { + transkey.element[idx].output_format = ve->src_format; + cso->vtxfmt[i] = (ncomp << NV30_3D_VTXFMT_SIZE__SHIFT) | type; + } + else + { + unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT}; + transkey.element[idx].output_format = float32[ncomp - 1]; + cso->needs_translate = TRUE; + cso->vtxfmt[i] = (ncomp << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT; } + transkey.element[idx].output_offset = transkey.output_stride; + transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3; } + } - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); + cso->translate = translate_create(&transkey); + cso->vertex_length = transkey.output_stride >> 2; + cso->max_vertices_per_packet = 2047 / MAX2(cso->vertex_length, 1); - count -= vc; - start = restart; - } + return (void *)cso; } -void -nvfx_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) +static void +nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nvfx_context *nvfx = nvfx_context(pipe); - boolean idxbuf; - - idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); - if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) { - nvfx_draw_elements_swtnl(pipe, - indexBuffer, indexSize, indexBias, - mode, start, count); - return; - } - - if (idxbuf) { - nvfx_draw_elements_vbo(pipe, mode, start, count); - } else { - nvfx_draw_elements_inline(pipe, - indexBuffer, indexSize, indexBias, - mode, start, count); - } - - pipe->flush(pipe, 0, NULL); + FREE(hwcso); } -boolean -nvfx_vbo_validate(struct nvfx_context *nvfx) +static void +nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) { - struct nouveau_channel* chan = nvfx->screen->base.channel; - struct pipe_resource *ib = nvfx->idxbuf; - unsigned ib_format = nvfx->idxbuf_format; - int i; - int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); - uint32_t vtxfmt[16]; - unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD; - - if (!elements) - return TRUE; + struct nvfx_context *nvfx = nvfx_context(pipe); - nvfx->vbo_bo = 0; + nvfx->vtxelt = hwcso; + nvfx->use_vertex_buffers = -1; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; +} - MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2); - for (i = 0; i < nvfx->vtxelt->num_elements; i++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; - unsigned type, ncomp; - - ve = &nvfx->vtxelt->pipe[i]; - vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; - - if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { - MARK_UNDO(chan); - nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS; - return FALSE; - } +static void +nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); - if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) { - nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp); - vtxfmt[i] = type; - } else { - vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | - (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type); - nvfx->vbo_bo |= (1 << i); - } + for(unsigned i = 0; i < count; ++i) + { + pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer); + nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset; + nvfx->vtxbuf[i].max_index = vb[i].max_index; + nvfx->vtxbuf[i].stride = vb[i].stride; } - for(; i < elements; ++i) - vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT; - - OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements)); - OUT_RINGp(chan, vtxfmt, elements); - - if(nvfx->is_nv4x) { - unsigned i; - /* seems to be some kind of cache flushing */ - for(i = 0; i < 3; ++i) { - OUT_RING(chan, RING_3D(0x1718, 1)); - OUT_RING(chan, 0); - } - } + for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i) + pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0); - OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements)); - for (i = 0; i < nvfx->vtxelt->num_elements; i++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; + nvfx->vtxbuf_nr = count; + nvfx->use_vertex_buffers = -1; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; +} - ve = &nvfx->vtxelt->pipe[i]; - vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; +static void +nvfx_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); - if (!(nvfx->vbo_bo & (1 << i))) - OUT_RING(chan, 0); - else - { - struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; - OUT_RELOC(chan, bo, - vb->buffer_offset + ve->src_offset, - vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - 0, NV34TCL_VTXBUF_ADDRESS_DMA1); - } + if(ib) + { + pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer); + nvfx->idxbuf.index_size = ib->index_size; + nvfx->idxbuf.offset = ib->offset; } - - for (; i < elements; i++) - OUT_RING(chan, 0); - - OUT_RING(chan, RING_3D(0x1710, 1)); - OUT_RING(chan, 0); - - if (ib) { - unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD; - struct nouveau_bo* bo = nvfx_resource(ib)->bo; - - assert(nvfx->screen->index_buffer_reloc_flags); - - OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2)); - OUT_RELOC(chan, bo, 0, ib_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR, - 0, NV34TCL_IDXBUF_FORMAT_DMA1); + else + { + pipe_resource_reference(&nvfx->idxbuf.buffer, 0); + nvfx->idxbuf.index_size = 0; + nvfx->idxbuf.offset = 0; } - nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements; - return TRUE; + nvfx->dirty |= NVFX_NEW_INDEX; + nvfx->draw_dirty |= NVFX_NEW_INDEX; } void -nvfx_vbo_relocate(struct nvfx_context *nvfx) +nvfx_init_vbo_functions(struct nvfx_context *nvfx) { - struct nouveau_channel* chan = nvfx->screen->base.channel; - unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; - int i; + nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; + nvfx->pipe.set_index_buffer = nvfx_set_index_buffer; - MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3); - for(i = 0; i < nvfx->vtxelt->num_elements; ++i) { - if(nvfx->vbo_bo & (1 << i)) { - struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i]; - struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; - struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; - OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1), - vb_flags, 0, 0); - OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset, - vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - 0, NV34TCL_VTXBUF_ADDRESS_DMA1); - } - } - - if(nvfx->idxbuf) - { - unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; - struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf)->bo; - - assert(nvfx->screen->index_buffer_reloc_flags); - - OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), - ib_flags, 0, 0); - OUT_RELOC(chan, bo, 0, - ib_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RELOC(chan, bo, nvfx->idxbuf_format, - ib_flags | NOUVEAU_BO_OR, - 0, NV34TCL_IDXBUF_FORMAT_DMA1); - } + nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; + nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; + nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; } diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 80b98b62d34..e543fda50ef 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -1,15 +1,20 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "util/u_inlines.h" +#include "util/u_linkage.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_ureg.h" + +#include "draw/draw_context.h" #include "nvfx_context.h" #include "nvfx_state.h" +#include "nvfx_resource.h" /* TODO (at least...): * 1. Indexed consts + ARL @@ -25,26 +30,38 @@ #include "nv30_vertprog.h" #include "nv40_vertprog.h" -#define NVFX_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) +struct nvfx_loop_entry +{ + unsigned brk_target; + unsigned cont_target; +}; struct nvfx_vpc { + struct nvfx_context* nvfx; + struct pipe_shader_state pipe; struct nvfx_vertex_program *vp; + struct tgsi_shader_info* info; struct nvfx_vertex_program_exec *vpi; unsigned r_temps; unsigned r_temps_discard; - struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; - struct nvfx_sreg *r_address; - struct nvfx_sreg *r_temp; + struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_reg *r_address; + struct nvfx_reg *r_temp; + struct nvfx_reg *r_const; + struct nvfx_reg r_0_1; - struct nvfx_sreg *imm; + struct nvfx_reg *imm; unsigned nr_imm; unsigned hpos_idx; + + struct util_dynarray label_relocs; + struct util_dynarray loop_stack; }; -static struct nvfx_sreg +static struct nvfx_reg temp(struct nvfx_vpc *vpc) { int idx = ffs(~vpc->r_temps) - 1; @@ -52,22 +69,22 @@ temp(struct nvfx_vpc *vpc) if (idx < 0) { NOUVEAU_ERR("out of temps!!\n"); assert(0); - return nvfx_sr(NVFXSR_TEMP, 0); + return nvfx_reg(NVFXSR_TEMP, 0); } vpc->r_temps |= (1 << idx); vpc->r_temps_discard |= (1 << idx); - return nvfx_sr(NVFXSR_TEMP, idx); + return nvfx_reg(NVFXSR_TEMP, idx); } -static INLINE void +static inline void release_temps(struct nvfx_vpc *vpc) { vpc->r_temps &= ~vpc->r_temps_discard; vpc->r_temps_discard = 0; } -static struct nvfx_sreg +static struct nvfx_reg constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w) { struct nvfx_vertex_program *vp = vpc->vp; @@ -77,7 +94,7 @@ constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w) if (pipe >= 0) { for (idx = 0; idx < vp->nr_consts; idx++) { if (vp->consts[idx].index == pipe) - return nvfx_sr(NVFXSR_CONST, idx); + return nvfx_reg(NVFXSR_CONST, idx); } } @@ -90,35 +107,36 @@ constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w) vpd->value[1] = y; vpd->value[2] = z; vpd->value[3] = w; - return nvfx_sr(NVFXSR_CONST, idx); + return nvfx_reg(NVFXSR_CONST, idx); } -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nvfx_vp_arith(nvfx, (cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2)) +#define arith(s,t,o,d,m,s0,s1,s2) \ + nvfx_insn((s), (NVFX_VP_INST_SLOT_##t << 7) | NVFX_VP_INST_##t##_OP_##o, -1, (d), (m), (s0), (s1), (s2)) static void -emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src) +emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_src src) { struct nvfx_vertex_program *vp = vpc->vp; uint32_t sr = 0; + struct nvfx_relocation reloc; - switch (src.type) { + switch (src.reg.type) { case NVFXSR_TEMP: sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT)); - sr |= (src.index << NVFX_VP(SRC_TEMP_SRC_SHIFT)); + sr |= (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT)); break; case NVFXSR_INPUT: sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << NVFX_VP(SRC_REG_TYPE_SHIFT)); - vp->ir |= (1 << src.index); - hw[1] |= (src.index << NVFX_VP(INST_INPUT_SRC_SHIFT)); + vp->ir |= (1 << src.reg.index); + hw[1] |= (src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT)); break; case NVFXSR_CONST: sr |= (NVFX_VP(SRC_REG_TYPE_CONST) << NVFX_VP(SRC_REG_TYPE_SHIFT)); - assert(vpc->vpi->const_index == -1 || - vpc->vpi->const_index == src.index); - vpc->vpi->const_index = src.index; + reloc.location = vp->nr_insns - 1; + reloc.target = src.reg.index; + util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc); break; case NVFXSR_NONE: sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << @@ -139,6 +157,18 @@ emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) | (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT))); + if(src.indirect) { + if(src.reg.type == NVFXSR_CONST) + hw[3] |= NVFX_VP(INST_INDEX_CONST); + else if(src.reg.type == NVFXSR_INPUT) + hw[0] |= NVFX_VP(INST_INDEX_INPUT); + else + assert(0); + if(src.indirect_reg) + hw[0] |= NVFX_VP(INST_ADDR_REG_SELECT_1); + hw[0] |= src.indirect_swz << NVFX_VP(INST_ADDR_SWZ_SHIFT); + } + switch (pos) { case 0: hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >> @@ -161,100 +191,67 @@ emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, } static void -emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst) +emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_reg dst) { struct nvfx_vertex_program *vp = vpc->vp; switch (dst.type) { + case NVFXSR_NONE: + if(!nvfx->is_nv4x) + hw[0] |= NV30_VP_INST_DEST_TEMP_ID_MASK; + else { + hw[3] |= NV40_VP_INST_DEST_MASK; + if (slot == 0) + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK; + else + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } + break; case NVFXSR_TEMP: if(!nvfx->is_nv4x) hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); else { hw[3] |= NV40_VP_INST_DEST_MASK; - if (slot == 0) { - hw[0] |= (dst.index << - NV40_VP_INST_VEC_DEST_TEMP_SHIFT); - } else { - hw[3] |= (dst.index << - NV40_VP_INST_SCA_DEST_TEMP_SHIFT); - } + if (slot == 0) + hw[0] |= (dst.index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + else + hw[3] |= (dst.index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT); } break; case NVFXSR_OUTPUT: /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */ - switch (dst.index) { - case NVFX_VP_INST_DEST_CLIP(0): - vp->or |= (1 << 6); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0; - dst.index = NVFX_VP(INST_DEST_FOGC); - break; - case NVFX_VP_INST_DEST_CLIP(1): - vp->or |= (1 << 7); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1; - dst.index = NVFX_VP(INST_DEST_FOGC); - break; - case NVFX_VP_INST_DEST_CLIP(2): - vp->or |= (1 << 8); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2; - dst.index = NVFX_VP(INST_DEST_FOGC); - break; - case NVFX_VP_INST_DEST_CLIP(3): - vp->or |= (1 << 9); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3; - dst.index = NVFX_VP(INST_DEST_PSZ); - break; - case NVFX_VP_INST_DEST_CLIP(4): - vp->or |= (1 << 10); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4; - dst.index = NVFX_VP(INST_DEST_PSZ); - break; - case NVFX_VP_INST_DEST_CLIP(5): - vp->or |= (1 << 11); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5; - dst.index = NVFX_VP(INST_DEST_PSZ); - break; - default: - if(!nvfx->is_nv4x) { - switch (dst.index) { - case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV30_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; - case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; - case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - } - } else { - switch (dst.index) { - case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; - case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; - case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - } + if(nvfx->is_nv4x) { + switch (dst.index) { + case NV30_VP_INST_DEST_CLP(0): + dst.index = NVFX_VP(INST_DEST_FOGC); + break; + case NV30_VP_INST_DEST_CLP(1): + dst.index = NVFX_VP(INST_DEST_FOGC); + break; + case NV30_VP_INST_DEST_CLP(2): + dst.index = NVFX_VP(INST_DEST_FOGC); + break; + case NV30_VP_INST_DEST_CLP(3): + dst.index = NVFX_VP(INST_DEST_PSZ); + break; + case NV30_VP_INST_DEST_CLP(4): + dst.index = NVFX_VP(INST_DEST_PSZ); + break; + case NV30_VP_INST_DEST_CLP(5): + dst.index = NVFX_VP(INST_DEST_PSZ); + break; + case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; + case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; } - break; } if(!nvfx->is_nv4x) { hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); - hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK; /*XXX: no way this is entirely correct, someone needs to * figure out what exactly it is. @@ -264,7 +261,7 @@ emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); if (slot == 0) { hw[0] |= NV40_VP_INST_VEC_RESULT; - hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK; } else { hw[3] |= NV40_VP_INST_SCA_RESULT; hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; @@ -277,80 +274,99 @@ emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot } static void -nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op, - struct nvfx_sreg dst, int mask, - struct nvfx_sreg s0, struct nvfx_sreg s1, - struct nvfx_sreg s2) +nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn) { + struct nvfx_context* nvfx = vpc->nvfx; struct nvfx_vertex_program *vp = vpc->vp; + unsigned slot = insn.op >> 7; + unsigned op = insn.op & 0x7f; uint32_t *hw; vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); vpc->vpi = &vp->insns[vp->nr_insns - 1]; memset(vpc->vpi, 0, sizeof(*vpc->vpi)); - vpc->vpi->const_index = -1; hw = vpc->vpi->data; - hw[0] |= (NVFX_COND_TR << NVFX_VP(INST_COND_SHIFT)); - hw[0] |= ((0 << NVFX_VP(INST_COND_SWZ_X_SHIFT)) | - (1 << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) | - (2 << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) | - (3 << NVFX_VP(INST_COND_SWZ_W_SHIFT))); + hw[0] |= (insn.cc_test << NVFX_VP(INST_COND_SHIFT)); + hw[0] |= ((insn.cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) | + (insn.cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) | + (insn.cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) | + (insn.cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT))); + if(insn.cc_update) + hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE); + + if(insn.sat) + { + assert(nvfx->use_nv4x); + if(nvfx->use_nv4x) + hw[0] |= NV40_VP_INST_SATURATE; + } if(!nvfx->is_nv4x) { - hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); + if(slot == 0) + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); + else + { + hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT); + hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT); + } // hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK); // hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT)); - if (dst.type == NVFXSR_OUTPUT) { + if (insn.dst.type == NVFXSR_OUTPUT) { if (slot) - hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); + hw[3] |= (insn.mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); else - hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); + hw[3] |= (insn.mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); } else { if (slot) - hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); + hw[3] |= (insn.mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); else - hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); + hw[3] |= (insn.mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); } } else { if (slot == 0) { hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; - hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + hw[3] |= (insn.mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); } else { hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); - hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); - hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK ; + hw[3] |= (insn.mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); } } - emit_dst(nvfx, vpc, hw, slot, dst); - emit_src(nvfx, vpc, hw, 0, s0); - emit_src(nvfx, vpc, hw, 1, s1); - emit_src(nvfx, vpc, hw, 2, s2); + emit_dst(nvfx, vpc, hw, slot, insn.dst); + emit_src(nvfx, vpc, hw, 0, insn.src[0]); + emit_src(nvfx, vpc, hw, 1, insn.src[1]); + emit_src(nvfx, vpc, hw, 2, insn.src[2]); + +// if(insn.src[0].indirect || op == NVFX_VP_INST_VEC_OP_ARL) +// hw[3] |= NV40_VP_INST_SCA_RESULT; } -static INLINE struct nvfx_sreg +static inline struct nvfx_src tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nvfx_sreg src = { 0 }; + struct nvfx_src src; switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index); + src.reg = nvfx_reg(NVFXSR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); + src.reg = vpc->r_const[fsrc->Register.Index]; break; case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->Register.Index]; + src.reg = vpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = vpc->r_temp[fsrc->Register.Index]; + src.reg = vpc->r_temp[fsrc->Register.Index]; break; default: NOUVEAU_ERR("bad src file\n"); + src.reg.index = 0; + src.reg.type = -1; break; } @@ -360,14 +376,35 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { src.swz[1] = fsrc->Register.SwizzleY; src.swz[2] = fsrc->Register.SwizzleZ; src.swz[3] = fsrc->Register.SwizzleW; + src.indirect = 0; + src.indirect_reg = 0; + src.indirect_swz = 0; + + if(fsrc->Register.Indirect) { + if(fsrc->Indirect.File == TGSI_FILE_ADDRESS && + (fsrc->Register.File == TGSI_FILE_CONSTANT || fsrc->Register.File == TGSI_FILE_INPUT)) + { + src.indirect = 1; + src.indirect_reg = fsrc->Indirect.Index; + src.indirect_swz = fsrc->Indirect.SwizzleX; + } + else + { + src.reg.index = 0; + src.reg.type = -1; + } + } return src; } -static INLINE struct nvfx_sreg +static INLINE struct nvfx_reg tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nvfx_sreg dst = { 0 }; + struct nvfx_reg dst; switch (fdst->Register.File) { + case TGSI_FILE_NULL: + dst = nvfx_reg(NVFXSR_NONE, 0); + break; case TGSI_FILE_OUTPUT: dst = vpc->r_result[fdst->Register.Index]; break; @@ -378,14 +415,16 @@ tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) { dst = vpc->r_address[fdst->Register.Index]; break; default: - NOUVEAU_ERR("bad dst file\n"); + NOUVEAU_ERR("bad dst file %i\n", fdst->Register.File); + dst.index = 0; + dst.type = 0; break; } return dst; } -static INLINE int +static inline int tgsi_mask(uint tgsi) { int mask = 0; @@ -399,16 +438,20 @@ tgsi_mask(uint tgsi) static boolean nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, - const struct tgsi_full_instruction *finst) + unsigned idx, const struct tgsi_full_instruction *finst) { - struct nvfx_sreg src[3], dst, tmp; - struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_src src[3], tmp; + struct nvfx_reg dst; + struct nvfx_reg final_dst; + struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); + struct nvfx_insn insn; + struct nvfx_relocation reloc; + struct nvfx_loop_entry loop; + boolean sat = FALSE; int mask; int ai = -1, ci = -1, ii = -1; int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; + unsigned sub_depth = 0; for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; @@ -430,9 +473,8 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, ai = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL, - tgsi_src(vpc, fsrc), none, none); + src[i] = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none)); } break; case TGSI_FILE_CONSTANT: @@ -441,9 +483,8 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, ci = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL, - tgsi_src(vpc, fsrc), none, none); + src[i] = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none)); } break; case TGSI_FILE_IMMEDIATE: @@ -452,9 +493,8 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, ii = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL, - tgsi_src(vpc, fsrc), none, none); + src[i] = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none)); } break; case TGSI_FILE_TEMPORARY: @@ -466,138 +506,294 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, } } - dst = tgsi_dst(vpc, &finst->Dst[0]); + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + if(src[i].reg.type < 0) + return FALSE; + } + + if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS && + finst->Instruction.Opcode != TGSI_OPCODE_ARL) + return FALSE; + + final_dst = dst = tgsi_dst(vpc, &finst->Dst[0]); mask = tgsi_mask(finst->Dst[0].Register.WriteMask); + if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) + { + assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL); + if(nvfx->use_nv4x) + sat = TRUE; + else if(dst.type != NVFXSR_TEMP) + dst = temp(vpc); + } switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: - arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none); + nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, abs(src[0]), none, none)); break; case TGSI_OPCODE_ADD: - arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]); + nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, src[1])); break; case TGSI_OPCODE_ARL: - arith(vpc, VEC, ARL, dst, mask, src[0], none, none); + nvfx_vp_emit(vpc, arith(0, VEC, ARL, dst, mask, src[0], none, none)); + break; + case TGSI_OPCODE_CMP: + insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); + insn.cc_update = 1; + nvfx_vp_emit(vpc, insn); + + insn = arith(sat, VEC, MOV, dst, mask, src[2], none, none); + insn.cc_test = NVFX_COND_GE; + nvfx_vp_emit(vpc, insn); + + insn = arith(sat, VEC, MOV, dst, mask, src[1], none, none); + insn.cc_test = NVFX_COND_LT; + nvfx_vp_emit(vpc, insn); break; case TGSI_OPCODE_COS: - arith(vpc, SCA, COS, dst, mask, none, none, src[0]); + nvfx_vp_emit(vpc, arith(sat, SCA, COS, dst, mask, none, none, src[0])); break; + case TGSI_OPCODE_DP2: + tmp = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X | NVFX_VP_MASK_Y, src[0], src[1], none)); + nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, swz(tmp, X, X, X, X), none, swz(tmp, Y, Y, Y, Y))); + break; case TGSI_OPCODE_DP3: - arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, DP3, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_DP4: - arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, DP4, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_DPH: - arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, DPH, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_DST: - arith(vpc, VEC, DST, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, DST, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_EX2: - arith(vpc, SCA, EX2, dst, mask, none, none, src[0]); + nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, src[0])); break; case TGSI_OPCODE_EXP: - arith(vpc, SCA, EXP, dst, mask, none, none, src[0]); + nvfx_vp_emit(vpc, arith(sat, SCA, EXP, dst, mask, none, none, src[0])); break; case TGSI_OPCODE_FLR: - arith(vpc, VEC, FLR, dst, mask, src[0], none, none); + nvfx_vp_emit(vpc, arith(sat, VEC, FLR, dst, mask, src[0], none, none)); break; case TGSI_OPCODE_FRC: - arith(vpc, VEC, FRC, dst, mask, src[0], none, none); + nvfx_vp_emit(vpc, arith(sat, VEC, FRC, dst, mask, src[0], none, none)); break; case TGSI_OPCODE_LG2: - arith(vpc, SCA, LG2, dst, mask, none, none, src[0]); + nvfx_vp_emit(vpc, arith(sat, SCA, LG2, dst, mask, none, none, src[0])); break; case TGSI_OPCODE_LIT: - arith(vpc, SCA, LIT, dst, mask, none, none, src[0]); + nvfx_vp_emit(vpc, arith(sat, SCA, LIT, dst, mask, none, none, src[0])); break; case TGSI_OPCODE_LOG: - arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); + nvfx_vp_emit(vpc, arith(sat, SCA, LOG, dst, mask, none, none, src[0])); break; case TGSI_OPCODE_LRP: - tmp = temp(vpc); - arith(vpc, VEC, MAD, tmp, mask, neg(src[0]), src[2], src[2]); - arith(vpc, VEC, MAD, dst, mask, src[0], src[1], tmp); + tmp = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2])); + nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], tmp)); break; case TGSI_OPCODE_MAD: - arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); + nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], src[2])); break; case TGSI_OPCODE_MAX: - arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, MAX, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_MIN: - arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, MIN, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_MOV: - arith(vpc, VEC, MOV, dst, mask, src[0], none, none); + nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, src[0], none, none)); break; case TGSI_OPCODE_MUL: - arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, MUL, dst, mask, src[0], src[1], none)); + break; + case TGSI_OPCODE_NOP: break; case TGSI_OPCODE_POW: - tmp = temp(vpc); - arith(vpc, SCA, LG2, tmp, NVFX_VP_MASK_X, none, none, - swz(src[0], X, X, X, X)); - arith(vpc, VEC, MUL, tmp, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(vpc, SCA, EX2, dst, mask, none, none, - swz(tmp, X, X, X, X)); + tmp = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, SCA, LG2, tmp.reg, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X))); + nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none)); + nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X))); break; case TGSI_OPCODE_RCP: - arith(vpc, SCA, RCP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_RET: + nvfx_vp_emit(vpc, arith(sat, SCA, RCP, dst, mask, none, none, src[0])); break; case TGSI_OPCODE_RSQ: - arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0])); + nvfx_vp_emit(vpc, arith(sat, SCA, RSQ, dst, mask, none, none, abs(src[0]))); break; case TGSI_OPCODE_SEQ: - arith(vpc, VEC, SEQ, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, SEQ, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SFL: - arith(vpc, VEC, SFL, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, SFL, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SGE: - arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, SGE, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SGT: - arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, SGT, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SIN: - arith(vpc, SCA, SIN, dst, mask, none, none, src[0]); + nvfx_vp_emit(vpc, arith(sat, SCA, SIN, dst, mask, none, none, src[0])); break; case TGSI_OPCODE_SLE: - arith(vpc, VEC, SLE, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, SLE, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SLT: - arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, SLT, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SNE: - arith(vpc, VEC, SNE, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, SNE, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SSG: - arith(vpc, VEC, SSG, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, SSG, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_STR: - arith(vpc, VEC, STR, dst, mask, src[0], src[1], none); + nvfx_vp_emit(vpc, arith(sat, VEC, STR, dst, mask, src[0], src[1], none)); break; case TGSI_OPCODE_SUB: - arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); + nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, neg(src[1]))); break; + case TGSI_OPCODE_TRUNC: + tmp = nvfx_src(temp(vpc)); + insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); + insn.cc_update = 1; + nvfx_vp_emit(vpc, insn); + + nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, abs(src[0]), none, none)); + nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, tmp, none, none)); + + insn = arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none); + insn.cc_test = NVFX_COND_LT; + nvfx_vp_emit(vpc, insn); + break; case TGSI_OPCODE_XPD: - tmp = temp(vpc); - arith(vpc, VEC, MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); + tmp = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none)); + nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp))); + break; + + case TGSI_OPCODE_IF: + insn = arith(0, VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none); + insn.cc_update = 1; + nvfx_vp_emit(vpc, insn); + + reloc.location = vpc->vp->nr_insns; + reloc.target = finst->Label.Label + 1; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + + insn = arith(0, SCA, BRA, none.reg, 0, none, none, none); + insn.cc_test = NVFX_COND_EQ; + insn.cc_swz[0] = insn.cc_swz[1] = insn.cc_swz[2] = insn.cc_swz[3] = 0; + nvfx_vp_emit(vpc, insn); + break; + + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_BRA: + case TGSI_OPCODE_CAL: + reloc.location = vpc->vp->nr_insns; + reloc.target = finst->Label.Label; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + + if(finst->Instruction.Opcode == TGSI_OPCODE_CAL) + insn = arith(0, SCA, CAL, none.reg, 0, none, none, none); + else + insn = arith(0, SCA, BRA, none.reg, 0, none, none, none); + nvfx_vp_emit(vpc, insn); + break; + + case TGSI_OPCODE_RET: + if(sub_depth || !nvfx->use_vp_clipping) { + tmp = none; + tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0; + nvfx_vp_emit(vpc, arith(0, SCA, RET, none.reg, 0, none, none, tmp)); + } else { + reloc.location = vpc->vp->nr_insns; + reloc.target = vpc->info->num_instructions; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); + } + break; + + case TGSI_OPCODE_BGNSUB: + ++sub_depth; + break; + case TGSI_OPCODE_ENDSUB: + --sub_depth; + break; + case TGSI_OPCODE_ENDIF: + /* nothing to do here */ + break; + + case TGSI_OPCODE_BGNLOOP: + loop.cont_target = idx; + loop.brk_target = finst->Label.Label + 1; + util_dynarray_append(&vpc->loop_stack, struct nvfx_loop_entry, loop); + break; + + case TGSI_OPCODE_ENDLOOP: + loop = util_dynarray_pop(&vpc->loop_stack, struct nvfx_loop_entry); + + reloc.location = vpc->vp->nr_insns; + reloc.target = loop.cont_target; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + + nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); + break; + + case TGSI_OPCODE_CONT: + loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry); + + reloc.location = vpc->vp->nr_insns; + reloc.target = loop.cont_target; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + + nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); break; + + case TGSI_OPCODE_BRK: + loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry); + + reloc.location = vpc->vp->nr_insns; + reloc.target = loop.brk_target; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + + nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); + break; + + case TGSI_OPCODE_END: + assert(!sub_depth); + if(nvfx->use_vp_clipping) { + if(idx != (vpc->info->num_instructions - 1)) { + reloc.location = vpc->vp->nr_insns; + reloc.target = vpc->info->num_instructions; + util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); + nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); + } + } else { + if(vpc->vp->nr_insns) + vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; + nvfx_vp_emit(vpc, arith(0, VEC, NOP, none.reg, 0, none, none, none)); + vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; + } + break; + default: NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); return FALSE; } + if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE && !nvfx->use_nv4x) + { + if(!vpc->r_0_1.type) + vpc->r_0_1 = constant(vpc, -1, 0, 1, 0, 0); + nvfx_vp_emit(vpc, arith(0, VEC, MAX, dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), X, X, X, X), none)); + nvfx_vp_emit(vpc, arith(0, VEC, MIN, final_dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), Y, Y, Y, Y), none)); + } + release_temps(vpc); return TRUE; } @@ -643,12 +839,13 @@ nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, hw = NVFX_VP(INST_DEST_PSZ); break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NVFX_VP(INST_DEST_TC(fdec->Semantic.Index)); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } + hw = (vpc->vp->generic_to_fp_input[fdec->Semantic.Index] & 0xf) - NVFX_FP_OP_INPUT_SRC_TC(0); + if(hw <= 8) + hw = NVFX_VP(INST_DEST_TC(hw)); + else if(hw == 9) /* TODO: this is correct, but how does this overlapping work exactly? */ + hw = NV40_VP_INST_DEST_PSZ; + else + assert(0); break; case TGSI_SEMANTIC_EDGEFLAG: /* not really an error just a fallback */ @@ -659,7 +856,7 @@ nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, return FALSE; } - vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); + vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw); return TRUE; } @@ -667,9 +864,40 @@ static boolean nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) { struct tgsi_parse_context p; - int high_temp = -1, high_addr = -1, nr_imm = 0, i; + int high_const = -1, high_temp = -1, high_addr = -1, nr_imm = 0, i; + struct util_semantic_set set; + unsigned char sem_layout[10]; + unsigned num_outputs; + unsigned num_texcoords = nvfx->is_nv4x ? 10 : 8; + + num_outputs = util_semantic_set_from_program_file(&set, vpc->pipe.tokens, TGSI_FILE_OUTPUT); + + if(num_outputs > num_texcoords) { + NOUVEAU_ERR("too many vertex program outputs: %i\n", num_outputs); + return FALSE; + } + util_semantic_layout_from_set(sem_layout, &set, num_texcoords, num_texcoords); + + /* hope 0xf is (0, 0, 0, 1) initialized; otherwise, we are _probably_ not required to do this */ + memset(vpc->vp->generic_to_fp_input, 0x0f, sizeof(vpc->vp->generic_to_fp_input)); + for(int i = 0; i < num_texcoords; ++i) { + if(sem_layout[i] == 0xff) + continue; + //printf("vp: GENERIC[%i] to fpreg %i\n", sem_layout[i], NVFX_FP_OP_INPUT_SRC_TC(0) + i); + vpc->vp->generic_to_fp_input[sem_layout[i]] = 0xf0 | NVFX_FP_OP_INPUT_SRC_TC(i); + } - tgsi_parse_init(&p, vpc->vp->pipe.tokens); + vpc->vp->sprite_fp_input = -1; + for(int i = 0; i < num_texcoords; ++i) + { + if(sem_layout[i] == 0xff) + { + vpc->vp->sprite_fp_input = NVFX_FP_OP_INPUT_SRC_TC(i); + break; + } + } + + tgsi_parse_init(&p, vpc->pipe.tokens); while (!tgsi_parse_end_of_tokens(&p)) { const union tgsi_full_token *tok = &p.FullToken; @@ -690,14 +918,18 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) fdec->Range.Last; } break; -#if 0 /* this would be nice.. except gallium doesn't track it */ case TGSI_FILE_ADDRESS: if (fdec->Range.Last > high_addr) { high_addr = fdec->Range.Last; } break; -#endif + case TGSI_FILE_CONSTANT: + if (fdec->Range.Last > high_const) { + high_const = + fdec->Range.Last; + } + break; case TGSI_FILE_OUTPUT: if (!nvfx_vertprog_parse_decl_output(nvfx, vpc, fdec)) return FALSE; @@ -707,23 +939,6 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) } } break; -#if 1 /* yay, parse instructions looking for address regs instead */ - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - const struct tgsi_full_dst_register *fdst; - - finst = &p.FullToken.FullInstruction; - fdst = &finst->Dst[0]; - - if (fdst->Register.File == TGSI_FILE_ADDRESS) { - if (fdst->Register.Index > high_addr) - high_addr = fdst->Register.Index; - } - - } - break; -#endif default: break; } @@ -731,56 +946,88 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) tgsi_parse_free(&p); if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg)); + vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_reg)); assert(vpc->imm); } if (++high_temp) { - vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); + vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg)); for (i = 0; i < high_temp; i++) vpc->r_temp[i] = temp(vpc); } if (++high_addr) { - vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg)); + vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_reg)); for (i = 0; i < high_addr; i++) - vpc->r_address[i] = temp(vpc); + vpc->r_address[i] = nvfx_reg(NVFXSR_TEMP, i); + } + + if(++high_const) { + vpc->r_const = CALLOC(high_const, sizeof(struct nvfx_reg)); + for (i = 0; i < high_const; i++) + vpc->r_const[i] = constant(vpc, i, 0, 0, 0, 0); } vpc->r_temps_discard = 0; return TRUE; } -static void -nvfx_vertprog_translate(struct nvfx_context *nvfx, - struct nvfx_vertex_program *vp) +DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE) + +static struct nvfx_vertex_program* +nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps, struct tgsi_shader_info* info) { struct tgsi_parse_context parse; + struct nvfx_vertex_program* vp = NULL; struct nvfx_vpc *vpc = NULL; - struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); + struct util_dynarray insns; int i; - vpc = CALLOC(1, sizeof(struct nvfx_vpc)); + tgsi_parse_init(&parse, vps->tokens); + + vp = CALLOC_STRUCT(nvfx_vertex_program); + if(!vp) + goto out_err; + + vpc = CALLOC_STRUCT(nvfx_vpc); if (!vpc) - return; + goto out_err; + + vpc->nvfx = nvfx; vpc->vp = vp; + vpc->pipe = *vps; + vpc->info = info; + + { + // TODO: use a 64-bit atomic here! + static unsigned long long id = 0; + vp->id = ++id; + } + + /* reserve space for ucps */ + if(nvfx->use_vp_clipping) + { + for(i = 0; i < 6; ++i) + constant(vpc, -1, 0, 0, 0, 0); + } if (!nvfx_vertprog_prepare(nvfx, vpc)) { FREE(vpc); - return; + return NULL; } /* Redirect post-transform vertex position to a temp if user clip * planes are enabled. We need to append code to the vtxprog * to handle clip planes later. */ - if (vp->ucp.nr) { + /* TODO: maybe support patching this depending on whether there are ucps: not sure if it is really matters much */ + if (nvfx->use_vp_clipping) { vpc->r_result[vpc->hpos_idx] = temp(vpc); vpc->r_temps_discard = 0; } - tgsi_parse_init(&parse, vp->pipe.tokens); - + util_dynarray_init(&insns); while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); @@ -803,8 +1050,10 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, case TGSI_TOKEN_TYPE_INSTRUCTION: { const struct tgsi_full_instruction *finst; + unsigned idx = insns.size >> 2; + util_dynarray_append(&insns, unsigned, vp->nr_insns); finst = &parse.FullToken.FullInstruction; - if (!nvfx_vertprog_parse_instruction(nvfx, vpc, finst)) + if (!nvfx_vertprog_parse_instruction(nvfx, vpc, idx, finst)) goto out_err; } break; @@ -813,91 +1062,164 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, } } + util_dynarray_append(&insns, unsigned, vp->nr_insns); + + for(unsigned i = 0; i < vpc->label_relocs.size; i += sizeof(struct nvfx_relocation)) + { + struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)vpc->label_relocs.data + i); + struct nvfx_relocation hw_reloc; + + hw_reloc.location = label_reloc->location; + hw_reloc.target = ((unsigned*)insns.data)[label_reloc->target]; + + //debug_printf("hw %u -> tgsi %u = hw %u\n", hw_reloc.location, label_reloc->target, hw_reloc.target); + + util_dynarray_append(&vp->branch_relocs, struct nvfx_relocation, hw_reloc); + } + util_dynarray_fini(&insns); + util_dynarray_trim(&vp->branch_relocs); + + /* XXX: what if we add a RET before?! make sure we jump here...*/ + /* Write out HPOS if it was redirected to a temp earlier */ if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { - struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT, + struct nvfx_reg hpos = nvfx_reg(NVFXSR_OUTPUT, NVFX_VP(INST_DEST_POS)); - struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; + struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]); - arith(vpc, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none); + nvfx_vp_emit(vpc, arith(0, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none)); } /* Insert code to handle user clip planes */ - for (i = 0; i < vp->ucp.nr; i++) { - struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT, - NVFX_VP_INST_DEST_CLIP(i)); - struct nvfx_sreg ceqn = constant(vpc, -1, - nvfx->clip.ucp[i][0], - nvfx->clip.ucp[i][1], - nvfx->clip.ucp[i][2], - nvfx->clip.ucp[i][3]); - struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; - unsigned mask; - - switch (i) { - case 0: case 3: mask = NVFX_VP_MASK_Y; break; - case 1: case 4: mask = NVFX_VP_MASK_Z; break; - case 2: case 5: mask = NVFX_VP_MASK_W; break; - default: - NOUVEAU_ERR("invalid clip dist #%d\n", i); - goto out_err; + if(nvfx->use_vp_clipping) + { + for (i = 0; i < 6; i++) { + struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i)); + struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, i)); + struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]); + unsigned mask; + + if(nvfx->is_nv4x) + { + switch (i) { + case 0: case 3: mask = NVFX_VP_MASK_Y; break; + case 1: case 4: mask = NVFX_VP_MASK_Z; break; + case 2: case 5: mask = NVFX_VP_MASK_W; break; + default: + NOUVEAU_ERR("invalid clip dist #%d\n", i); + goto out_err; + } + } + else + mask = NVFX_VP_MASK_X; + + nvfx_vp_emit(vpc, arith(0, VEC, DP4, cdst, mask, htmp, ceqn, none)); } + } - arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none); + if(debug_get_option_nvfx_dump_vp()) + { + debug_printf("\n"); + tgsi_dump(vpc->pipe.tokens, 0); + + debug_printf("\n%s vertex program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x"); + for (i = 0; i < vp->nr_insns; i++) + debug_printf("%3u: %08x %08x %08x %08x\n", i, vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]); + debug_printf("\n"); } - vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; - vp->translated = TRUE; -out_err: + vp->clip_nr = -1; + vp->exec_start = -1; + +out: tgsi_parse_free(&parse); - if (vpc->r_temp) + if(vpc) { + util_dynarray_fini(&vpc->label_relocs); + util_dynarray_fini(&vpc->loop_stack); FREE(vpc->r_temp); - if (vpc->r_address) FREE(vpc->r_address); - if (vpc->imm) + FREE(vpc->r_const); FREE(vpc->imm); - FREE(vpc); + FREE(vpc); + } + return vp; + +out_err: + FREE(vp); + vp = NULL; + goto out; +} + +static struct nvfx_vertex_program* +nvfx_vertprog_translate_draw_vp(struct nvfx_context *nvfx, struct nvfx_pipe_vertex_program* pvp) +{ + struct nvfx_vertex_program* vp = NULL; + struct pipe_shader_state vps; + struct tgsi_shader_info info; + struct ureg_program *ureg = NULL; + unsigned num_outputs = MIN2(pvp->info.num_outputs, 16); + + ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); + if(ureg == NULL) + return 0; + + for (unsigned i = 0; i < num_outputs; i++) + ureg_MOV(ureg, ureg_DECL_output(ureg, pvp->info.output_semantic_name[i], pvp->info.output_semantic_index[i]), ureg_DECL_vs_input(ureg, i)); + + ureg_END( ureg ); + + vps.tokens = ureg_get_tokens(ureg, 0); + tgsi_scan_shader(vps.tokens, &info); + vp = nvfx_vertprog_translate(nvfx, &vps, &info); + ureg_free_tokens(vps.tokens); + ureg_destroy(ureg); + + return vp; } boolean nvfx_vertprog_validate(struct nvfx_context *nvfx) { - struct pipe_context *pipe = &nvfx->pipe; struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - struct nvfx_vertex_program *vp; + struct nvfx_pipe_vertex_program *pvp = nvfx->vertprog; + struct nvfx_vertex_program* vp; struct pipe_resource *constbuf; - struct pipe_transfer *transfer = NULL; boolean upload_code = FALSE, upload_data = FALSE; int i; if (nvfx->render_mode == HW) { - vp = nvfx->vertprog; - constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; + nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; + vp = pvp->vp; - // TODO: ouch! can't we just use constant slots for these?! - if ((nvfx->dirty & NVFX_NEW_UCP) || - memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) { - nvfx_vertprog_destroy(nvfx, vp); - memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp)); + if(!vp) { + vp = nvfx_vertprog_translate(nvfx, &pvp->pipe, &pvp->info); + if(!vp) + vp = NVFX_VP_FAILED; + pvp->vp = vp; } - } else { - vp = nvfx->swtnl.vertprog; - constbuf = NULL; - } - /* Translate TGSI shader into hw bytecode */ - if (!vp->translated) - { - nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; - nvfx_vertprog_translate(nvfx, vp); - if (!vp->translated) { + if(vp == NVFX_VP_FAILED) { nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; return FALSE; } + + constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; + } else { + vp = pvp->draw_vp; + if(!vp) + { + pvp->draw_vp = vp = nvfx_vertprog_translate_draw_vp(nvfx, pvp); + if(!vp) { + _debug_printf("Error: unable to create a swtnl passthrough vertex shader: aborting."); + abort(); + } + } + constbuf = NULL; } + nvfx->hw_vertprog = vp; + /* Allocate hw vtxprog exec slots */ if (!vp->exec) { struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; @@ -912,7 +1234,11 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) } if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) - assert(0); + { + debug_printf("Vertex shader too long: %u instructions\n", vplen); + nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; + return FALSE; + } } upload_code = TRUE; @@ -931,9 +1257,15 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) } if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) - assert(0); + { + debug_printf("Vertex shader uses too many constants: %u constants\n", vp->nr_consts); + nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; + return FALSE; + } } + //printf("start at %u nc %u\n", vp->data->start, vp->nr_consts); + /*XXX: handle this some day */ assert(vp->data->start >= vp->data_start_min); @@ -946,44 +1278,71 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) * fixup offsets and register IDs. */ if (vp->exec_start != vp->exec->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; + //printf("vp_relocs %u -> %u\n", vp->exec_start, vp->exec->start); + for(unsigned i = 0; i < vp->branch_relocs.size; i += sizeof(struct nvfx_relocation)) + { + struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->branch_relocs.data + i); + uint32_t* hw = vp->insns[reloc->location].data; + unsigned target = vp->exec->start + reloc->target; + + //debug_printf("vp_reloc hw %u -> hw %u\n", reloc->location, target); + + if(!nvfx->is_nv4x) + { + hw[2] &=~ NV30_VP_INST_IADDR_MASK; + hw[2] |= (target & 0x1ff) << NV30_VP_INST_IADDR_SHIFT; + } + else + { + hw[3] &=~ NV40_VP_INST_IADDRL_MASK; + hw[3] |= (target & 7) << NV40_VP_INST_IADDRL_SHIFT; - if (vpi->has_branch_offset) { - assert(0); + hw[2] &=~ NV40_VP_INST_IADDRH_MASK; + hw[2] |= ((target >> 3) & 0x3f) << NV40_VP_INST_IADDRH_SHIFT; } } vp->exec_start = vp->exec->start; } - if (vp->nr_consts && vp->data_start != vp->data->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; + if (vp->data_start != vp->data->start) { + for(unsigned i = 0; i < vp->const_relocs.size; i += sizeof(struct nvfx_relocation)) + { + struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->const_relocs.data + i); + struct nvfx_vertex_program_exec *vpi = &vp->insns[reloc->location]; - if (vpi->const_index >= 0) { - vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK); - vpi->data[1] |= - (vpi->const_index + vp->data->start) << - NVFX_VP(INST_CONST_SRC_SHIFT); + //printf("reloc %i to %i + %i\n", reloc->location, vp->data->start, reloc->target); - } + vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK); + vpi->data[1] |= + (reloc->target + vp->data->start) << + NVFX_VP(INST_CONST_SRC_SHIFT); } vp->data_start = vp->data->start; + upload_code = TRUE; } /* Update + Upload constant values */ if (vp->nr_consts) { float *map = NULL; - if (constbuf) { - map = pipe_buffer_map(pipe, constbuf, - PIPE_TRANSFER_READ, - &transfer); + if (constbuf) + map = (float*)nvfx_buffer(constbuf)->data; + + /* + * WAIT_RING(chan, 512 * 6); + for (i = 0; i < 512; i++) { + float v[4] = {0.1, 0,2, 0.3, 0.4}; + OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5)); + OUT_RING(chan, i); + OUT_RINGp(chan, (uint32_t *)v, 4); + printf("frob %i\n", i); } + */ - for (i = 0; i < vp->nr_consts; i++) { + WAIT_RING(chan, 6 * vp->nr_consts); + for (i = nvfx->use_vp_clipping ? 6 : 0; i < vp->nr_consts; i++) { struct nvfx_vertex_program_data *vpd = &vp->consts[i]; if (vpd->index >= 0) { @@ -995,45 +1354,36 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) 4 * sizeof(float)); } - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (chan, i + vp->data->start); - OUT_RINGp (chan, (uint32_t *)vpd->value, 4); - } + //printf("upload into %i + %i: %f %f %f %f\n", vp->data->start, i, vpd->value[0], vpd->value[1], vpd->value[2], vpd->value[3]); - if (constbuf) - pipe_buffer_unmap(pipe, constbuf, transfer); + OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5)); + OUT_RING(chan, i + vp->data->start); + OUT_RINGp(chan, (uint32_t *)vpd->value, 4); + } } /* Upload vtxprog */ if (upload_code) { -#if 0 - for (i = 0; i < vp->nr_insns; i++) { - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); - } -#endif - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (chan, vp->exec->start); + WAIT_RING(chan, 2 + 5 * vp->nr_insns); + OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1)); + OUT_RING(chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (chan, vp->insns[i].data, 4); + OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4)); + //printf("%08x %08x %08x %08x\n", vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]); + OUT_RINGp(chan, vp->insns[i].data, 4); } + vp->clip_nr = -1; } - if(nvfx->dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP)) + if(nvfx->dirty & (NVFX_NEW_VERTPROG)) { - WAIT_RING(chan, 7); - OUT_RING(chan, RING_3D(NV34TCL_VP_START_FROM_ID, 1)); + WAIT_RING(chan, 6); + OUT_RING(chan, RING_3D(NV30_3D_VP_START_FROM_ID, 1)); OUT_RING(chan, vp->exec->start); if(nvfx->is_nv4x) { - OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 2)); + OUT_RING(chan, RING_3D(NV40_3D_VP_ATTRIB_EN, 1)); OUT_RING(chan, vp->ir); - OUT_RING(chan, vp->or); } - OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1)); - OUT_RING(chan, vp->clip_ctrl); } return TRUE; @@ -1042,25 +1392,64 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) void nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) { - vp->translated = FALSE; - - if (vp->nr_insns) { + if (vp->nr_insns) FREE(vp->insns); - vp->insns = NULL; - vp->nr_insns = 0; - } - if (vp->nr_consts) { + if (vp->nr_consts) FREE(vp->consts); - vp->consts = NULL; - vp->nr_consts = 0; - } nouveau_resource_free(&vp->exec); - vp->exec_start = 0; nouveau_resource_free(&vp->data); - vp->data_start = 0; - vp->data_start_min = 0; - vp->ir = vp->or = vp->clip_ctrl = 0; + util_dynarray_fini(&vp->branch_relocs); + util_dynarray_fini(&vp->const_relocs); + FREE(vp); +} + +static void * +nvfx_vp_state_create(struct pipe_context *pipe, const struct pipe_shader_state *cso) +{ + struct nvfx_pipe_vertex_program *pvp; + + pvp = CALLOC(1, sizeof(struct nvfx_pipe_vertex_program)); + pvp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + tgsi_scan_shader(pvp->pipe.tokens, &pvp->info); + pvp->draw_elements = MAX2(1, MIN2(pvp->info.num_outputs, 16)); + pvp->draw_no_elements = pvp->info.num_outputs == 0; + + return (void *)pvp; +} + +static void +nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->vertprog = hwcso; + nvfx->dirty |= NVFX_NEW_VERTPROG; + nvfx->draw_dirty |= NVFX_NEW_VERTPROG; +} + +static void +nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_pipe_vertex_program *pvp = hwcso; + + if(pvp->draw_vs) + draw_delete_vertex_shader(nvfx->draw, pvp->draw_vs); + if(pvp->vp && pvp->vp != NVFX_VP_FAILED) + nvfx_vertprog_destroy(nvfx, pvp->vp); + if(pvp->draw_vp) + nvfx_vertprog_destroy(nvfx, pvp->draw_vp); + FREE((void*)pvp->pipe.tokens); + FREE(pvp); +} + +void +nvfx_init_vertprog_functions(struct nvfx_context *nvfx) +{ + nvfx->pipe.create_vs_state = nvfx_vp_state_create; + nvfx->pipe.bind_vs_state = nvfx_vp_state_bind; + nvfx->pipe.delete_vs_state = nvfx_vp_state_delete; } |