summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nouveau/nv50
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nouveau/nv50')
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h416
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h2110
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h98
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_blit.h223
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.c317
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h322
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_debug.h25
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h200
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_draw.c88
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_formats.c504
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_miptree.c498
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c445
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.h106
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_push.c309
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query.c399
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_resource.c104
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_resource.h153
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c845
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.h153
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_shader_state.c623
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c1110
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c414
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_stateobj.h78
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h34
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c1353
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_tex.c352
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h306
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_transfer.c412
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_transfer.h27
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c820
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_winsys.h125
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv84_video.c797
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv84_video.h138
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c250
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv84_video_vp.c552
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video.c297
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video.h48
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c159
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c143
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video_vp.c202
40 files changed, 15555 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
new file mode 100644
index 00000000000..dfbef2c6a30
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
@@ -0,0 +1,416 @@
+#ifndef RNNDB_NV50_2D_XML
+#define RNNDB_NV50_2D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/nv50_2d.xml ( 11113 bytes, from 2011-07-09 13:43:58)
+- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58)
+- ./rnndb/nv_object.xml ( 12912 bytes, from 2012-07-12 09:41:09)
+- ./rnndb/nvchipsets.xml ( 3736 bytes, from 2012-07-12 09:41:09)
+- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-07-09 13:43:58)
+- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58)
+
+Copyright (C) 2006-2011 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- imirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+
+#define NV50_2D_DMA_NOTIFY 0x00000180
+
+#define NV50_2D_DMA_DST 0x00000184
+
+#define NV50_2D_DMA_SRC 0x00000188
+
+#define NV50_2D_DMA_COND 0x0000018c
+
+#define NV50_2D_DST_FORMAT 0x00000200
+
+#define NV50_2D_DST_LINEAR 0x00000204
+
+#define NV50_2D_DST_TILE_MODE 0x00000208
+
+#define NV50_2D_DST_DEPTH 0x0000020c
+
+#define NV50_2D_DST_LAYER 0x00000210
+
+#define NV50_2D_DST_PITCH 0x00000214
+
+#define NV50_2D_DST_WIDTH 0x00000218
+
+#define NV50_2D_DST_HEIGHT 0x0000021c
+
+#define NV50_2D_DST_ADDRESS_HIGH 0x00000220
+
+#define NV50_2D_DST_ADDRESS_LOW 0x00000224
+
+#define NV50_2D_UNK228 0x00000228
+
+#define NVC0_2D_UNK228 0x00000228
+
+#define NV50_2D_SRC_FORMAT 0x00000230
+
+#define NV50_2D_SRC_LINEAR 0x00000234
+
+#define NV50_2D_SRC_TILE_MODE 0x00000238
+
+#define NV50_2D_SRC_DEPTH 0x0000023c
+
+#define NV50_2D_SRC_LAYER 0x00000240
+
+#define NVC0_2D_UNK0240 0x00000240
+
+#define NV50_2D_SRC_PITCH 0x00000244
+#define NV50_2D_SRC_PITCH__MAX 0x00040000
+
+#define NV50_2D_SRC_WIDTH 0x00000248
+#define NV50_2D_SRC_WIDTH__MAX 0x00010000
+
+#define NV50_2D_SRC_HEIGHT 0x0000024c
+#define NV50_2D_SRC_HEIGHT__MAX 0x00010000
+
+#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250
+
+#define NV50_2D_SRC_ADDRESS_LOW 0x00000254
+
+#define NV50_2D_UNK258 0x00000258
+
+#define NV50_2D_UNK260 0x00000260
+
+#define NV50_2D_COND_ADDRESS_HIGH 0x00000264
+
+#define NV50_2D_COND_ADDRESS_LOW 0x00000268
+
+#define NV50_2D_COND_MODE 0x0000026c
+#define NV50_2D_COND_MODE_NEVER 0x00000000
+#define NV50_2D_COND_MODE_ALWAYS 0x00000001
+#define NV50_2D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NV50_2D_COND_MODE_EQUAL 0x00000003
+#define NV50_2D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NV50_2D_CLIP_X 0x00000280
+
+#define NV50_2D_CLIP_Y 0x00000284
+
+#define NV50_2D_CLIP_W 0x00000288
+
+#define NV50_2D_CLIP_H 0x0000028c
+
+#define NV50_2D_CLIP_ENABLE 0x00000290
+
+#define NV50_2D_COLOR_KEY_FORMAT 0x00000294
+#define NV50_2D_COLOR_KEY_FORMAT_16BPP 0x00000000
+#define NV50_2D_COLOR_KEY_FORMAT_15BPP 0x00000001
+#define NV50_2D_COLOR_KEY_FORMAT_24BPP 0x00000002
+#define NV50_2D_COLOR_KEY_FORMAT_30BPP 0x00000003
+#define NV50_2D_COLOR_KEY_FORMAT_8BPP 0x00000004
+#define NV50_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005
+#define NV50_2D_COLOR_KEY_FORMAT_32BPP 0x00000006
+
+#define NV50_2D_COLOR_KEY 0x00000298
+
+#define NV50_2D_COLOR_KEY_ENABLE 0x0000029c
+
+#define NV50_2D_ROP 0x000002a0
+
+#define NV50_2D_BETA1 0x000002a4
+#define NV50_2D_BETA1_BETA1__MASK 0x7f800000
+#define NV50_2D_BETA1_BETA1__SHIFT 23
+
+#define NV50_2D_BETA4 0x000002a8
+#define NV50_2D_BETA4_B__MASK 0x000000ff
+#define NV50_2D_BETA4_B__SHIFT 0
+#define NV50_2D_BETA4_G__MASK 0x0000ff00
+#define NV50_2D_BETA4_G__SHIFT 8
+#define NV50_2D_BETA4_R__MASK 0x00ff0000
+#define NV50_2D_BETA4_R__SHIFT 16
+#define NV50_2D_BETA4_A__MASK 0xff000000
+#define NV50_2D_BETA4_A__SHIFT 24
+
+#define NV50_2D_OPERATION 0x000002ac
+#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000
+#define NV50_2D_OPERATION_ROP_AND 0x00000001
+#define NV50_2D_OPERATION_BLEND 0x00000002
+#define NV50_2D_OPERATION_SRCCOPY 0x00000003
+#define NV50_2D_OPERATION_ROP 0x00000004
+#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000005
+#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000006
+
+#define NV50_2D_PATTERN_OFFSET 0x000002b0
+#define NV50_2D_PATTERN_OFFSET_X__MASK 0x0000003f
+#define NV50_2D_PATTERN_OFFSET_X__SHIFT 0
+#define NV50_2D_PATTERN_OFFSET_Y__MASK 0x00003f00
+#define NV50_2D_PATTERN_OFFSET_Y__SHIFT 8
+
+#define NV50_2D_PATTERN_SELECT 0x000002b4
+#define NV50_2D_PATTERN_SELECT_MONO_8X8 0x00000000
+#define NV50_2D_PATTERN_SELECT_MONO_64X1 0x00000001
+#define NV50_2D_PATTERN_SELECT_MONO_1X64 0x00000002
+#define NV50_2D_PATTERN_SELECT_COLOR 0x00000003
+
+#define NVC0_2D_UNK2DC 0x000002dc
+
+#define NVC0_2D_UNK2E0 0x000002e0
+
+#define NV50_2D_PATTERN_COLOR_FORMAT 0x000002e8
+#define NV50_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000
+#define NV50_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001
+#define NV50_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002
+#define NV50_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003
+#define NV50_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004
+#define NV50_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005
+#define NV50_2D_PATTERN_COLOR_FORMAT_UNK6 0x00000006
+
+#define NV50_2D_PATTERN_MONO_FORMAT 0x000002ec
+#define NV50_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000
+#define NV50_2D_PATTERN_MONO_FORMAT_LE 0x00000001
+
+#define NV50_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0))
+#define NV50_2D_PATTERN_COLOR__ESIZE 0x00000004
+#define NV50_2D_PATTERN_COLOR__LEN 0x00000002
+
+#define NV50_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0))
+#define NV50_2D_PATTERN_BITMAP__ESIZE 0x00000004
+#define NV50_2D_PATTERN_BITMAP__LEN 0x00000002
+
+#define NV50_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0))
+#define NV50_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004
+#define NV50_2D_PATTERN_X8R8G8B8__LEN 0x00000040
+#define NV50_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff
+#define NV50_2D_PATTERN_X8R8G8B8_B__SHIFT 0
+#define NV50_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00
+#define NV50_2D_PATTERN_X8R8G8B8_G__SHIFT 8
+#define NV50_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000
+#define NV50_2D_PATTERN_X8R8G8B8_R__SHIFT 16
+
+#define NV50_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0))
+#define NV50_2D_PATTERN_R5G6B5__ESIZE 0x00000004
+#define NV50_2D_PATTERN_R5G6B5__LEN 0x00000020
+#define NV50_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f
+#define NV50_2D_PATTERN_R5G6B5_B0__SHIFT 0
+#define NV50_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0
+#define NV50_2D_PATTERN_R5G6B5_G0__SHIFT 5
+#define NV50_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800
+#define NV50_2D_PATTERN_R5G6B5_R0__SHIFT 11
+#define NV50_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000
+#define NV50_2D_PATTERN_R5G6B5_B1__SHIFT 16
+#define NV50_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000
+#define NV50_2D_PATTERN_R5G6B5_G1__SHIFT 21
+#define NV50_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000
+#define NV50_2D_PATTERN_R5G6B5_R1__SHIFT 27
+
+#define NV50_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0))
+#define NV50_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004
+#define NV50_2D_PATTERN_X1R5G5B5__LEN 0x00000020
+#define NV50_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f
+#define NV50_2D_PATTERN_X1R5G5B5_B0__SHIFT 0
+#define NV50_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0
+#define NV50_2D_PATTERN_X1R5G5B5_G0__SHIFT 5
+#define NV50_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00
+#define NV50_2D_PATTERN_X1R5G5B5_R0__SHIFT 10
+#define NV50_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000
+#define NV50_2D_PATTERN_X1R5G5B5_B1__SHIFT 16
+#define NV50_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000
+#define NV50_2D_PATTERN_X1R5G5B5_G1__SHIFT 21
+#define NV50_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000
+#define NV50_2D_PATTERN_X1R5G5B5_R1__SHIFT 26
+
+#define NV50_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0))
+#define NV50_2D_PATTERN_Y8__ESIZE 0x00000004
+#define NV50_2D_PATTERN_Y8__LEN 0x00000010
+#define NV50_2D_PATTERN_Y8_Y0__MASK 0x000000ff
+#define NV50_2D_PATTERN_Y8_Y0__SHIFT 0
+#define NV50_2D_PATTERN_Y8_Y1__MASK 0x0000ff00
+#define NV50_2D_PATTERN_Y8_Y1__SHIFT 8
+#define NV50_2D_PATTERN_Y8_Y2__MASK 0x00ff0000
+#define NV50_2D_PATTERN_Y8_Y2__SHIFT 16
+#define NV50_2D_PATTERN_Y8_Y3__MASK 0xff000000
+#define NV50_2D_PATTERN_Y8_Y3__SHIFT 24
+
+#define NVC0_2D_DRAW_COLOR_LONG(i0) (0x00000540 + 0x4*(i0))
+#define NVC0_2D_DRAW_COLOR_LONG__ESIZE 0x00000004
+#define NVC0_2D_DRAW_COLOR_LONG__LEN 0x00000004
+
+#define NV50_2D_DRAW_SHAPE 0x00000580
+#define NV50_2D_DRAW_SHAPE_POINTS 0x00000000
+#define NV50_2D_DRAW_SHAPE_LINES 0x00000001
+#define NV50_2D_DRAW_SHAPE_LINE_STRIP 0x00000002
+#define NV50_2D_DRAW_SHAPE_TRIANGLES 0x00000003
+#define NV50_2D_DRAW_SHAPE_RECTANGLES 0x00000004
+
+#define NV50_2D_DRAW_COLOR_FORMAT 0x00000584
+
+#define NV50_2D_DRAW_COLOR 0x00000588
+
+#define NV50_2D_UNK58C 0x0000058c
+#define NV50_2D_UNK58C_0 0x00000001
+#define NV50_2D_UNK58C_1 0x00000010
+#define NV50_2D_UNK58C_2 0x00000100
+#define NV50_2D_UNK58C_3 0x00001000
+
+#define NV50_2D_DRAW_POINT16 0x000005e0
+#define NV50_2D_DRAW_POINT16_X__MASK 0x0000ffff
+#define NV50_2D_DRAW_POINT16_X__SHIFT 0
+#define NV50_2D_DRAW_POINT16_Y__MASK 0xffff0000
+#define NV50_2D_DRAW_POINT16_Y__SHIFT 16
+
+#define NV50_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0))
+#define NV50_2D_DRAW_POINT32_X__ESIZE 0x00000008
+#define NV50_2D_DRAW_POINT32_X__LEN 0x00000040
+
+#define NV50_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0))
+#define NV50_2D_DRAW_POINT32_Y__ESIZE 0x00000008
+#define NV50_2D_DRAW_POINT32_Y__LEN 0x00000040
+
+#define NV50_2D_SIFC_BITMAP_ENABLE 0x00000800
+
+#define NV50_2D_SIFC_FORMAT 0x00000804
+
+#define NV50_2D_SIFC_BITMAP_FORMAT 0x00000808
+#define NV50_2D_SIFC_BITMAP_FORMAT_I1 0x00000000
+#define NV50_2D_SIFC_BITMAP_FORMAT_I4 0x00000001
+#define NV50_2D_SIFC_BITMAP_FORMAT_I8 0x00000002
+
+#define NV50_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c
+
+#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810
+#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000
+#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001
+#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002
+
+#define NV50_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814
+
+#define NV50_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818
+
+#define NV50_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c
+
+#define NV50_2D_SIFC_WIDTH 0x00000838
+
+#define NV50_2D_SIFC_HEIGHT 0x0000083c
+
+#define NV50_2D_SIFC_DX_DU_FRACT 0x00000840
+
+#define NV50_2D_SIFC_DX_DU_INT 0x00000844
+
+#define NV50_2D_SIFC_DY_DV_FRACT 0x00000848
+
+#define NV50_2D_SIFC_DY_DV_INT 0x0000084c
+
+#define NV50_2D_SIFC_DST_X_FRACT 0x00000850
+
+#define NV50_2D_SIFC_DST_X_INT 0x00000854
+
+#define NV50_2D_SIFC_DST_Y_FRACT 0x00000858
+
+#define NV50_2D_SIFC_DST_Y_INT 0x0000085c
+
+#define NV50_2D_SIFC_DATA 0x00000860
+
+#define NV50_2D_UNK0870 0x00000870
+
+#define NV50_2D_UNK0880 0x00000880
+
+#define NV50_2D_UNK0884 0x00000884
+
+#define NV50_2D_UNK0888 0x00000888
+
+#define NV50_2D_BLIT_CONTROL 0x0000088c
+#define NV50_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001
+#define NV50_2D_BLIT_CONTROL_ORIGIN__SHIFT 0
+#define NV50_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000
+#define NV50_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001
+#define NV50_2D_BLIT_CONTROL_FILTER__MASK 0x00000010
+#define NV50_2D_BLIT_CONTROL_FILTER__SHIFT 4
+#define NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000
+#define NV50_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010
+
+#define NV50_2D_BLIT_DST_X 0x000008b0
+
+#define NV50_2D_BLIT_DST_Y 0x000008b4
+
+#define NV50_2D_BLIT_DST_W 0x000008b8
+
+#define NV50_2D_BLIT_DST_H 0x000008bc
+
+#define NV50_2D_BLIT_DU_DX_FRACT 0x000008c0
+
+#define NV50_2D_BLIT_DU_DX_INT 0x000008c4
+
+#define NV50_2D_BLIT_DV_DY_FRACT 0x000008c8
+
+#define NV50_2D_BLIT_DV_DY_INT 0x000008cc
+
+#define NV50_2D_BLIT_SRC_X_FRACT 0x000008d0
+
+#define NV50_2D_BLIT_SRC_X_INT 0x000008d4
+
+#define NV50_2D_BLIT_SRC_Y_FRACT 0x000008d8
+
+#define NV50_2D_BLIT_SRC_Y_INT 0x000008dc
+
+#define NVC0_2D_FIRMWARE(i0) (0x000008e0 + 0x4*(i0))
+#define NVC0_2D_FIRMWARE__ESIZE 0x00000004
+#define NVC0_2D_FIRMWARE__LEN 0x00000020
+
+
+#endif /* RNNDB_NV50_2D_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h
new file mode 100644
index 00000000000..9dff8b2dd13
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h
@@ -0,0 +1,2110 @@
+#ifndef RNNDB_NV50_3D_XML
+#define RNNDB_NV50_3D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/nv50_3d.xml ( 65226 bytes, from 2012-01-28 13:46:30)
+- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv_3ddefs.xml ( 16394 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv_object.xml ( 12672 bytes, from 2011-08-11 18:25:12)
+
+Copyright (C) 2006-2012 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- imirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NV50_3D_DMA_NOTIFY 0x00000180
+
+#define NV50_3D_DMA_ZETA 0x00000184
+
+#define NV50_3D_DMA_QUERY 0x00000188
+
+#define NV50_3D_DMA_VTXBUF 0x0000018c
+
+#define NV50_3D_DMA_LOCAL 0x00000190
+
+#define NV50_3D_DMA_STACK 0x00000194
+
+#define NV50_3D_DMA_CODE_CB 0x00000198
+
+#define NV50_3D_DMA_TSC 0x0000019c
+
+#define NV50_3D_DMA_TIC 0x000001a0
+
+#define NV50_3D_DMA_TEXTURE 0x000001a4
+
+#define NV50_3D_DMA_STRMOUT 0x000001a8
+
+#define NV50_3D_DMA_CLIPID 0x000001ac
+
+#define NV50_3D_DMA_COLOR(i0) (0x000001c0 + 0x4*(i0))
+#define NV50_3D_DMA_COLOR__ESIZE 0x00000004
+#define NV50_3D_DMA_COLOR__LEN 0x00000008
+
+#define NV50_3D_RT(i0) (0x00000200 + 0x20*(i0))
+#define NV50_3D_RT__ESIZE 0x00000020
+#define NV50_3D_RT__LEN 0x00000008
+
+#define NV50_3D_RT_ADDRESS_HIGH(i0) (0x00000200 + 0x20*(i0))
+
+#define NV50_3D_RT_ADDRESS_LOW(i0) (0x00000204 + 0x20*(i0))
+
+#define NV50_3D_RT_FORMAT(i0) (0x00000208 + 0x20*(i0))
+
+#define NV50_3D_RT_TILE_MODE(i0) (0x0000020c + 0x20*(i0))
+#define NV50_3D_RT_TILE_MODE_X__MASK 0x0000000f
+#define NV50_3D_RT_TILE_MODE_X__SHIFT 0
+#define NV50_3D_RT_TILE_MODE_Y__MASK 0x000000f0
+#define NV50_3D_RT_TILE_MODE_Y__SHIFT 4
+#define NV50_3D_RT_TILE_MODE_Z__MASK 0x00000f00
+#define NV50_3D_RT_TILE_MODE_Z__SHIFT 8
+
+#define NV50_3D_RT_LAYER_STRIDE(i0) (0x00000210 + 0x20*(i0))
+#define NV50_3D_RT_LAYER_STRIDE__SHR 2
+
+#define NV50_3D_RT_UNK14(i0) (0x00000214 + 0x20*(i0))
+
+#define NV50_3D_VTX_ATTR_1F(i0) (0x00000300 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_1F__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_1F__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_2H(i0) (0x00000340 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_2H__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_2H__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_2H_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_2H_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_2H_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_2H_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_2F_X(i0) (0x00000380 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_2F_X__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_2F_X__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_2F_Y(i0) (0x00000384 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_2F_Y__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_2F_Y__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_3F_X(i0) (0x00000400 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_3F_X__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_3F_X__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_3F_Y(i0) (0x00000404 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_3F_Y__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_3F_Y__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_3F_Z(i0) (0x00000408 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_3F_Z__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_3F_Z__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4F_X(i0) (0x00000500 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_4F_X__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_4F_X__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4F_Y(i0) (0x00000504 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_4F_Y__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_4F_Y__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4F_Z(i0) (0x00000508 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_4F_Z__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_4F_Z__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4F_W(i0) (0x0000050c + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_4F_W__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_4F_W__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4H_0(i0) (0x00000600 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4H_0__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4H_0__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4H_0_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4H_0_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4H_0_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4H_0_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4H_1(i0) (0x00000604 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4H_1__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4H_1__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4H_1_Z__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4H_1_Z__SHIFT 0
+#define NV50_3D_VTX_ATTR_4H_1_W__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4H_1_W__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_2I(i0) (0x00000680 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_2I__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_2I__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_2I_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_2I_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_2I_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_2I_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_2NI(i0) (0x000006c0 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_2NI__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_2NI__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_2NI_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_2NI_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_2NI_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_2NI_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4I_0(i0) (0x00000700 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4I_0__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4I_0__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4I_0_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4I_0_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4I_0_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4I_0_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4I_1(i0) (0x00000704 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4I_1__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4I_1__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4I_1_Z__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4I_1_Z__SHIFT 0
+#define NV50_3D_VTX_ATTR_4I_1_W__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4I_1_W__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4NI_0(i0) (0x00000780 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4NI_0__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4NI_0__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4NI_0_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4NI_0_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4NI_0_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4NI_0_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4NI_1(i0) (0x00000784 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4NI_1__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4NI_1__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4NI_1_Z__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4NI_1_Z__SHIFT 0
+#define NV50_3D_VTX_ATTR_4NI_1_W__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4NI_1_W__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4UB(i0) (0x00000800 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_4UB__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_4UB__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4UB_X__MASK 0x000000ff
+#define NV50_3D_VTX_ATTR_4UB_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4UB_Y__MASK 0x0000ff00
+#define NV50_3D_VTX_ATTR_4UB_Y__SHIFT 8
+#define NV50_3D_VTX_ATTR_4UB_Z__MASK 0x00ff0000
+#define NV50_3D_VTX_ATTR_4UB_Z__SHIFT 16
+#define NV50_3D_VTX_ATTR_4UB_W__MASK 0xff000000
+#define NV50_3D_VTX_ATTR_4UB_W__SHIFT 24
+
+#define NV50_3D_VTX_ATTR_4B(i0) (0x00000840 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_4B__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_4B__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4B_X__MASK 0x000000ff
+#define NV50_3D_VTX_ATTR_4B_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4B_Y__MASK 0x0000ff00
+#define NV50_3D_VTX_ATTR_4B_Y__SHIFT 8
+#define NV50_3D_VTX_ATTR_4B_Z__MASK 0x00ff0000
+#define NV50_3D_VTX_ATTR_4B_Z__SHIFT 16
+#define NV50_3D_VTX_ATTR_4B_W__MASK 0xff000000
+#define NV50_3D_VTX_ATTR_4B_W__SHIFT 24
+
+#define NV50_3D_VTX_ATTR_4NUB(i0) (0x00000880 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_4NUB__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_4NUB__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4NUB_X__MASK 0x000000ff
+#define NV50_3D_VTX_ATTR_4NUB_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4NUB_Y__MASK 0x0000ff00
+#define NV50_3D_VTX_ATTR_4NUB_Y__SHIFT 8
+#define NV50_3D_VTX_ATTR_4NUB_Z__MASK 0x00ff0000
+#define NV50_3D_VTX_ATTR_4NUB_Z__SHIFT 16
+#define NV50_3D_VTX_ATTR_4NUB_W__MASK 0xff000000
+#define NV50_3D_VTX_ATTR_4NUB_W__SHIFT 24
+
+#define NV50_3D_VTX_ATTR_4NB(i0) (0x000008c0 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_4NB__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_4NB__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4NB_X__MASK 0x000000ff
+#define NV50_3D_VTX_ATTR_4NB_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4NB_Y__MASK 0x0000ff00
+#define NV50_3D_VTX_ATTR_4NB_Y__SHIFT 8
+#define NV50_3D_VTX_ATTR_4NB_Z__MASK 0x00ff0000
+#define NV50_3D_VTX_ATTR_4NB_Z__SHIFT 16
+#define NV50_3D_VTX_ATTR_4NB_W__MASK 0xff000000
+#define NV50_3D_VTX_ATTR_4NB_W__SHIFT 24
+
+#define NV50_3D_VERTEX_ARRAY_FETCH(i0) (0x00000900 + 0x10*(i0))
+#define NV50_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010
+#define NV50_3D_VERTEX_ARRAY_FETCH__LEN 0x00000010
+#define NV50_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff
+#define NV50_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0
+#define NV50_3D_VERTEX_ARRAY_FETCH_ENABLE 0x20000000
+
+#define NV50_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00000904 + 0x10*(i0))
+#define NV50_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010
+#define NV50_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000010
+
+#define NV50_3D_VERTEX_ARRAY_START_LOW(i0) (0x00000908 + 0x10*(i0))
+#define NV50_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010
+#define NV50_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000010
+
+#define NV50_3D_VERTEX_ARRAY_DIVISOR(i0) (0x0000090c + 0x10*(i0))
+#define NV50_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010
+#define NV50_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_SCALE_X__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_SCALE_Y__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_SCALE_Z__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0))
+#define NV50_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0))
+#define NV50_3D_VIEWPORT_HORIZ__ESIZE 0x00000010
+#define NV50_3D_VIEWPORT_HORIZ__LEN 0x00000010
+#define NV50_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff
+#define NV50_3D_VIEWPORT_HORIZ_X__SHIFT 0
+#define NV50_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000
+#define NV50_3D_VIEWPORT_HORIZ_W__SHIFT 16
+
+#define NV50_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0))
+#define NV50_3D_VIEWPORT_VERT__ESIZE 0x00000010
+#define NV50_3D_VIEWPORT_VERT__LEN 0x00000010
+#define NV50_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff
+#define NV50_3D_VIEWPORT_VERT_Y__SHIFT 0
+#define NV50_3D_VIEWPORT_VERT_H__MASK 0xffff0000
+#define NV50_3D_VIEWPORT_VERT_H__SHIFT 16
+
+#define NV50_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0))
+#define NV50_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010
+#define NV50_3D_DEPTH_RANGE_NEAR__LEN 0x00000010
+
+#define NV50_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0))
+#define NV50_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010
+#define NV50_3D_DEPTH_RANGE_FAR__LEN 0x00000010
+
+#define NV50_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0))
+#define NV50_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008
+#define NV50_3D_CLIP_RECT_HORIZ__LEN 0x00000008
+#define NV50_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff
+#define NV50_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0
+#define NV50_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000
+#define NV50_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16
+
+#define NV50_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0))
+#define NV50_3D_CLIP_RECT_VERT__ESIZE 0x00000008
+#define NV50_3D_CLIP_RECT_VERT__LEN 0x00000008
+#define NV50_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff
+#define NV50_3D_CLIP_RECT_VERT_MIN__SHIFT 0
+#define NV50_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000
+#define NV50_3D_CLIP_RECT_VERT_MAX__SHIFT 16
+
+#define NV50_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0))
+#define NV50_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008
+#define NV50_3D_CLIPID_REGION_HORIZ__LEN 0x00000004
+#define NV50_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff
+#define NV50_3D_CLIPID_REGION_HORIZ_X__SHIFT 0
+#define NV50_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000
+#define NV50_3D_CLIPID_REGION_HORIZ_W__SHIFT 16
+
+#define NV50_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0))
+#define NV50_3D_CLIPID_REGION_VERT__ESIZE 0x00000008
+#define NV50_3D_CLIPID_REGION_VERT__LEN 0x00000004
+#define NV50_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff
+#define NV50_3D_CLIPID_REGION_VERT_Y__SHIFT 0
+#define NV50_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000
+#define NV50_3D_CLIPID_REGION_VERT_H__SHIFT 16
+
+#define NV50_3D_UNK0D60 0x00000d60
+
+#define NV50_3D_UNK0D64 0x00000d64
+
+#define NV50_3D_COUNTER_ENABLE 0x00000d68
+#define NV50_3D_COUNTER_ENABLE_VFETCH_VERTICES 0x00000001
+#define NV50_3D_COUNTER_ENABLE_VFETCH_PRIMITIVES 0x00000002
+#define NV50_3D_COUNTER_ENABLE_VP_LAUNCHES 0x00000004
+#define NV50_3D_COUNTER_ENABLE_GP_LAUNCHES 0x00000008
+#define NV50_3D_COUNTER_ENABLE_GP_PRIMITIVES_OUT 0x00000010
+#define NV50_3D_COUNTER_ENABLE_TRANSFORM_FEEDBACK 0x00000020
+#define NV50_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000040
+#define NV50_3D_COUNTER_ENABLE_RAST_PRIMITIVES_PRECLIP 0x00000080
+#define NV50_3D_COUNTER_ENABLE_RAST_PRIMITIVES_POSTCLIP 0x00000100
+#define NV50_3D_COUNTER_ENABLE_FP_PIXELS 0x00000200
+#define NV84_3D_COUNTER_ENABLE_UNK0A 0x00000400
+
+#define NV50_3D_UNK0D6C(i0) (0x00000d6c + 0x4*(i0))
+#define NV50_3D_UNK0D6C__ESIZE 0x00000004
+#define NV50_3D_UNK0D6C__LEN 0x00000002
+#define NV50_3D_UNK0D6C_X__MASK 0x0000ffff
+#define NV50_3D_UNK0D6C_X__SHIFT 0
+#define NV50_3D_UNK0D6C_Y__MASK 0xffff0000
+#define NV50_3D_UNK0D6C_Y__SHIFT 16
+
+#define NV50_3D_VERTEX_BUFFER_FIRST 0x00000d74
+
+#define NV50_3D_VERTEX_BUFFER_COUNT 0x00000d78
+
+#define NV50_3D_UNK0D7C 0x00000d7c
+
+#define NV50_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0))
+#define NV50_3D_CLEAR_COLOR__ESIZE 0x00000004
+#define NV50_3D_CLEAR_COLOR__LEN 0x00000004
+
+#define NV50_3D_CLEAR_DEPTH 0x00000d90
+
+#define NV50_3D_STACK_ADDRESS_HIGH 0x00000d94
+
+#define NV50_3D_STACK_ADDRESS_LOW 0x00000d98
+
+#define NV50_3D_STACK_SIZE_LOG 0x00000d9c
+
+#define NV50_3D_CLEAR_STENCIL 0x00000da0
+
+#define NV50_3D_STRMOUT_PARAMS_LATCH 0x00000da4
+
+#define NV50_3D_STRMOUT_PRIMITIVE_LIMIT 0x00000da8
+
+#define NV50_3D_POLYGON_MODE_FRONT 0x00000dac
+#define NV50_3D_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV50_3D_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV50_3D_POLYGON_MODE_FRONT_FILL 0x00001b02
+
+#define NV50_3D_POLYGON_MODE_BACK 0x00000db0
+#define NV50_3D_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV50_3D_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV50_3D_POLYGON_MODE_BACK_FILL 0x00001b02
+
+#define NV50_3D_POLYGON_SMOOTH_ENABLE 0x00000db4
+
+#define NV50_3D_UNK0DB8 0x00000db8
+
+#define NV50_3D_ZCULL_UNK0DBC 0x00000dbc
+#define NV50_3D_ZCULL_UNK0DBC_UNK0 0x00000001
+#define NV50_3D_ZCULL_UNK0DBC_UNK16__MASK 0x00030000
+#define NV50_3D_ZCULL_UNK0DBC_UNK16__SHIFT 16
+
+#define NV50_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
+
+#define NV50_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
+
+#define NV50_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
+
+#define NV50_3D_UNK0DCC 0x00000dcc
+
+#define NV50_3D_VTX_ATTR_MASK_UNK0DD0(i0) (0x00000dd0 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_MASK_UNK0DD0__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_MASK_UNK0DD0__LEN 0x00000002
+
+#define NV50_3D_ZCULL_UNK0DD8 0x00000dd8
+#define NV50_3D_ZCULL_UNK0DD8_UNK0__MASK 0x00000007
+#define NV50_3D_ZCULL_UNK0DD8_UNK0__SHIFT 0
+#define NVA3_3D_ZCULL_UNK0DD8_UNK9 0x00000200
+#define NV50_3D_ZCULL_UNK0DD8_UNK16__MASK 0xffff0000
+#define NV50_3D_ZCULL_UNK0DD8_UNK16__SHIFT 16
+
+#define NV50_3D_UNK0DDC 0x00000ddc
+
+#define NV50_3D_UNK0DE0 0x00000de0
+
+#define NV50_3D_WATCHDOG_TIMER 0x00000de4
+
+#define NV50_3D_UNK0DE8 0x00000de8
+
+#define NV50_3D_UNK0DEC 0x00000dec
+
+#define NV50_3D_UNK0DF0 0x00000df0
+#define NV50_3D_UNK0DF0_UNK0 0x00000001
+#define NV50_3D_UNK0DF0_UNK1__MASK 0x00000ff0
+#define NV50_3D_UNK0DF0_UNK1__SHIFT 4
+
+#define NV50_3D_UNK0DF4 0x00000df4
+
+#define NV50_3D_WINDOW_OFFSET_X 0x00000df8
+
+#define NV50_3D_WINDOW_OFFSET_Y 0x00000dfc
+
+#define NV50_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0))
+#define NV50_3D_SCISSOR_ENABLE__ESIZE 0x00000010
+#define NV50_3D_SCISSOR_ENABLE__LEN 0x00000010
+
+#define NV50_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0))
+#define NV50_3D_SCISSOR_HORIZ__ESIZE 0x00000010
+#define NV50_3D_SCISSOR_HORIZ__LEN 0x00000010
+#define NV50_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff
+#define NV50_3D_SCISSOR_HORIZ_MIN__SHIFT 0
+#define NV50_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000
+#define NV50_3D_SCISSOR_HORIZ_MAX__SHIFT 16
+
+#define NV50_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0))
+#define NV50_3D_SCISSOR_VERT__ESIZE 0x00000010
+#define NV50_3D_SCISSOR_VERT__LEN 0x00000010
+#define NV50_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff
+#define NV50_3D_SCISSOR_VERT_MIN__SHIFT 0
+#define NV50_3D_SCISSOR_VERT_MAX__MASK 0xffff0000
+#define NV50_3D_SCISSOR_VERT_MAX__SHIFT 16
+
+#define NV50_3D_CB_ADDR 0x00000f00
+#define NV50_3D_CB_ADDR_ID__MASK 0x003fff00
+#define NV50_3D_CB_ADDR_ID__SHIFT 8
+#define NV50_3D_CB_ADDR_BUFFER__MASK 0x0000007f
+#define NV50_3D_CB_ADDR_BUFFER__SHIFT 0
+
+#define NV50_3D_CB_DATA(i0) (0x00000f04 + 0x4*(i0))
+#define NV50_3D_CB_DATA__ESIZE 0x00000004
+#define NV50_3D_CB_DATA__LEN 0x00000010
+
+#define NV50_3D_LOCAL_WARPS_LOG_ALLOC 0x00000f44
+
+#define NV50_3D_LOCAL_WARPS_NO_CLAMP 0x00000f48
+
+#define NV50_3D_STACK_WARPS_LOG_ALLOC 0x00000f4c
+
+#define NV50_3D_STACK_WARPS_NO_CLAMP 0x00000f50
+
+#define NV50_3D_STENCIL_BACK_FUNC_REF 0x00000f54
+
+#define NV50_3D_STENCIL_BACK_MASK 0x00000f58
+
+#define NV50_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c
+
+#define NV50_3D_UNK0F60(i0) (0x00000f60 + 0x4*(i0))
+#define NV50_3D_UNK0F60__ESIZE 0x00000004
+#define NV50_3D_UNK0F60__LEN 0x00000004
+
+#define NV50_3D_GP_ADDRESS_HIGH 0x00000f70
+
+#define NV50_3D_GP_ADDRESS_LOW 0x00000f74
+
+#define NV50_3D_UNK0F78 0x00000f78
+
+#define NV50_3D_VP_ADDRESS_HIGH 0x00000f7c
+
+#define NV50_3D_VP_ADDRESS_LOW 0x00000f80
+
+#define NV50_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84
+
+#define NV50_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88
+
+#define NV50_3D_UNK0F8C 0x00000f8c
+
+#define NV50_3D_COLOR_MASK_COMMON 0x00000f90
+
+#define NV50_3D_UNK0F94 0x00000f94
+
+#define NV50_3D_UNK0F98 0x00000f98
+
+#define NV50_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0))
+#define NV50_3D_DEPTH_BOUNDS__ESIZE 0x00000004
+#define NV50_3D_DEPTH_BOUNDS__LEN 0x00000002
+
+#define NV50_3D_FP_ADDRESS_HIGH 0x00000fa4
+
+#define NV50_3D_FP_ADDRESS_LOW 0x00000fa8
+
+#define NV50_3D_UNK0FAC 0x00000fac
+#define NV50_3D_UNK0FAC_UNK0 0x00000001
+#define NVA0_3D_UNK0FAC_UNK2 0x00000002
+#define NV50_3D_UNK0FAC_UNK1__MASK 0x000ffff0
+#define NV50_3D_UNK0FAC_UNK1__SHIFT 4
+
+#define NV50_3D_UNK0FB0 0x00000fb0
+
+#define NV50_3D_UNK0FB4 0x00000fb4
+
+#define NV50_3D_UNK0FB8 0x00000fb8
+
+#define NV50_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0))
+#define NV50_3D_MSAA_MASK__ESIZE 0x00000004
+#define NV50_3D_MSAA_MASK__LEN 0x00000004
+
+#define NV50_3D_CLIPID_ADDRESS_HIGH 0x00000fcc
+
+#define NV50_3D_CLIPID_ADDRESS_LOW 0x00000fd0
+
+#define NV50_3D_SEMANTIC_VIEWPORT 0x00000fd4
+#define NV50_3D_SEMANTIC_VIEWPORT_VIEWPORT_ID__MASK 0x000000ff
+#define NV50_3D_SEMANTIC_VIEWPORT_VIEWPORT_ID__SHIFT 0
+
+#define NV50_3D_UNK0FD8 0x00000fd8
+#define NV50_3D_UNK0FD8_UNK0 0x00000001
+#define NV50_3D_UNK0FD8_UNK1 0x00000010
+
+#define NV50_3D_UNK0FDC 0x00000fdc
+
+#define NV50_3D_ZETA_ADDRESS_HIGH 0x00000fe0
+
+#define NV50_3D_ZETA_ADDRESS_LOW 0x00000fe4
+
+#define NV50_3D_ZETA_FORMAT 0x00000fe8
+
+#define NV50_3D_ZETA_TILE_MODE 0x00000fec
+
+#define NV50_3D_ZETA_LAYER_STRIDE 0x00000ff0
+#define NV50_3D_ZETA_LAYER_STRIDE__SHR 2
+
+#define NV50_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4
+#define NV50_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000
+#define NV50_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16
+#define NV50_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff
+#define NV50_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0
+
+#define NV50_3D_SCREEN_SCISSOR_VERT 0x00000ff8
+#define NV50_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000
+#define NV50_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16
+#define NV50_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff
+#define NV50_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0
+
+#define NV50_3D_UNK0FFC 0x00000ffc
+
+#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001000 + 0x4*(i0))
+#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004
+#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000010
+
+#define NV50_3D_UNK1040(i0) (0x00001040 + 0x4*(i0))
+#define NV50_3D_UNK1040__ESIZE 0x00000004
+#define NV50_3D_UNK1040__LEN 0x00000010
+
+#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001080 + 0x8*(i0))
+#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008
+#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000010
+
+#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001084 + 0x8*(i0))
+#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008
+#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000010
+
+#define NV50_3D_UNK1100 0x00001100
+
+#define NV84_3D_UNK1104 0x00001104
+#define NV84_3D_UNK1104_0__MASK 0x0000ffff
+#define NV84_3D_UNK1104_0__SHIFT 0
+#define NV84_3D_UNK1104_0__MAX 0x00002000
+#define NV84_3D_UNK1104_0__ALIGN 0x00000040
+#define NV84_3D_UNK1104_1__MASK 0xffff0000
+#define NV84_3D_UNK1104_1__SHIFT 16
+#define NV84_3D_UNK1104_1__MAX 0x00002000
+#define NV84_3D_UNK1104_1__ALIGN 0x00000040
+
+#define NV84_3D_UNK1108 0x00001108
+#define NV84_3D_UNK1108_0 0x00000001
+#define NV84_3D_UNK1108_1 0x00000010
+
+#define NV84_3D_UNK110C 0x0000110c
+
+#define NV84_3D_UNK1110 0x00001110
+
+#define NV84_3D_WRCACHE_FLUSH 0x00001114
+
+#define NV84_3D_VERTEX_ID_BASE 0x00001118
+
+#define NV84_3D_PRIMITIVE_ID 0x0000111c
+
+#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT(i0) (0x00001120 + 0x4*(i0))
+#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT__ESIZE 0x00000004
+#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT__LEN 0x00000004
+
+#define NVA3_3D_VP_ATTR_EN_ALT(i0) (0x00001130 + 0x4*(i0))
+#define NVA3_3D_VP_ATTR_EN_ALT__ESIZE 0x00000004
+#define NVA3_3D_VP_ATTR_EN_ALT__LEN 0x00000004
+#define NVA3_3D_VP_ATTR_EN_ALT_7__MASK 0xf0000000
+#define NVA3_3D_VP_ATTR_EN_ALT_7__SHIFT 28
+#define NVA3_3D_VP_ATTR_EN_ALT_7_X 0x10000000
+#define NVA3_3D_VP_ATTR_EN_ALT_7_Y 0x20000000
+#define NVA3_3D_VP_ATTR_EN_ALT_7_Z 0x40000000
+#define NVA3_3D_VP_ATTR_EN_ALT_7_W 0x80000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6__MASK 0x0f000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6__SHIFT 24
+#define NVA3_3D_VP_ATTR_EN_ALT_6_X 0x01000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6_Y 0x02000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6_Z 0x04000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6_W 0x08000000
+#define NVA3_3D_VP_ATTR_EN_ALT_5__MASK 0x00f00000
+#define NVA3_3D_VP_ATTR_EN_ALT_5__SHIFT 20
+#define NVA3_3D_VP_ATTR_EN_ALT_5_X 0x00100000
+#define NVA3_3D_VP_ATTR_EN_ALT_5_Y 0x00200000
+#define NVA3_3D_VP_ATTR_EN_ALT_5_Z 0x00400000
+#define NVA3_3D_VP_ATTR_EN_ALT_5_W 0x00800000
+#define NVA3_3D_VP_ATTR_EN_ALT_4__MASK 0x000f0000
+#define NVA3_3D_VP_ATTR_EN_ALT_4__SHIFT 16
+#define NVA3_3D_VP_ATTR_EN_ALT_4_X 0x00010000
+#define NVA3_3D_VP_ATTR_EN_ALT_4_Y 0x00020000
+#define NVA3_3D_VP_ATTR_EN_ALT_4_Z 0x00040000
+#define NVA3_3D_VP_ATTR_EN_ALT_4_W 0x00080000
+#define NVA3_3D_VP_ATTR_EN_ALT_3__MASK 0x0000f000
+#define NVA3_3D_VP_ATTR_EN_ALT_3__SHIFT 12
+#define NVA3_3D_VP_ATTR_EN_ALT_3_X 0x00001000
+#define NVA3_3D_VP_ATTR_EN_ALT_3_Y 0x00002000
+#define NVA3_3D_VP_ATTR_EN_ALT_3_Z 0x00004000
+#define NVA3_3D_VP_ATTR_EN_ALT_3_W 0x00008000
+#define NVA3_3D_VP_ATTR_EN_ALT_2__MASK 0x00000f00
+#define NVA3_3D_VP_ATTR_EN_ALT_2__SHIFT 8
+#define NVA3_3D_VP_ATTR_EN_ALT_2_X 0x00000100
+#define NVA3_3D_VP_ATTR_EN_ALT_2_Y 0x00000200
+#define NVA3_3D_VP_ATTR_EN_ALT_2_Z 0x00000400
+#define NVA3_3D_VP_ATTR_EN_ALT_2_W 0x00000800
+#define NVA3_3D_VP_ATTR_EN_ALT_1__MASK 0x000000f0
+#define NVA3_3D_VP_ATTR_EN_ALT_1__SHIFT 4
+#define NVA3_3D_VP_ATTR_EN_ALT_1_X 0x00000010
+#define NVA3_3D_VP_ATTR_EN_ALT_1_Y 0x00000020
+#define NVA3_3D_VP_ATTR_EN_ALT_1_Z 0x00000040
+#define NVA3_3D_VP_ATTR_EN_ALT_1_W 0x00000080
+#define NVA3_3D_VP_ATTR_EN_ALT_0__MASK 0x0000000f
+#define NVA3_3D_VP_ATTR_EN_ALT_0__SHIFT 0
+#define NVA3_3D_VP_ATTR_EN_ALT_0_X 0x00000001
+#define NVA3_3D_VP_ATTR_EN_ALT_0_Y 0x00000002
+#define NVA3_3D_VP_ATTR_EN_ALT_0_Z 0x00000004
+#define NVA3_3D_VP_ATTR_EN_ALT_0_W 0x00000008
+
+#define NVA3_3D_UNK1140 0x00001140
+
+#define NVA0_3D_UNK1144 0x00001144
+
+#define NVA0_3D_VTX_ATTR_DEFINE 0x0000114c
+#define NVA0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff
+#define NVA0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0
+#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700
+#define NVA0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8
+#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001
+#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000
+
+#define NVA0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0))
+#define NVA0_3D_VTX_ATTR_DATA__ESIZE 0x00000004
+#define NVA0_3D_VTX_ATTR_DATA__LEN 0x00000004
+
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT(i0) (0x00001160 + 0x4*(i0))
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT__ESIZE 0x00000004
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT__LEN 0x00000020
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BUFFER__MASK 0x0000001f
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BUFFER__SHIFT 0
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_CONST 0x00000040
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_OFFSET__MASK 0x001fff80
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_OFFSET__SHIFT 7
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT__MASK 0x07e00000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT__SHIFT 21
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32_32_32 0x00200000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32_32 0x00400000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16_16_16 0x00600000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32 0x00800000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16_16 0x00a00000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8_8_8 0x01400000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16 0x01e00000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32 0x02400000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8_8 0x02600000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8 0x03000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16 0x03600000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8 0x03a00000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_10_10_10_2 0x06000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__MASK 0x38000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__SHIFT 27
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SNORM 0x08000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_UNORM 0x10000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SINT 0x18000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_UINT 0x20000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_USCALED 0x28000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SSCALED 0x30000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_FLOAT 0x38000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BGRA 0x80000000
+
+#define NV50_3D_RT_CONTROL 0x0000121c
+#define NV50_3D_RT_CONTROL_COUNT__MASK 0x0000000f
+#define NV50_3D_RT_CONTROL_COUNT__SHIFT 0
+#define NV50_3D_RT_CONTROL_MAP0__MASK 0x00000070
+#define NV50_3D_RT_CONTROL_MAP0__SHIFT 4
+#define NV50_3D_RT_CONTROL_MAP1__MASK 0x00000380
+#define NV50_3D_RT_CONTROL_MAP1__SHIFT 7
+#define NV50_3D_RT_CONTROL_MAP2__MASK 0x00001c00
+#define NV50_3D_RT_CONTROL_MAP2__SHIFT 10
+#define NV50_3D_RT_CONTROL_MAP3__MASK 0x0000e000
+#define NV50_3D_RT_CONTROL_MAP3__SHIFT 13
+#define NV50_3D_RT_CONTROL_MAP4__MASK 0x00070000
+#define NV50_3D_RT_CONTROL_MAP4__SHIFT 16
+#define NV50_3D_RT_CONTROL_MAP5__MASK 0x00380000
+#define NV50_3D_RT_CONTROL_MAP5__SHIFT 19
+#define NV50_3D_RT_CONTROL_MAP6__MASK 0x01c00000
+#define NV50_3D_RT_CONTROL_MAP6__SHIFT 22
+#define NV50_3D_RT_CONTROL_MAP7__MASK 0x0e000000
+#define NV50_3D_RT_CONTROL_MAP7__SHIFT 25
+
+#define NV50_3D_UNK1220 0x00001220
+
+#define NV50_3D_RT_ARRAY_MODE 0x00001224
+#define NV50_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NV50_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0
+#define NV50_3D_RT_ARRAY_MODE_MODE__MASK 0x00010000
+#define NV50_3D_RT_ARRAY_MODE_MODE__SHIFT 16
+#define NV50_3D_RT_ARRAY_MODE_MODE_2D_ARRAY 0x00000000
+#define NV50_3D_RT_ARRAY_MODE_MODE_3D 0x00010000
+
+#define NV50_3D_ZETA_HORIZ 0x00001228
+
+#define NV50_3D_ZETA_VERT 0x0000122c
+
+#define NV50_3D_ZETA_ARRAY_MODE 0x00001230
+#define NV50_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NV50_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0
+#define NV50_3D_ZETA_ARRAY_MODE_UNK 0x00010000
+
+#define NV50_3D_LINKED_TSC 0x00001234
+
+#define NV50_3D_UNK1238 0x00001238
+
+#define NVA0_3D_DRAW_TFB_BYTES 0x0000123c
+
+#define NV50_3D_RT_HORIZ(i0) (0x00001240 + 0x8*(i0))
+#define NV50_3D_RT_HORIZ__ESIZE 0x00000008
+#define NV50_3D_RT_HORIZ__LEN 0x00000008
+#define NV50_3D_RT_HORIZ_WIDTH__MASK 0x0fffffff
+#define NV50_3D_RT_HORIZ_WIDTH__SHIFT 0
+#define NV50_3D_RT_HORIZ_LINEAR 0x80000000
+
+#define NV50_3D_RT_VERT(i0) (0x00001244 + 0x8*(i0))
+#define NV50_3D_RT_VERT__ESIZE 0x00000008
+#define NV50_3D_RT_VERT__LEN 0x00000008
+
+#define NV50_3D_CB_DEF_ADDRESS_HIGH 0x00001280
+
+#define NV50_3D_CB_DEF_ADDRESS_LOW 0x00001284
+
+#define NV50_3D_CB_DEF_SET 0x00001288
+#define NV50_3D_CB_DEF_SET_SIZE__MASK 0x0000ffff
+#define NV50_3D_CB_DEF_SET_SIZE__SHIFT 0
+#define NV50_3D_CB_DEF_SET_BUFFER__MASK 0x007f0000
+#define NV50_3D_CB_DEF_SET_BUFFER__SHIFT 16
+
+#define NV50_3D_UNK128C 0x0000128c
+#define NV50_3D_UNK128C_0__MASK 0x00000003
+#define NV50_3D_UNK128C_0__SHIFT 0
+#define NV50_3D_UNK128C_1__MASK 0x00000030
+#define NV50_3D_UNK128C_1__SHIFT 4
+#define NV50_3D_UNK128C_2__MASK 0x00000300
+#define NV50_3D_UNK128C_2__SHIFT 8
+#define NV50_3D_UNK128C_3__MASK 0x00003000
+#define NV50_3D_UNK128C_3__SHIFT 12
+
+#define NV50_3D_CALL_LIMIT_LOG 0x00001290
+#define NV50_3D_CALL_LIMIT_LOG_VP__MASK 0x0000000f
+#define NV50_3D_CALL_LIMIT_LOG_VP__SHIFT 0
+#define NV50_3D_CALL_LIMIT_LOG_GP__MASK 0x000000f0
+#define NV50_3D_CALL_LIMIT_LOG_GP__SHIFT 4
+#define NV50_3D_CALL_LIMIT_LOG_FP__MASK 0x00000f00
+#define NV50_3D_CALL_LIMIT_LOG_FP__SHIFT 8
+
+#define NV50_3D_STRMOUT_BUFFERS_CTRL 0x00001294
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED 0x00000001
+#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE__MASK 0x00000002
+#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE__SHIFT 1
+#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_PRIMITIVES 0x00000000
+#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET 0x00000002
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__MASK 0x000000f0
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT 4
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MASK 0x000fff00
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT 8
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX 0x00000800
+
+#define NV50_3D_FP_RESULT_COUNT 0x00001298
+
+#define NV50_3D_VTX_UNK129C 0x0000129c
+
+#define NV50_3D_UNK12A0 0x000012a0
+
+#define NV50_3D_UNK12A8 0x000012a8
+#define NV50_3D_UNK12A8_UNK1 0x00000001
+#define NV50_3D_UNK12A8_UNK2__MASK 0x000ffff0
+#define NV50_3D_UNK12A8_UNK2__SHIFT 4
+
+#define NV50_3D_UNK12AC 0x000012ac
+
+#define NV50_3D_UNK12B0 0x000012b0
+#define NV50_3D_UNK12B0_UNK0__MASK 0x000000ff
+#define NV50_3D_UNK12B0_UNK0__SHIFT 0
+#define NV50_3D_UNK12B0_UNK1__MASK 0x0000ff00
+#define NV50_3D_UNK12B0_UNK1__SHIFT 8
+#define NV50_3D_UNK12B0_UNK2__MASK 0x00ff0000
+#define NV50_3D_UNK12B0_UNK2__SHIFT 16
+#define NV50_3D_UNK12B0_UNK3__MASK 0xff000000
+#define NV50_3D_UNK12B0_UNK3__SHIFT 24
+#define NV50_3D_UNK12B0_UNK3__MAX 0x00000080
+
+#define NV50_3D_UNK12B4 0x000012b4
+
+#define NV50_3D_UNK12B8 0x000012b8
+
+#define NV50_3D_DEPTH_TEST_ENABLE 0x000012cc
+
+#define NV50_3D_D3D_FILL_MODE 0x000012d0
+#define NV50_3D_D3D_FILL_MODE_POINT 0x00000001
+#define NV50_3D_D3D_FILL_MODE_WIREFRAME 0x00000002
+#define NV50_3D_D3D_FILL_MODE_SOLID 0x00000003
+
+#define NV50_3D_SHADE_MODEL 0x000012d4
+#define NV50_3D_SHADE_MODEL_FLAT 0x00001d00
+#define NV50_3D_SHADE_MODEL_SMOOTH 0x00001d01
+
+#define NV50_3D_LOCAL_ADDRESS_HIGH 0x000012d8
+
+#define NV50_3D_LOCAL_ADDRESS_LOW 0x000012dc
+
+#define NV50_3D_LOCAL_SIZE_LOG 0x000012e0
+
+#define NV50_3D_BLEND_INDEPENDENT 0x000012e4
+
+#define NV50_3D_DEPTH_WRITE_ENABLE 0x000012e8
+
+#define NV50_3D_ALPHA_TEST_ENABLE 0x000012ec
+
+#define NV50_3D_PM_SET(i0) (0x000012f0 + 0x4*(i0))
+#define NV50_3D_PM_SET__ESIZE 0x00000004
+#define NV50_3D_PM_SET__LEN 0x00000004
+
+#define NV50_3D_VB_ELEMENT_U8_SETUP 0x00001300
+#define NV50_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000
+#define NV50_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30
+#define NV50_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff
+#define NV50_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0
+
+#define NV50_3D_VB_ELEMENT_U8 0x00001304
+#define NV50_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff
+#define NV50_3D_VB_ELEMENT_U8_I0__SHIFT 0
+#define NV50_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00
+#define NV50_3D_VB_ELEMENT_U8_I1__SHIFT 8
+#define NV50_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000
+#define NV50_3D_VB_ELEMENT_U8_I2__SHIFT 16
+#define NV50_3D_VB_ELEMENT_U8_I3__MASK 0xff000000
+#define NV50_3D_VB_ELEMENT_U8_I3__SHIFT 24
+
+#define NV50_3D_D3D_CULL_MODE 0x00001308
+#define NV50_3D_D3D_CULL_MODE_NONE 0x00000001
+#define NV50_3D_D3D_CULL_MODE_FRONT 0x00000002
+#define NV50_3D_D3D_CULL_MODE_BACK 0x00000003
+
+#define NV50_3D_DEPTH_TEST_FUNC 0x0000130c
+#define NV50_3D_DEPTH_TEST_FUNC_NEVER 0x00000200
+#define NV50_3D_DEPTH_TEST_FUNC_LESS 0x00000201
+#define NV50_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202
+#define NV50_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203
+#define NV50_3D_DEPTH_TEST_FUNC_GREATER 0x00000204
+#define NV50_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
+#define NV50_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206
+#define NV50_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207
+
+#define NV50_3D_ALPHA_TEST_REF 0x00001310
+
+#define NV50_3D_ALPHA_TEST_FUNC 0x00001314
+#define NV50_3D_ALPHA_TEST_FUNC_NEVER 0x00000200
+#define NV50_3D_ALPHA_TEST_FUNC_LESS 0x00000201
+#define NV50_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202
+#define NV50_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203
+#define NV50_3D_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NV50_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
+#define NV50_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206
+#define NV50_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207
+
+#define NVA0_3D_DRAW_TFB_STRIDE 0x00001318
+#define NVA0_3D_DRAW_TFB_STRIDE__MIN 0x00000001
+#define NVA0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff
+
+#define NV50_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0))
+#define NV50_3D_BLEND_COLOR__ESIZE 0x00000004
+#define NV50_3D_BLEND_COLOR__LEN 0x00000004
+
+#define NV50_3D_UNK132C 0x0000132c
+
+#define NV50_3D_TSC_FLUSH 0x00001330
+#define NV50_3D_TSC_FLUSH_SPECIFIC 0x00000001
+#define NV50_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_3D_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_3D_TIC_FLUSH 0x00001334
+#define NV50_3D_TIC_FLUSH_SPECIFIC 0x00000001
+#define NV50_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_3D_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_3D_TEX_CACHE_CTL 0x00001338
+#define NV50_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030
+#define NV50_3D_TEX_CACHE_CTL_UNK1__SHIFT 4
+
+#define NV50_3D_BLEND_SEPARATE_ALPHA 0x0000133c
+
+#define NV50_3D_BLEND_EQUATION_RGB 0x00001340
+#define NV50_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NV50_3D_BLEND_EQUATION_RGB_MIN 0x00008007
+#define NV50_3D_BLEND_EQUATION_RGB_MAX 0x00008008
+#define NV50_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NV50_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NV50_3D_BLEND_FUNC_SRC_RGB 0x00001344
+
+#define NV50_3D_BLEND_FUNC_DST_RGB 0x00001348
+
+#define NV50_3D_BLEND_EQUATION_ALPHA 0x0000134c
+#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NV50_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NV50_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NV50_3D_BLEND_FUNC_SRC_ALPHA 0x00001350
+
+#define NV50_3D_UNK1354 0x00001354
+
+#define NV50_3D_BLEND_FUNC_DST_ALPHA 0x00001358
+
+#define NV50_3D_BLEND_ENABLE_COMMON 0x0000135c
+
+#define NV50_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0))
+#define NV50_3D_BLEND_ENABLE__ESIZE 0x00000004
+#define NV50_3D_BLEND_ENABLE__LEN 0x00000008
+
+#define NV50_3D_STENCIL_ENABLE 0x00001380
+
+#define NV50_3D_STENCIL_FRONT_OP_FAIL 0x00001384
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NV50_3D_STENCIL_FRONT_FUNC_REF 0x00001394
+
+#define NV50_3D_STENCIL_FRONT_MASK 0x00001398
+
+#define NV50_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c
+
+#define NV50_3D_UNK13A0 0x000013a0
+
+#define NVA0_3D_DRAW_TFB_BASE 0x000013a4
+
+#define NV50_3D_FRAG_COLOR_CLAMP_EN 0x000013a8
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000
+
+#define NV50_3D_SCREEN_Y_CONTROL 0x000013ac
+#define NV50_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001
+#define NV50_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010
+
+#define NV50_3D_LINE_WIDTH 0x000013b0
+
+#define NV50_3D_TEX_LIMITS(i0) (0x000013b4 + 0x4*(i0))
+#define NV50_3D_TEX_LIMITS__ESIZE 0x00000004
+#define NV50_3D_TEX_LIMITS__LEN 0x00000003
+#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f
+#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0
+#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000
+#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004
+#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0
+#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4
+#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000
+#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007
+
+#define NV50_3D_POINT_COORD_REPLACE_MAP(i0) (0x000013c0 + 0x4*(i0))
+#define NV50_3D_POINT_COORD_REPLACE_MAP__ESIZE 0x00000004
+#define NV50_3D_POINT_COORD_REPLACE_MAP__LEN 0x00000010
+
+#define NV50_3D_UNK1400_LANES 0x00001400
+
+#define NV50_3D_UNK1404 0x00001404
+
+#define NV50_3D_UNK1408 0x00001408
+
+#define NV50_3D_VP_START_ID 0x0000140c
+
+#define NV50_3D_GP_START_ID 0x00001410
+
+#define NV50_3D_FP_START_ID 0x00001414
+
+#define NVA3_3D_UNK1418 0x00001418
+
+#define NV50_3D_UNK141C 0x0000141c
+
+#define NV50_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420
+#define NV50_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001
+#define NV50_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400
+
+#define NV50_3D_VERTEX_ARRAY_FLUSH 0x0000142c
+
+#define NV50_3D_UNK1430 0x00001430
+#define NV50_3D_UNK1430_UNK0 0x00000010
+#define NV50_3D_UNK1430_UNK1 0x00000100
+
+#define NV50_3D_VB_ELEMENT_BASE 0x00001434
+
+#define NV50_3D_VB_INSTANCE_BASE 0x00001438
+
+#define NV50_3D_CLEAR_FLAGS 0x0000143c
+#define NV50_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001
+#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT__MASK 0x00000010
+#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT__SHIFT 4
+#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT_SCISSOR 0x00000000
+#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT 0x00000010
+
+#define NV50_3D_CODE_CB_FLUSH 0x00001440
+
+#define NV50_3D_BIND_TSC(i0) (0x00001444 + 0x8*(i0))
+#define NV50_3D_BIND_TSC__ESIZE 0x00000008
+#define NV50_3D_BIND_TSC__LEN 0x00000003
+#define NV50_3D_BIND_TSC_VALID 0x00000001
+#define NV50_3D_BIND_TSC_SAMPLER__MASK 0x000000f0
+#define NV50_3D_BIND_TSC_SAMPLER__SHIFT 4
+#define NV50_3D_BIND_TSC_TSC__MASK 0x001ff000
+#define NV50_3D_BIND_TSC_TSC__SHIFT 12
+
+#define NV50_3D_BIND_TIC(i0) (0x00001448 + 0x8*(i0))
+#define NV50_3D_BIND_TIC__ESIZE 0x00000008
+#define NV50_3D_BIND_TIC__LEN 0x00000003
+#define NV50_3D_BIND_TIC_VALID 0x00000001
+#define NV50_3D_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NV50_3D_BIND_TIC_TEXTURE__SHIFT 1
+#define NV50_3D_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NV50_3D_BIND_TIC_TIC__SHIFT 9
+
+#define NV50_3D_BIND_TSC2(i0) (0x00001468 + 0x8*(i0))
+#define NV50_3D_BIND_TSC2__ESIZE 0x00000008
+#define NV50_3D_BIND_TSC2__LEN 0x00000003
+#define NV50_3D_BIND_TSC2_VALID 0x00000001
+#define NV50_3D_BIND_TSC2_SAMPLER__MASK 0x00000010
+#define NV50_3D_BIND_TSC2_SAMPLER__SHIFT 4
+#define NV50_3D_BIND_TSC2_TSC__MASK 0x001ff000
+#define NV50_3D_BIND_TSC2_TSC__SHIFT 12
+
+#define NV50_3D_BIND_TIC2(i0) (0x0000146c + 0x8*(i0))
+#define NV50_3D_BIND_TIC2__ESIZE 0x00000008
+#define NV50_3D_BIND_TIC2__LEN 0x00000003
+#define NV50_3D_BIND_TIC2_VALID 0x00000001
+#define NV50_3D_BIND_TIC2_TEXTURE__MASK 0x00000002
+#define NV50_3D_BIND_TIC2_TEXTURE__SHIFT 1
+#define NV50_3D_BIND_TIC2_TIC__MASK 0x7ffffe00
+#define NV50_3D_BIND_TIC2_TIC__SHIFT 9
+
+#define NV50_3D_STRMOUT_MAP(i0) (0x00001480 + 0x4*(i0))
+#define NV50_3D_STRMOUT_MAP__ESIZE 0x00000004
+#define NV50_3D_STRMOUT_MAP__LEN 0x00000020
+
+#define NV50_3D_CLIPID_HEIGHT 0x00001504
+#define NV50_3D_CLIPID_HEIGHT__MAX 0x00002000
+
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ 0x00001508
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16
+
+#define NV50_3D_CLIPID_FILL_RECT_VERT 0x0000150c
+#define NV50_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff
+#define NV50_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0
+#define NV50_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000
+#define NV50_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16
+
+#define NV50_3D_CLIP_DISTANCE_ENABLE 0x00001510
+#define NV50_3D_CLIP_DISTANCE_ENABLE_0 0x00000001
+#define NV50_3D_CLIP_DISTANCE_ENABLE_1 0x00000002
+#define NV50_3D_CLIP_DISTANCE_ENABLE_2 0x00000004
+#define NV50_3D_CLIP_DISTANCE_ENABLE_3 0x00000008
+#define NV50_3D_CLIP_DISTANCE_ENABLE_4 0x00000010
+#define NV50_3D_CLIP_DISTANCE_ENABLE_5 0x00000020
+#define NV50_3D_CLIP_DISTANCE_ENABLE_6 0x00000040
+#define NV50_3D_CLIP_DISTANCE_ENABLE_7 0x00000080
+
+#define NV50_3D_SAMPLECNT_ENABLE 0x00001514
+
+#define NV50_3D_POINT_SIZE 0x00001518
+
+#define NV50_3D_ZCULL_STATCTRS_ENABLE 0x0000151c
+
+#define NV50_3D_POINT_SPRITE_ENABLE 0x00001520
+
+#define NVA0_3D_UNK152C 0x0000152c
+#define NVA0_3D_UNK152C_UNK0 0x00000001
+#define NVA0_3D_UNK152C_UNK1 0x00000010
+#define NVA0_3D_UNK152C_UNK2 0x00000100
+#define NVA0_3D_UNK152C_UNK3__MASK 0x000ff000
+#define NVA0_3D_UNK152C_UNK3__SHIFT 12
+#define NVA0_3D_UNK152C_UNK3__MAX 0x00000028
+
+#define NV50_3D_COUNTER_RESET 0x00001530
+#define NV50_3D_COUNTER_RESET_SAMPLECNT 0x00000001
+#define NV50_3D_COUNTER_RESET_ZCULL_STATS 0x00000002
+#define NVA0_3D_COUNTER_RESET_STRMOUT_VERTICES 0x00000008
+#define NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK 0x00000010
+#define NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x00000011
+#define NV50_3D_COUNTER_RESET_VFETCH_VERTICES 0x00000012
+#define NV50_3D_COUNTER_RESET_VFETCH_PRIMITIVES 0x00000013
+#define NV50_3D_COUNTER_RESET_VP_LAUNCHES 0x00000015
+#define NV50_3D_COUNTER_RESET_GP_LAUNCHES 0x0000001a
+#define NV50_3D_COUNTER_RESET_GP_PRIMITIVES_OUT 0x0000001b
+#define NV50_3D_COUNTER_RESET_RAST_PRIMITIVES_PRECLIP 0x0000001c
+#define NV50_3D_COUNTER_RESET_RAST_PRIMITIVES_POSTCLIP 0x0000001d
+#define NV50_3D_COUNTER_RESET_FP_PIXELS 0x0000001e
+
+#define NV50_3D_MULTISAMPLE_ENABLE 0x00001534
+
+#define NV50_3D_ZETA_ENABLE 0x00001538
+
+#define NV50_3D_MULTISAMPLE_CTRL 0x0000153c
+#define NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001
+#define NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010
+
+#define NV50_3D_NOPERSPECTIVE_BITMAP(i0) (0x00001540 + 0x4*(i0))
+#define NV50_3D_NOPERSPECTIVE_BITMAP__ESIZE 0x00000004
+#define NV50_3D_NOPERSPECTIVE_BITMAP__LEN 0x00000004
+
+#define NV50_3D_COND_ADDRESS_HIGH 0x00001550
+
+#define NV50_3D_COND_ADDRESS_LOW 0x00001554
+
+#define NV50_3D_COND_MODE 0x00001558
+#define NV50_3D_COND_MODE_NEVER 0x00000000
+#define NV50_3D_COND_MODE_ALWAYS 0x00000001
+#define NV50_3D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NV50_3D_COND_MODE_EQUAL 0x00000003
+#define NV50_3D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NV50_3D_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NV50_3D_TSC_ADDRESS_LOW 0x00001560
+#define NV50_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020
+
+#define NV50_3D_TSC_LIMIT 0x00001564
+#define NV50_3D_TSC_LIMIT__MAX 0x00001fff
+
+#define NV50_3D_UNK1568 0x00001568
+
+#define NV50_3D_POLYGON_OFFSET_FACTOR 0x0000156c
+
+#define NV50_3D_LINE_SMOOTH_ENABLE 0x00001570
+
+#define NV50_3D_TIC_ADDRESS_HIGH 0x00001574
+
+#define NV50_3D_TIC_ADDRESS_LOW 0x00001578
+
+#define NV50_3D_TIC_LIMIT 0x0000157c
+
+#define NV50_3D_PM_CONTROL(i0) (0x00001580 + 0x4*(i0))
+#define NV50_3D_PM_CONTROL__ESIZE 0x00000004
+#define NV50_3D_PM_CONTROL__LEN 0x00000004
+#define NV50_3D_PM_CONTROL_UNK0 0x00000001
+#define NV50_3D_PM_CONTROL_UNK1__MASK 0x00000070
+#define NV50_3D_PM_CONTROL_UNK1__SHIFT 4
+#define NV50_3D_PM_CONTROL_UNK2__MASK 0x00ffff00
+#define NV50_3D_PM_CONTROL_UNK2__SHIFT 8
+#define NV50_3D_PM_CONTROL_UNK3__MASK 0xff000000
+#define NV50_3D_PM_CONTROL_UNK3__SHIFT 24
+
+#define NV50_3D_ZCULL_REGION 0x00001590
+
+#define NV50_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594
+
+#define NV50_3D_STENCIL_BACK_OP_FAIL 0x00001598
+#define NV50_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NV50_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NV50_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NV50_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NV50_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NV50_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_BACK_OP_ZPASS 0x000015a0
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NV50_3D_UNK15A8 0x000015a8
+#define NV50_3D_UNK15A8_UNK1__MASK 0x00000007
+#define NV50_3D_UNK15A8_UNK1__SHIFT 0
+#define NV50_3D_UNK15A8_UNK2__MASK 0x00000070
+#define NV50_3D_UNK15A8_UNK2__SHIFT 4
+
+#define NV50_3D_UNK15AC 0x000015ac
+
+#define NV50_3D_UNK15B0 0x000015b0
+#define NV50_3D_UNK15B0_0 0x00000001
+#define NV50_3D_UNK15B0_1 0x00000010
+#define NV50_3D_UNK15B0_2 0x00000100
+
+#define NV50_3D_CSAA_ENABLE 0x000015b4
+
+#define NV50_3D_FRAMEBUFFER_SRGB 0x000015b8
+
+#define NV50_3D_POLYGON_OFFSET_UNITS 0x000015bc
+
+#define NVA3_3D_UNK15C4 0x000015c4
+
+#define NVA3_3D_UNK15C8 0x000015c8
+
+#define NV50_3D_LAYER 0x000015cc
+#define NV50_3D_LAYER_IDX__MASK 0x0000ffff
+#define NV50_3D_LAYER_IDX__SHIFT 0
+#define NV50_3D_LAYER_USE_GP 0x00010000
+
+#define NV50_3D_MULTISAMPLE_MODE 0x000015d0
+#define NV50_3D_MULTISAMPLE_MODE_MS1 0x00000000
+#define NV50_3D_MULTISAMPLE_MODE_MS2 0x00000001
+#define NV50_3D_MULTISAMPLE_MODE_MS4 0x00000002
+#define NV50_3D_MULTISAMPLE_MODE_MS8 0x00000003
+#define NV50_3D_MULTISAMPLE_MODE_MS8_ALT 0x00000004
+#define NV50_3D_MULTISAMPLE_MODE_MS2_ALT 0x00000005
+#define NV50_3D_MULTISAMPLE_MODE_UNK6 0x00000006
+#define NV50_3D_MULTISAMPLE_MODE_MS4_CS4 0x00000008
+#define NV50_3D_MULTISAMPLE_MODE_MS4_CS12 0x00000009
+#define NV50_3D_MULTISAMPLE_MODE_MS8_CS8 0x0000000a
+#define NV50_3D_MULTISAMPLE_MODE_MS8_CS24 0x0000000b
+
+#define NV50_3D_VERTEX_BEGIN_D3D 0x000015d4
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NV50_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000
+#define NV84_3D_VERTEX_BEGIN_D3D_PRIMITIVE_ID_CONT 0x20000000
+#define NVA0_3D_VERTEX_BEGIN_D3D_INSTANCE_CONT 0x40000000
+
+#define NV50_3D_VERTEX_END_D3D 0x000015d8
+#define NV50_3D_VERTEX_END_D3D_UNK0 0x00000001
+#define NVA0_3D_VERTEX_END_D3D_UNK1 0x00000002
+
+#define NV50_3D_VERTEX_BEGIN_GL 0x000015dc
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x10000000
+#define NV84_3D_VERTEX_BEGIN_GL_PRIMITIVE_ID_CONT 0x20000000
+#define NVA0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x40000000
+
+#define NV50_3D_VERTEX_END_GL 0x000015e0
+#define NV50_3D_VERTEX_END_GL_UNK0 0x00000001
+#define NVA0_3D_VERTEX_END_GL_UNK1 0x00000002
+
+#define NV50_3D_EDGEFLAG 0x000015e4
+
+#define NV50_3D_VB_ELEMENT_U32 0x000015e8
+
+#define NV50_3D_VB_ELEMENT_U16_SETUP 0x000015ec
+#define NV50_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000
+#define NV50_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30
+#define NV50_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff
+#define NV50_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0
+
+#define NV50_3D_VB_ELEMENT_U16 0x000015f0
+#define NV50_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff
+#define NV50_3D_VB_ELEMENT_U16_I0__SHIFT 0
+#define NV50_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000
+#define NV50_3D_VB_ELEMENT_U16_I1__SHIFT 16
+
+#define NV50_3D_VERTEX_BASE_HIGH 0x000015f4
+
+#define NV50_3D_VERTEX_BASE_LOW 0x000015f8
+
+#define NV50_3D_VERTEX_DATA 0x00001640
+
+#define NV50_3D_PRIM_RESTART_ENABLE 0x00001644
+
+#define NV50_3D_PRIM_RESTART_INDEX 0x00001648
+
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000
+
+#define NV50_3D_VP_ATTR_EN(i0) (0x00001650 + 0x4*(i0))
+#define NV50_3D_VP_ATTR_EN__ESIZE 0x00000004
+#define NV50_3D_VP_ATTR_EN__LEN 0x00000002
+#define NV50_3D_VP_ATTR_EN_7__MASK 0xf0000000
+#define NV50_3D_VP_ATTR_EN_7__SHIFT 28
+#define NV50_3D_VP_ATTR_EN_7_X 0x10000000
+#define NV50_3D_VP_ATTR_EN_7_Y 0x20000000
+#define NV50_3D_VP_ATTR_EN_7_Z 0x40000000
+#define NV50_3D_VP_ATTR_EN_7_W 0x80000000
+#define NV50_3D_VP_ATTR_EN_6__MASK 0x0f000000
+#define NV50_3D_VP_ATTR_EN_6__SHIFT 24
+#define NV50_3D_VP_ATTR_EN_6_X 0x01000000
+#define NV50_3D_VP_ATTR_EN_6_Y 0x02000000
+#define NV50_3D_VP_ATTR_EN_6_Z 0x04000000
+#define NV50_3D_VP_ATTR_EN_6_W 0x08000000
+#define NV50_3D_VP_ATTR_EN_5__MASK 0x00f00000
+#define NV50_3D_VP_ATTR_EN_5__SHIFT 20
+#define NV50_3D_VP_ATTR_EN_5_X 0x00100000
+#define NV50_3D_VP_ATTR_EN_5_Y 0x00200000
+#define NV50_3D_VP_ATTR_EN_5_Z 0x00400000
+#define NV50_3D_VP_ATTR_EN_5_W 0x00800000
+#define NV50_3D_VP_ATTR_EN_4__MASK 0x000f0000
+#define NV50_3D_VP_ATTR_EN_4__SHIFT 16
+#define NV50_3D_VP_ATTR_EN_4_X 0x00010000
+#define NV50_3D_VP_ATTR_EN_4_Y 0x00020000
+#define NV50_3D_VP_ATTR_EN_4_Z 0x00040000
+#define NV50_3D_VP_ATTR_EN_4_W 0x00080000
+#define NV50_3D_VP_ATTR_EN_3__MASK 0x0000f000
+#define NV50_3D_VP_ATTR_EN_3__SHIFT 12
+#define NV50_3D_VP_ATTR_EN_3_X 0x00001000
+#define NV50_3D_VP_ATTR_EN_3_Y 0x00002000
+#define NV50_3D_VP_ATTR_EN_3_Z 0x00004000
+#define NV50_3D_VP_ATTR_EN_3_W 0x00008000
+#define NV50_3D_VP_ATTR_EN_2__MASK 0x00000f00
+#define NV50_3D_VP_ATTR_EN_2__SHIFT 8
+#define NV50_3D_VP_ATTR_EN_2_X 0x00000100
+#define NV50_3D_VP_ATTR_EN_2_Y 0x00000200
+#define NV50_3D_VP_ATTR_EN_2_Z 0x00000400
+#define NV50_3D_VP_ATTR_EN_2_W 0x00000800
+#define NV50_3D_VP_ATTR_EN_1__MASK 0x000000f0
+#define NV50_3D_VP_ATTR_EN_1__SHIFT 4
+#define NV50_3D_VP_ATTR_EN_1_X 0x00000010
+#define NV50_3D_VP_ATTR_EN_1_Y 0x00000020
+#define NV50_3D_VP_ATTR_EN_1_Z 0x00000040
+#define NV50_3D_VP_ATTR_EN_1_W 0x00000080
+#define NV50_3D_VP_ATTR_EN_0__MASK 0x0000000f
+#define NV50_3D_VP_ATTR_EN_0__SHIFT 0
+#define NV50_3D_VP_ATTR_EN_0_X 0x00000001
+#define NV50_3D_VP_ATTR_EN_0_Y 0x00000002
+#define NV50_3D_VP_ATTR_EN_0_Z 0x00000004
+#define NV50_3D_VP_ATTR_EN_0_W 0x00000008
+
+#define NV50_3D_POINT_SMOOTH_ENABLE 0x00001658
+
+#define NV50_3D_POINT_RASTER_RULES 0x0000165c
+#define NV50_3D_POINT_RASTER_RULES_OGL 0x00000000
+#define NV50_3D_POINT_RASTER_RULES_D3D 0x00000001
+
+#define NV50_3D_POINT_SPRITE_CTRL 0x00001660
+#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN__MASK 0x00000010
+#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN__SHIFT 4
+#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN_LOWER_LEFT 0x00000000
+#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN_UPPER_LEFT 0x00000010
+
+#define NVA0_3D_TEX_MISC 0x00001664
+#define NVA0_3D_TEX_MISC_UNK1 0x00000002
+#define NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004
+
+#define NV50_3D_LINE_SMOOTH_BLUR 0x00001668
+#define NV50_3D_LINE_SMOOTH_BLUR_LOW 0x00000000
+#define NV50_3D_LINE_SMOOTH_BLUR_MEDIUM 0x00000001
+#define NV50_3D_LINE_SMOOTH_BLUR_HIGH 0x00000002
+
+#define NV50_3D_LINE_STIPPLE_ENABLE 0x0000166c
+
+#define NV50_3D_COVERAGE_LUT(i0) (0x00001670 + 0x4*(i0))
+#define NV50_3D_COVERAGE_LUT__ESIZE 0x00000004
+#define NV50_3D_COVERAGE_LUT__LEN 0x00000004
+#define NV50_3D_COVERAGE_LUT_0__MASK 0x000000ff
+#define NV50_3D_COVERAGE_LUT_0__SHIFT 0
+#define NV50_3D_COVERAGE_LUT_1__MASK 0x0000ff00
+#define NV50_3D_COVERAGE_LUT_1__SHIFT 8
+#define NV50_3D_COVERAGE_LUT_2__MASK 0x00ff0000
+#define NV50_3D_COVERAGE_LUT_2__SHIFT 16
+#define NV50_3D_COVERAGE_LUT_3__MASK 0xff000000
+#define NV50_3D_COVERAGE_LUT_3__SHIFT 24
+
+#define NV50_3D_LINE_STIPPLE 0x00001680
+#define NV50_3D_LINE_STIPPLE_FACTOR_M1__MASK 0x000000ff
+#define NV50_3D_LINE_STIPPLE_FACTOR_M1__SHIFT 0
+#define NV50_3D_LINE_STIPPLE_PATTERN__MASK 0x00ffff00
+#define NV50_3D_LINE_STIPPLE_PATTERN__SHIFT 8
+
+#define NV50_3D_PROVOKING_VERTEX_LAST 0x00001684
+
+#define NV50_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688
+
+#define NV50_3D_POLYGON_STIPPLE_ENABLE 0x0000168c
+
+#define NV50_3D_UNK1690 0x00001690
+#define NV50_3D_UNK1690_ALWAYS_DERIV 0x00000001
+#define NV50_3D_UNK1690_UNK16 0x00010000
+
+#define NV50_3D_SET_PROGRAM_CB 0x00001694
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM__MASK 0x000000f0
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM__SHIFT 4
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX 0x00000000
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY 0x00000020
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT 0x00000030
+#define NV50_3D_SET_PROGRAM_CB_INDEX__MASK 0x00000f00
+#define NV50_3D_SET_PROGRAM_CB_INDEX__SHIFT 8
+#define NV50_3D_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000
+#define NV50_3D_SET_PROGRAM_CB_BUFFER__SHIFT 12
+#define NV50_3D_SET_PROGRAM_CB_VALID 0x00000001
+
+#define NV50_3D_UNK1698 0x00001698
+#define NV50_3D_UNK1698_0 0x00000001
+#define NV50_3D_UNK1698_1 0x00000010
+#define NV50_3D_UNK1698_2 0x00000100
+
+#define NVA3_3D_SAMPLE_SHADING 0x0000169c
+#define NVA3_3D_SAMPLE_SHADING_MIN_SAMPLES__MASK 0x0000000f
+#define NVA3_3D_SAMPLE_SHADING_MIN_SAMPLES__SHIFT 0
+#define NVA3_3D_SAMPLE_SHADING_ENABLE 0x00000010
+
+#define NVA3_3D_UNK16A0 0x000016a0
+
+#define NV50_3D_VP_RESULT_MAP_SIZE 0x000016ac
+
+#define NV50_3D_VP_REG_ALLOC_TEMP 0x000016b0
+
+#define NVA0_3D_UNK16B4 0x000016b4
+#define NVA0_3D_UNK16B4_UNK0 0x00000001
+#define NVA3_3D_UNK16B4_UNK1 0x00000002
+
+#define NV50_3D_VP_REG_ALLOC_RESULT 0x000016b8
+
+#define NV50_3D_VP_RESULT_MAP(i0) (0x000016bc + 0x4*(i0))
+#define NV50_3D_VP_RESULT_MAP__ESIZE 0x00000004
+#define NV50_3D_VP_RESULT_MAP__LEN 0x00000011
+#define NV50_3D_VP_RESULT_MAP_0__MASK 0x000000ff
+#define NV50_3D_VP_RESULT_MAP_0__SHIFT 0
+#define NV50_3D_VP_RESULT_MAP_1__MASK 0x0000ff00
+#define NV50_3D_VP_RESULT_MAP_1__SHIFT 8
+#define NV50_3D_VP_RESULT_MAP_2__MASK 0x00ff0000
+#define NV50_3D_VP_RESULT_MAP_2__SHIFT 16
+#define NV50_3D_VP_RESULT_MAP_3__MASK 0xff000000
+#define NV50_3D_VP_RESULT_MAP_3__SHIFT 24
+
+#define NV50_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0))
+#define NV50_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004
+#define NV50_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020
+
+#define NVA0_3D_STRMOUT_OFFSET(i0) (0x00001780 + 0x4*(i0))
+#define NVA0_3D_STRMOUT_OFFSET__ESIZE 0x00000004
+#define NVA0_3D_STRMOUT_OFFSET__LEN 0x00000004
+
+#define NV50_3D_GP_ENABLE 0x00001798
+
+#define NV50_3D_GP_REG_ALLOC_TEMP 0x000017a0
+
+#define NV50_3D_GP_REG_ALLOC_RESULT 0x000017a8
+
+#define NV50_3D_GP_RESULT_MAP_SIZE 0x000017ac
+
+#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE 0x000017b0
+#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS 0x00000001
+#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP 0x00000002
+#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP 0x00000003
+
+#define NV50_3D_RASTERIZE_ENABLE 0x000017b4
+
+#define NV50_3D_STRMOUT_ENABLE 0x000017b8
+
+#define NV50_3D_GP_RESULT_MAP(i0) (0x000017fc + 0x4*(i0))
+#define NV50_3D_GP_RESULT_MAP__ESIZE 0x00000004
+#define NV50_3D_GP_RESULT_MAP__LEN 0x00000021
+#define NV50_3D_GP_RESULT_MAP_0__MASK 0x000000ff
+#define NV50_3D_GP_RESULT_MAP_0__SHIFT 0
+#define NV50_3D_GP_RESULT_MAP_1__MASK 0x0000ff00
+#define NV50_3D_GP_RESULT_MAP_1__SHIFT 8
+#define NV50_3D_GP_RESULT_MAP_2__MASK 0x00ff0000
+#define NV50_3D_GP_RESULT_MAP_2__SHIFT 16
+#define NV50_3D_GP_RESULT_MAP_3__MASK 0xff000000
+#define NV50_3D_GP_RESULT_MAP_3__SHIFT 24
+
+#define NV50_3D_POLYGON_OFFSET_CLAMP 0x0000187c
+
+#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT(i0) (0x00001880 + 0x4*(i0))
+#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT__ESIZE 0x00000004
+#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT__LEN 0x00000020
+
+#define NV50_3D_GP_VIEWPORT_ID_ENABLE 0x00001900
+
+#define NV50_3D_SEMANTIC_COLOR 0x00001904
+#define NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK 0x000000ff
+#define NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT 0
+#define NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK 0x0000ff00
+#define NV50_3D_SEMANTIC_COLOR_BFC0_ID__SHIFT 8
+#define NV50_3D_SEMANTIC_COLOR_COLR_NR__MASK 0x00ff0000
+#define NV50_3D_SEMANTIC_COLOR_COLR_NR__SHIFT 16
+#define NV50_3D_SEMANTIC_COLOR_CLMP_EN 0x01000000
+
+#define NV50_3D_SEMANTIC_CLIP 0x00001908
+#define NV50_3D_SEMANTIC_CLIP_CLIP_START__MASK 0x000000ff
+#define NV50_3D_SEMANTIC_CLIP_CLIP_START__SHIFT 0
+#define NV50_3D_SEMANTIC_CLIP_CLIP_NUM__MASK 0x00000f00
+#define NV50_3D_SEMANTIC_CLIP_CLIP_NUM__SHIFT 8
+
+#define NV50_3D_SEMANTIC_LAYER 0x0000190c
+#define NV50_3D_SEMANTIC_LAYER_LAYER_ID__MASK 0x000000ff
+#define NV50_3D_SEMANTIC_LAYER_LAYER_ID__SHIFT 0
+
+#define NV50_3D_SEMANTIC_PTSZ 0x00001910
+#define NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK 0x00000001
+#define NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__SHIFT 0
+#define NV50_3D_SEMANTIC_PTSZ_PTSZ_ID__MASK 0x00000ff0
+#define NV50_3D_SEMANTIC_PTSZ_PTSZ_ID__SHIFT 4
+
+#define NV50_3D_SEMANTIC_PRIM_ID 0x00001914
+#define NV50_3D_SEMANTIC_PRIM_ID_PRIM_ID__MASK 0x000000ff
+#define NV50_3D_SEMANTIC_PRIM_ID_PRIM_ID__SHIFT 0
+
+#define NV50_3D_CULL_FACE_ENABLE 0x00001918
+
+#define NV50_3D_FRONT_FACE 0x0000191c
+#define NV50_3D_FRONT_FACE_CW 0x00000900
+#define NV50_3D_FRONT_FACE_CCW 0x00000901
+
+#define NV50_3D_CULL_FACE 0x00001920
+#define NV50_3D_CULL_FACE_FRONT 0x00000404
+#define NV50_3D_CULL_FACE_BACK 0x00000405
+#define NV50_3D_CULL_FACE_FRONT_AND_BACK 0x00000408
+
+#define NV50_3D_LINE_LAST_PIXEL 0x00001924
+
+#define NVA3_3D_FP_MULTISAMPLE 0x00001928
+#define NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK 0x00000001
+#define NVA3_3D_FP_MULTISAMPLE_FORCE_PER_SAMPLE 0x00000002
+
+#define NV50_3D_VIEWPORT_TRANSFORM_EN 0x0000192c
+
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001
+#define NVA0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002
+#define NVA0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000
+#define NV84_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000
+
+#define NV50_3D_CLIP_DISTANCE_MODE 0x00001940
+#define NV50_3D_CLIP_DISTANCE_MODE_0__MASK 0x00000001
+#define NV50_3D_CLIP_DISTANCE_MODE_0__SHIFT 0
+#define NV50_3D_CLIP_DISTANCE_MODE_0_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_0_CULL 0x00000001
+#define NV50_3D_CLIP_DISTANCE_MODE_1__MASK 0x00000010
+#define NV50_3D_CLIP_DISTANCE_MODE_1__SHIFT 4
+#define NV50_3D_CLIP_DISTANCE_MODE_1_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_1_CULL 0x00000010
+#define NV50_3D_CLIP_DISTANCE_MODE_2__MASK 0x00000100
+#define NV50_3D_CLIP_DISTANCE_MODE_2__SHIFT 8
+#define NV50_3D_CLIP_DISTANCE_MODE_2_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_2_CULL 0x00000100
+#define NV50_3D_CLIP_DISTANCE_MODE_3__MASK 0x00001000
+#define NV50_3D_CLIP_DISTANCE_MODE_3__SHIFT 12
+#define NV50_3D_CLIP_DISTANCE_MODE_3_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_3_CULL 0x00001000
+#define NV50_3D_CLIP_DISTANCE_MODE_4__MASK 0x00010000
+#define NV50_3D_CLIP_DISTANCE_MODE_4__SHIFT 16
+#define NV50_3D_CLIP_DISTANCE_MODE_4_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_4_CULL 0x00010000
+#define NV50_3D_CLIP_DISTANCE_MODE_5__MASK 0x00100000
+#define NV50_3D_CLIP_DISTANCE_MODE_5__SHIFT 20
+#define NV50_3D_CLIP_DISTANCE_MODE_5_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_5_CULL 0x00100000
+#define NV50_3D_CLIP_DISTANCE_MODE_6__MASK 0x01000000
+#define NV50_3D_CLIP_DISTANCE_MODE_6__SHIFT 24
+#define NV50_3D_CLIP_DISTANCE_MODE_6_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_6_CULL 0x01000000
+#define NV50_3D_CLIP_DISTANCE_MODE_7__MASK 0x10000000
+#define NV50_3D_CLIP_DISTANCE_MODE_7__SHIFT 28
+#define NV50_3D_CLIP_DISTANCE_MODE_7_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_7_CULL 0x10000000
+
+#define NVA3_3D_UNK1944 0x00001944
+
+#define NV50_3D_CLIP_RECTS_EN 0x0000194c
+
+#define NV50_3D_CLIP_RECTS_MODE 0x00001950
+#define NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000
+#define NV50_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001
+#define NV50_3D_CLIP_RECTS_MODE_NEVER 0x00000002
+
+#define NV50_3D_ZCULL_VALIDATE 0x00001954
+#define NV50_3D_ZCULL_VALIDATE_CLEAR_UNK0 0x00000001
+#define NV50_3D_ZCULL_VALIDATE_CLEAR_UNK1 0x00000010
+
+#define NV50_3D_ZCULL_INVALIDATE 0x00001958
+
+#define NVA3_3D_UNK1960 0x00001960
+#define NVA3_3D_UNK1960_0 0x00000001
+#define NVA3_3D_UNK1960_1 0x00000010
+
+#define NV50_3D_UNK1968 0x00001968
+#define NV50_3D_UNK1968_0 0x00000001
+#define NV50_3D_UNK1968_1 0x00000010
+
+#define NV50_3D_FP_CTRL_UNK196C 0x0000196c
+#define NV50_3D_FP_CTRL_UNK196C_0 0x00000001
+#define NV50_3D_FP_CTRL_UNK196C_1 0x00000010
+
+#define NV50_3D_UNK1978 0x00001978
+
+#define NV50_3D_CLIPID_ENABLE 0x0000197c
+
+#define NV50_3D_CLIPID_WIDTH 0x00001980
+#define NV50_3D_CLIPID_WIDTH__MAX 0x00002000
+#define NV50_3D_CLIPID_WIDTH__ALIGN 0x00000040
+
+#define NV50_3D_CLIPID_ID 0x00001984
+
+#define NV50_3D_FP_INTERPOLANT_CTRL 0x00001988
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK__MASK 0xff000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK__SHIFT 24
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_X 0x01000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_Y 0x02000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_Z 0x04000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_W 0x08000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__MASK 0x00ff0000
+#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT 16
+#define NV50_3D_FP_INTERPOLANT_CTRL_OFFSET__MASK 0x0000ff00
+#define NV50_3D_FP_INTERPOLANT_CTRL_OFFSET__SHIFT 8
+#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT__MASK 0x000000ff
+#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT 0
+
+#define NV50_3D_FP_REG_ALLOC_TEMP 0x0000198c
+
+#define NV50_3D_REG_MODE 0x000019a0
+#define NV50_3D_REG_MODE_PACKED 0x00000001
+#define NV50_3D_REG_MODE_STRIPED 0x00000002
+
+#define NV50_3D_FP_CONTROL 0x000019a8
+#define NV50_3D_FP_CONTROL_MULTIPLE_RESULTS 0x00000001
+#define NV50_3D_FP_CONTROL_EXPORTS_Z 0x00000100
+#define NV50_3D_FP_CONTROL_USES_KIL 0x00100000
+
+#define NV50_3D_DEPTH_BOUNDS_EN 0x000019bc
+
+#define NV50_3D_UNK19C0 0x000019c0
+
+#define NV50_3D_LOGIC_OP_ENABLE 0x000019c4
+
+#define NV50_3D_LOGIC_OP 0x000019c8
+#define NV50_3D_LOGIC_OP_CLEAR 0x00001500
+#define NV50_3D_LOGIC_OP_AND 0x00001501
+#define NV50_3D_LOGIC_OP_AND_REVERSE 0x00001502
+#define NV50_3D_LOGIC_OP_COPY 0x00001503
+#define NV50_3D_LOGIC_OP_AND_INVERTED 0x00001504
+#define NV50_3D_LOGIC_OP_NOOP 0x00001505
+#define NV50_3D_LOGIC_OP_XOR 0x00001506
+#define NV50_3D_LOGIC_OP_OR 0x00001507
+#define NV50_3D_LOGIC_OP_NOR 0x00001508
+#define NV50_3D_LOGIC_OP_EQUIV 0x00001509
+#define NV50_3D_LOGIC_OP_INVERT 0x0000150a
+#define NV50_3D_LOGIC_OP_OR_REVERSE 0x0000150b
+#define NV50_3D_LOGIC_OP_COPY_INVERTED 0x0000150c
+#define NV50_3D_LOGIC_OP_OR_INVERTED 0x0000150d
+#define NV50_3D_LOGIC_OP_NAND 0x0000150e
+#define NV50_3D_LOGIC_OP_SET 0x0000150f
+
+#define NV50_3D_ZETA_COMP_ENABLE 0x000019cc
+
+#define NV50_3D_CLEAR_BUFFERS 0x000019d0
+#define NV50_3D_CLEAR_BUFFERS_Z 0x00000001
+#define NV50_3D_CLEAR_BUFFERS_S 0x00000002
+#define NV50_3D_CLEAR_BUFFERS_R 0x00000004
+#define NV50_3D_CLEAR_BUFFERS_G 0x00000008
+#define NV50_3D_CLEAR_BUFFERS_B 0x00000010
+#define NV50_3D_CLEAR_BUFFERS_A 0x00000020
+#define NV50_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0
+#define NV50_3D_CLEAR_BUFFERS_RT__SHIFT 6
+#define NV50_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00
+#define NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT 10
+
+#define NV50_3D_CLIPID_FILL 0x000019d4
+
+#define NV50_3D_UNK19D8(i0) (0x000019d8 + 0x4*(i0))
+#define NV50_3D_UNK19D8__ESIZE 0x00000004
+#define NV50_3D_UNK19D8__LEN 0x00000002
+
+#define NV50_3D_RT_COMP_ENABLE(i0) (0x000019e0 + 0x4*(i0))
+#define NV50_3D_RT_COMP_ENABLE__ESIZE 0x00000004
+#define NV50_3D_RT_COMP_ENABLE__LEN 0x00000008
+
+#define NV50_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0))
+#define NV50_3D_COLOR_MASK__ESIZE 0x00000004
+#define NV50_3D_COLOR_MASK__LEN 0x00000008
+#define NV50_3D_COLOR_MASK_R 0x0000000f
+#define NV50_3D_COLOR_MASK_G 0x000000f0
+#define NV50_3D_COLOR_MASK_B 0x00000f00
+#define NV50_3D_COLOR_MASK_A 0x0000f000
+
+#define NV50_3D_UNK1A20 0x00001a20
+
+#define NV50_3D_DELAY 0x00001a24
+
+#define NV50_3D_UNK1A28 0x00001a28
+#define NV50_3D_UNK1A28_0__MASK 0x000000ff
+#define NV50_3D_UNK1A28_0__SHIFT 0
+#define NV50_3D_UNK1A28_1 0x00000100
+
+#define NV50_3D_UNK1A2C 0x00001a2c
+
+#define NV50_3D_UNK1A30 0x00001a30
+
+#define NV50_3D_UNK1A34 0x00001a34
+
+#define NV50_3D_UNK1A38 0x00001a38
+
+#define NV50_3D_UNK1A3C 0x00001a3c
+
+#define NV50_3D_UNK1A40(i0) (0x00001a40 + 0x4*(i0))
+#define NV50_3D_UNK1A40__ESIZE 0x00000004
+#define NV50_3D_UNK1A40__LEN 0x00000010
+#define NV50_3D_UNK1A40_0__MASK 0x00000007
+#define NV50_3D_UNK1A40_0__SHIFT 0
+#define NV50_3D_UNK1A40_1__MASK 0x00000070
+#define NV50_3D_UNK1A40_1__SHIFT 4
+#define NV50_3D_UNK1A40_2__MASK 0x00000700
+#define NV50_3D_UNK1A40_2__SHIFT 8
+#define NV50_3D_UNK1A40_3__MASK 0x00007000
+#define NV50_3D_UNK1A40_3__SHIFT 12
+#define NV50_3D_UNK1A40_4__MASK 0x00070000
+#define NV50_3D_UNK1A40_4__SHIFT 16
+#define NV50_3D_UNK1A40_5__MASK 0x00700000
+#define NV50_3D_UNK1A40_5__SHIFT 20
+#define NV50_3D_UNK1A40_6__MASK 0x07000000
+#define NV50_3D_UNK1A40_6__SHIFT 24
+#define NV50_3D_UNK1A40_7__MASK 0x70000000
+#define NV50_3D_UNK1A40_7__SHIFT 28
+
+#define NV50_3D_STRMOUT_ADDRESS_HIGH(i0) (0x00001a80 + 0x10*(i0))
+#define NV50_3D_STRMOUT_ADDRESS_HIGH__ESIZE 0x00000010
+#define NV50_3D_STRMOUT_ADDRESS_HIGH__LEN 0x00000004
+
+#define NV50_3D_STRMOUT_ADDRESS_LOW(i0) (0x00001a84 + 0x10*(i0))
+#define NV50_3D_STRMOUT_ADDRESS_LOW__ESIZE 0x00000010
+#define NV50_3D_STRMOUT_ADDRESS_LOW__LEN 0x00000004
+
+#define NV50_3D_STRMOUT_NUM_ATTRIBS(i0) (0x00001a88 + 0x10*(i0))
+#define NV50_3D_STRMOUT_NUM_ATTRIBS__ESIZE 0x00000010
+#define NV50_3D_STRMOUT_NUM_ATTRIBS__LEN 0x00000004
+#define NV50_3D_STRMOUT_NUM_ATTRIBS__MAX 0x00000040
+
+#define NVA0_3D_STRMOUT_OFFSET_LIMIT(i0) (0x00001a8c + 0x10*(i0))
+#define NVA0_3D_STRMOUT_OFFSET_LIMIT__ESIZE 0x00000010
+#define NVA0_3D_STRMOUT_OFFSET_LIMIT__LEN 0x00000004
+
+#define NV50_3D_VERTEX_ARRAY_ATTRIB(i0) (0x00001ac0 + 0x4*(i0))
+#define NV50_3D_VERTEX_ARRAY_ATTRIB__ESIZE 0x00000004
+#define NV50_3D_VERTEX_ARRAY_ATTRIB__LEN 0x00000010
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_BUFFER__MASK 0x0000000f
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_BUFFER__SHIFT 0
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_CONST 0x00000010
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_OFFSET__MASK 0x0007ffe0
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_OFFSET__SHIFT 5
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT__MASK 0x01f80000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT__SHIFT 19
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 0x00080000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32 0x00100000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16 0x00180000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32 0x00200000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16 0x00280000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8 0x00500000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16 0x00780000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32 0x00900000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8 0x00980000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8 0x00c00000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16 0x00d80000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8 0x00e80000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_10_10_10_2 0x01800000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__MASK 0x7e000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__SHIFT 25
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x7e000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x24000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x12000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x5a000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x6c000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x48000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x36000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_BGRA 0x80000000
+
+#define NV50_3D_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NV50_3D_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NV50_3D_QUERY_SEQUENCE 0x00001b08
+
+#define NV50_3D_QUERY_GET 0x00001b0c
+#define NV50_3D_QUERY_GET_MODE__MASK 0x00000003
+#define NV50_3D_QUERY_GET_MODE__SHIFT 0
+#define NV50_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000
+#define NV50_3D_QUERY_GET_MODE_SYNC 0x00000001
+#define NV50_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002
+#define NV50_3D_QUERY_GET_UNK4 0x00000010
+#define NVA0_3D_QUERY_GET_INDEX__MASK 0x000000e0
+#define NVA0_3D_QUERY_GET_INDEX__SHIFT 5
+#define NV50_3D_QUERY_GET_UNK8 0x00000100
+#define NV50_3D_QUERY_GET_UNIT__MASK 0x0000f000
+#define NV50_3D_QUERY_GET_UNIT__SHIFT 12
+#define NV50_3D_QUERY_GET_UNIT_UNK00 0x00000000
+#define NV50_3D_QUERY_GET_UNIT_VFETCH 0x00001000
+#define NV50_3D_QUERY_GET_UNIT_VP 0x00002000
+#define NV50_3D_QUERY_GET_UNIT_RAST 0x00004000
+#define NV50_3D_QUERY_GET_UNIT_STRMOUT 0x00005000
+#define NV50_3D_QUERY_GET_UNIT_GP 0x00006000
+#define NV50_3D_QUERY_GET_UNIT_ZCULL 0x00007000
+#define NV50_3D_QUERY_GET_UNIT_TPROP 0x0000a000
+#define NV50_3D_QUERY_GET_UNIT_UNK0C 0x0000c000
+#define NV50_3D_QUERY_GET_UNIT_CROP 0x0000f000
+#define NV50_3D_QUERY_GET_SYNC_COND__MASK 0x00010000
+#define NV50_3D_QUERY_GET_SYNC_COND__SHIFT 16
+#define NV50_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000
+#define NV50_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000
+#define NV50_3D_QUERY_GET_INTR 0x00100000
+#define NV50_3D_QUERY_GET_TYPE__MASK 0x00800000
+#define NV50_3D_QUERY_GET_TYPE__SHIFT 23
+#define NV50_3D_QUERY_GET_TYPE_QUERY 0x00000000
+#define NV50_3D_QUERY_GET_TYPE_COUNTER 0x00800000
+#define NV50_3D_QUERY_GET_QUERY_SELECT__MASK 0x0f000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT__SHIFT 24
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZERO 0x00000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_SAMPLECNT 0x01000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_STRMOUT_NO_OVERFLOW 0x02000000
+#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_DROPPED_PRIMITIVES 0x03000000
+#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_VERTICES 0x04000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK0 0x05000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK1 0x06000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK2 0x07000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK3 0x08000000
+#define NVA0_3D_QUERY_GET_QUERY_SELECT_RT_UNK14 0x0c000000
+#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_OFFSET 0x0d000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT__MASK 0x0f000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT__SHIFT 24
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_VFETCH_VERTICES 0x00000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_VFETCH_PRIMITIVES 0x01000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_VP_LAUNCHES 0x02000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_GP_LAUNCHES 0x03000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_GP_PRIMITIVES_OUT 0x04000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_TRANSFORM_FEEDBACK 0x05000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_GENERATED_PRIMITIVES 0x06000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_RAST_PRIMITIVES_PRECLIP 0x07000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_RAST_PRIMITIVES_POSTCLIP 0x08000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_FP_PIXELS 0x09000000
+#define NV84_3D_QUERY_GET_COUNTER_SELECT_UNK0A 0x0a000000
+#define NVA0_3D_QUERY_GET_COUNTER_SELECT_UNK0C 0x0c000000
+#define NV50_3D_QUERY_GET_SHORT 0x10000000
+
+#define NVA3_3D_VP_RESULT_MAP_ALT(i0) (0x00001b3c + 0x4*(i0))
+#define NVA3_3D_VP_RESULT_MAP_ALT__ESIZE 0x00000004
+#define NVA3_3D_VP_RESULT_MAP_ALT__LEN 0x00000020
+#define NVA3_3D_VP_RESULT_MAP_ALT_0__MASK 0x000000ff
+#define NVA3_3D_VP_RESULT_MAP_ALT_0__SHIFT 0
+#define NVA3_3D_VP_RESULT_MAP_ALT_1__MASK 0x0000ff00
+#define NVA3_3D_VP_RESULT_MAP_ALT_1__SHIFT 8
+#define NVA3_3D_VP_RESULT_MAP_ALT_2__MASK 0x00ff0000
+#define NVA3_3D_VP_RESULT_MAP_ALT_2__SHIFT 16
+#define NVA3_3D_VP_RESULT_MAP_ALT_3__MASK 0xff000000
+#define NVA3_3D_VP_RESULT_MAP_ALT_3__SHIFT 24
+
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT(i0) (0x00001c00 + 0x10*(i0))
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT__ESIZE 0x00000010
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT__LEN 0x00000020
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_STRIDE__MASK 0x00000fff
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_STRIDE__SHIFT 0
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_ENABLE 0x20000000
+
+#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT(i0) (0x00001c04 + 0x10*(i0))
+#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT__ESIZE 0x00000010
+#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT__LEN 0x00000020
+
+#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT(i0) (0x00001c08 + 0x10*(i0))
+#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT__ESIZE 0x00000010
+#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT__LEN 0x00000020
+
+#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT(i0) (0x00001c0c + 0x10*(i0))
+#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT__ESIZE 0x00000010
+#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT__LEN 0x00000020
+
+#define NVA3_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0))
+#define NVA3_3D_IBLEND__ESIZE 0x00000020
+#define NVA3_3D_IBLEND__LEN 0x00000008
+
+#define NVA3_3D_IBLEND_SEPARATE_ALPHA(i0) (0x00001e00 + 0x20*(i0))
+
+#define NVA3_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0))
+#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NVA3_3D_IBLEND_EQUATION_RGB_MIN 0x00008007
+#define NVA3_3D_IBLEND_EQUATION_RGB_MAX 0x00008008
+#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVA3_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0))
+
+#define NVA3_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0))
+
+#define NVA3_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0))
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVA3_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0))
+
+#define NVA3_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0))
+
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT(i0) (0x00001f00 + 0x8*(i0))
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT__ESIZE 0x00000008
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT__LEN 0x00000020
+
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT(i0) (0x00001f04 + 0x8*(i0))
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT__ESIZE 0x00000008
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT__LEN 0x00000020
+
+
+#endif /* RNNDB_NV50_3D_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h
new file mode 100644
index 00000000000..f26ac45da40
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h
@@ -0,0 +1,98 @@
+#ifndef NV_3DDEFS_XML
+#define NV_3DDEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nv50_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38)
+- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28)
+- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20)
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro, curro_, currojerez)
+- imirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000
+#define NV50_3D_BLEND_FACTOR_ONE 0x00004001
+#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305
+#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308
+#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903
+
+#endif /* NV_3DDEFS_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_blit.h b/src/gallium/drivers/nouveau/nv50/nv50_blit.h
new file mode 100644
index 00000000000..bdd6a63d1f1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_blit.h
@@ -0,0 +1,223 @@
+
+#ifndef __NV50_BLIT_H__
+#define __NV50_BLIT_H__
+
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+void *
+nv50_blitter_make_fp(struct pipe_context *,
+ unsigned mode,
+ enum pipe_texture_target);
+
+unsigned
+nv50_blit_select_mode(const struct pipe_blit_info *);
+
+/* Converted to a pipe->blit. */
+void
+nv50_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *);
+
+#define NV50_BLIT_MODE_PASS 0 /* pass through TEX $t0/$s0 output */
+#define NV50_BLIT_MODE_Z24S8 1 /* encode ZS values for RGBA unorm8 */
+#define NV50_BLIT_MODE_S8Z24 2
+#define NV50_BLIT_MODE_X24S8 3
+#define NV50_BLIT_MODE_S8X24 4
+#define NV50_BLIT_MODE_Z24X8 5
+#define NV50_BLIT_MODE_X8Z24 6
+#define NV50_BLIT_MODE_ZS 7 /* put $t0/$s0 into R, $t1/$s1 into G */
+#define NV50_BLIT_MODE_XS 8 /* put $t1/$s1 into G */
+#define NV50_BLIT_MODES 9
+
+/* CUBE and RECT textures are reinterpreted as 2D(_ARRAY) */
+#define NV50_BLIT_TEXTURE_BUFFER 0
+#define NV50_BLIT_TEXTURE_1D 1
+#define NV50_BLIT_TEXTURE_2D 2
+#define NV50_BLIT_TEXTURE_3D 3
+#define NV50_BLIT_TEXTURE_1D_ARRAY 4
+#define NV50_BLIT_TEXTURE_2D_ARRAY 5
+#define NV50_BLIT_MAX_TEXTURE_TYPES 6
+
+/* Translate a gallium texture target into the matching NV50_BLIT_TEXTURE_*
+ * constant. Any target without a dedicated mapping is asserted to be
+ * PIPE_BUFFER and yields NV50_BLIT_TEXTURE_BUFFER.
+ */
+static INLINE unsigned
+nv50_blit_texture_type(enum pipe_texture_target target)
+{
+   if (target == PIPE_TEXTURE_1D)
+      return NV50_BLIT_TEXTURE_1D;
+   if (target == PIPE_TEXTURE_2D)
+      return NV50_BLIT_TEXTURE_2D;
+   if (target == PIPE_TEXTURE_3D)
+      return NV50_BLIT_TEXTURE_3D;
+   if (target == PIPE_TEXTURE_1D_ARRAY)
+      return NV50_BLIT_TEXTURE_1D_ARRAY;
+   if (target == PIPE_TEXTURE_2D_ARRAY)
+      return NV50_BLIT_TEXTURE_2D_ARRAY;
+
+   assert(target == PIPE_BUFFER);
+   return NV50_BLIT_TEXTURE_BUFFER;
+}
+
+/* Translate a gallium texture target into the matching TGSI_TEXTURE_*
+ * constant. Any target without a dedicated mapping is asserted to be
+ * PIPE_BUFFER and yields TGSI_TEXTURE_BUFFER.
+ */
+static INLINE unsigned
+nv50_blit_get_tgsi_texture_target(enum pipe_texture_target target)
+{
+   if (target == PIPE_TEXTURE_1D)
+      return TGSI_TEXTURE_1D;
+   if (target == PIPE_TEXTURE_2D)
+      return TGSI_TEXTURE_2D;
+   if (target == PIPE_TEXTURE_3D)
+      return TGSI_TEXTURE_3D;
+   if (target == PIPE_TEXTURE_1D_ARRAY)
+      return TGSI_TEXTURE_1D_ARRAY;
+   if (target == PIPE_TEXTURE_2D_ARRAY)
+      return TGSI_TEXTURE_2D_ARRAY;
+
+   assert(target == PIPE_BUFFER);
+   return TGSI_TEXTURE_BUFFER;
+}
+
+/* Fold the targets the blitter does not handle directly onto 2D ones:
+ * CUBE and CUBE_ARRAY become 2D_ARRAY, RECT becomes 2D. Every other
+ * target is returned unchanged.
+ */
+static INLINE enum pipe_texture_target
+nv50_blit_reinterpret_pipe_texture_target(enum pipe_texture_target target)
+{
+   if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)
+      return PIPE_TEXTURE_2D_ARRAY;
+
+   if (target == PIPE_TEXTURE_RECT)
+      return PIPE_TEXTURE_2D;
+
+   return target;
+}
+
+/* Pick the filter selector (0 or 1) for a blit.
+ *
+ * - When the destination has fewer samples than the source, the result is
+ *   0 for depth/stencil source formats and 1 otherwise.
+ * - Otherwise 1 is returned only if linear filtering was requested and the
+ *   source and destination boxes differ in size; a box whose width or
+ *   height is merely negated counts as the same size.
+ */
+static INLINE unsigned
+nv50_blit_get_filter(const struct pipe_blit_info *info)
+{
+   int same_width, same_height;
+
+   if (info->dst.resource->nr_samples < info->src.resource->nr_samples) {
+      if (util_format_is_depth_or_stencil(info->src.format))
+         return 0;
+      return 1;
+   }
+
+   if (info->filter != PIPE_TEX_FILTER_LINEAR)
+      return 0;
+
+   same_width = info->dst.box.width == info->src.box.width ||
+                info->dst.box.width == -info->src.box.width;
+   same_height = info->dst.box.height == info->src.box.height ||
+                 info->dst.box.height == -info->src.box.height;
+
+   return (same_width && same_height) ? 0 : 1;
+}
+
+/* Shaders cannot export stencil, so stencil values cannot be copied while
+ * rendering to ZETA. The ZS surface is therefore attached as a colour render
+ * target, and this function names the colour format to use for a given
+ * depth/stencil format. Unhandled formats assert and give PIPE_FORMAT_NONE.
+ */
+static INLINE enum pipe_format
+nv50_blit_zeta_to_colour_format(enum pipe_format format)
+{
+   enum pipe_format colour = PIPE_FORMAT_NONE;
+
+   switch (format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      colour = PIPE_FORMAT_R16_UNORM;
+      break;
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
+      colour = PIPE_FORMAT_R8G8B8A8_UNORM;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT:
+      colour = PIPE_FORMAT_R32_FLOAT;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      colour = PIPE_FORMAT_R32G32_FLOAT;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   return colour;
+}
+
+
+/* Build the colour write mask for a blit from info->mask.
+ *
+ * The result carries one hex digit per colour channel (0x0001, 0x0010,
+ * 0x0100, 0x1000). For the 24/8 depth-stencil destination formats the Z and
+ * S bits of info->mask are mapped onto the digits listed per format below;
+ * for all other formats R/Z, G/S, B and A each enable one digit.
+ */
+static INLINE uint16_t
+nv50_blit_derive_color_mask(const struct pipe_blit_info *info)
+{
+   const unsigned bits = info->mask;
+   const int want_z = (bits & PIPE_MASK_Z) != 0;
+   const int want_s = (bits & PIPE_MASK_S) != 0;
+   uint16_t result = 0;
+
+   switch (info->dst.format) {
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      if (want_s)
+         result |= 0x1000;
+      if (want_z)
+         result |= 0x0111;
+      break;
+   case PIPE_FORMAT_Z24X8_UNORM:
+      if (want_z)
+         result |= 0x0111;
+      break;
+   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+      if (want_z)
+         result |= 0x1110;
+      if (want_s)
+         result |= 0x0001;
+      break;
+   default:
+      if (bits & (PIPE_MASK_R | PIPE_MASK_Z))
+         result |= 0x0001;
+      if (bits & (PIPE_MASK_G | PIPE_MASK_S))
+         result |= 0x0010;
+      if (bits & PIPE_MASK_B)
+         result |= 0x0100;
+      if (bits & PIPE_MASK_A)
+         result |= 0x1000;
+      break;
+   }
+
+   return result;
+}
+
+/* Compute a 32-bit mask for a blit from the Z/S bits of info->mask.
+ *
+ * For the packed depth-stencil destination formats only the bits of the
+ * requested components are set; every other destination format gets
+ * 0xffffffff regardless of info->mask.
+ */
+static INLINE uint32_t
+nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info)
+{
+   const int want_z = (info->mask & PIPE_MASK_Z) != 0;
+   const int want_s = (info->mask & PIPE_MASK_S) != 0;
+   uint32_t bits = 0;
+
+   switch (info->dst.format) {
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      if (want_z)
+         bits |= 0x00ffffff;
+      if (want_s)
+         bits |= 0xff000000;
+      break;
+   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+      if (want_z)
+         bits |= 0xffffff00;
+      if (want_s)
+         bits |= 0x000000ff;
+      break;
+   case PIPE_FORMAT_X8Z24_UNORM:
+      /* NOTE(review): S8_UINT_Z24 above uses 0xffffff00 for Z while this
+       * case uses 0x00ffffff - confirm against the format's bit layout.
+       */
+      if (want_z)
+         bits = 0x00ffffff;
+      break;
+   default:
+      bits = 0xffffffff;
+      break;
+   }
+
+   return bits;
+}
+
+#if NOUVEAU_DRIVER == 0xc0
+# define nv50_format_table nvc0_format_table
+#endif
+
+/* Return TRUE for formats that can be converted among each other by NVC0_2D
+ * when used as destination: the format's render-target id must be at least
+ * 0xc0 and its bit (id - 0xc0) must be set in the SUPPORTED set and clear in
+ * the NOCONVERT set.
+ */
+static INLINE boolean
+nv50_2d_dst_format_faithful(enum pipe_format format)
+{
+   const uint8_t rt = nv50_format_table[format].rt;
+   const uint64_t faithful =
+      NV50_ENG2D_SUPPORTED_FORMATS &
+      ~NV50_ENG2D_NOCONVERT_FORMATS;
+
+   if (rt < 0xc0)
+      return 0;
+
+   return (faithful & (1ULL << (rt - 0xc0))) != 0;
+}
+/* Source-side counterpart of the destination check: the format's
+ * render-target id must be at least 0xc0 and its bit (id - 0xc0) must be set
+ * in the SUPPORTED set and clear in both the LUMINANCE and INTENSITY sets.
+ */
+static INLINE boolean
+nv50_2d_src_format_faithful(enum pipe_format format)
+{
+   const uint8_t rt = nv50_format_table[format].rt;
+   const uint64_t faithful =
+      NV50_ENG2D_SUPPORTED_FORMATS &
+      ~(NV50_ENG2D_LUMINANCE_FORMATS | NV50_ENG2D_INTENSITY_FORMATS);
+
+   if (rt < 0xc0)
+      return 0;
+
+   return (faithful & (1ULL << (rt - 0xc0))) != 0;
+}
+
+/* TRUE if the format's render-target id is at least 0xc0 and its bit
+ * (id - 0xc0) is set in NV50_ENG2D_SUPPORTED_FORMATS.
+ */
+static INLINE boolean
+nv50_2d_format_supported(enum pipe_format format)
+{
+   const uint8_t rt = nv50_format_table[format].rt;
+
+   if (rt < 0xc0)
+      return 0;
+
+   return (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (rt - 0xc0))) != 0;
+}
+
+/* TRUE if the format's render-target id is at least 0xc0 and its bit
+ * (id - 0xc0) is set in NV50_ENG2D_OPERATION_FORMATS.
+ */
+static INLINE boolean
+nv50_2d_dst_format_ops_supported(enum pipe_format format)
+{
+   const uint8_t rt = nv50_format_table[format].rt;
+
+   if (rt < 0xc0)
+      return 0;
+
+   return (NV50_ENG2D_OPERATION_FORMATS & (1ULL << (rt - 0xc0))) != 0;
+}
+
+#endif /* __NV50_BLIT_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
new file mode 100644
index 00000000000..b6bdf79b389
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_framebuffer.h"
+
+#ifdef NV50_WITH_DRAW_MODULE
+#include "draw/draw_context.h"
+#endif
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_screen.h"
+#include "nv50/nv50_resource.h"
+
+/* Flush hook installed as pipe->flush.
+ *
+ * If the caller supplied a fence handle, the screen's current fence is passed
+ * to nouveau_fence_ref() together with that handle. The pushbuf is then
+ * kicked and the context's frame statistics are updated. `flags` is not
+ * consulted.
+ */
+static void
+nv50_flush(struct pipe_context *pipe,
+           struct pipe_fence_handle **fence,
+           unsigned flags)
+{
+   struct nouveau_screen *nvscreen = nouveau_screen(pipe->screen);
+   struct nouveau_fence **out_fence = (struct nouveau_fence **)fence;
+
+   if (out_fence != NULL)
+      nouveau_fence_ref(nvscreen->fence.current, out_fence);
+
+   PUSH_KICK(nvscreen->pushbuf);
+
+   nouveau_context_update_frame_stats(nouveau_context(pipe));
+}
+
+/* Texture barrier hook installed as pipe->texture_barrier: emits a
+ * GRAPH_SERIALIZE method with data 0 followed by TEX_CACHE_CTL with data
+ * 0x20 on the context's pushbuf.
+ */
+static void
+nv50_texture_barrier(struct pipe_context *pipe)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+   BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+   PUSH_DATA (push, 0);
+
+   BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
+   PUSH_DATA (push, 0x20);
+}
+
+/* Pushbuf kick callback (assigned to pushbuf->kick_notify by nv50_create).
+ *
+ * Does nothing when the pushbuf carries no screen in user_priv. Otherwise
+ * advances and updates the screen's fences and, if the screen has a current
+ * context, marks that context's state as flushed.
+ */
+void
+nv50_default_kick_notify(struct nouveau_pushbuf *push)
+{
+   struct nv50_screen *screen = push->user_priv;
+
+   if (!screen)
+      return;
+
+   nouveau_fence_next(&screen->base);
+   nouveau_fence_update(&screen->base, TRUE);
+
+   if (screen->cur_ctx)
+      screen->cur_ctx->state.flushed = TRUE;
+}
+
+/* Release everything the context still holds a reference to: both buffer
+ * contexts, the framebuffer state, all bound vertex buffers, the index
+ * buffer and, for each of the 3 shader stages, the bound sampler views and
+ * the non-user constant buffers.
+ */
+static void
+nv50_context_unreference_resources(struct nv50_context *nv50)
+{
+   unsigned stage, idx;
+
+   nouveau_bufctx_del(&nv50->bufctx_3d);
+   nouveau_bufctx_del(&nv50->bufctx);
+
+   util_unreference_framebuffer_state(&nv50->framebuffer);
+
+   for (idx = 0; idx < nv50->num_vtxbufs; ++idx)
+      pipe_resource_reference(&nv50->vtxbuf[idx].buffer, NULL);
+
+   pipe_resource_reference(&nv50->idxbuf.buffer, NULL);
+
+   for (stage = 0; stage < 3; ++stage) {
+      for (idx = 0; idx < nv50->num_textures[stage]; ++idx)
+         pipe_sampler_view_reference(&nv50->textures[stage][idx], NULL);
+
+      for (idx = 0; idx < NV50_MAX_PIPE_CONSTBUFS; ++idx) {
+         /* user constant buffers hold no pipe_resource reference */
+         if (nv50->constbuf[stage][idx].user)
+            continue;
+         pipe_resource_reference(&nv50->constbuf[stage][idx].u.buf, NULL);
+      }
+   }
+}
+
+/* Destroy hook installed as pipe->destroy.
+ *
+ * If this context is the screen's current one, it is first detached: the
+ * pushbuf's kick callback and bufctx are cleared and the screen's cur_ctx is
+ * reset. The pushbuf is then kicked before the context's resources are
+ * released, the optional draw module is destroyed and the base context is
+ * torn down.
+ */
+static void
+nv50_destroy(struct pipe_context *pipe)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+   if (nv50_context_screen(nv50)->cur_ctx == nv50) {
+      push->kick_notify = NULL;
+      nv50_context_screen(nv50)->cur_ctx = NULL;
+      nouveau_pushbuf_bufctx(push, NULL);
+   }
+
+   /* need to flush before destroying the bufctx */
+   nouveau_pushbuf_kick(push, push->channel);
+
+   nv50_context_unreference_resources(nv50);
+
+#ifdef NV50_WITH_DRAW_MODULE
+   draw_destroy(nv50->draw);
+#endif
+
+   nouveau_context_destroy(&nv50->base);
+}
+
+/**
+ * Flag every binding of \p res in this context as dirty.
+ *
+ * Installed as nouveau_context::invalidate_resource_storage. The binding
+ * points selected by res->bind are scanned in this order: colour render
+ * targets, depth/stencil buffer, vertex buffers, index buffer, sampler views
+ * and constant buffers. For each binding that refers to \p res the matching
+ * NV50_NEW_* dirty bit is raised and the corresponding bufctx bin is reset
+ * (the index buffer only counts as a reference; no dirty bit is raised for
+ * it here).
+ *
+ * \param ctx  context whose bindings are scanned
+ * \param res  resource to look for
+ * \param ref  number of references the caller still expects to find
+ * \return \p ref reduced by the number of bindings found; the scan stops as
+ *         soon as the count reaches zero
+ */
+static int
+nv50_invalidate_resource_storage(struct nouveau_context *ctx,
+                                 struct pipe_resource *res,
+                                 int ref)
+{
+   struct nv50_context *nv50 = nv50_context(&ctx->pipe);
+   unsigned s, i;
+
+   if (res->bind & PIPE_BIND_RENDER_TARGET) {
+      for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
+         if (nv50->framebuffer.cbufs[i] &&
+             nv50->framebuffer.cbufs[i]->texture == res) {
+            nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+            nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+            if (!--ref)
+               return ref;
+         }
+      }
+   }
+   if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+      if (nv50->framebuffer.zsbuf &&
+          nv50->framebuffer.zsbuf->texture == res) {
+         nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+         if (!--ref)
+            return ref;
+      }
+   }
+
+   if (res->bind & PIPE_BIND_VERTEX_BUFFER) {
+      for (i = 0; i < nv50->num_vtxbufs; ++i) {
+         if (nv50->vtxbuf[i].buffer == res) {
+            nv50->dirty |= NV50_NEW_ARRAYS;
+            nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
+            if (!--ref)
+               return ref;
+         }
+      }
+   }
+   if (res->bind & PIPE_BIND_INDEX_BUFFER) {
+      if (nv50->idxbuf.buffer == res)
+         if (!--ref)
+            return ref;
+   }
+
+   if (res->bind & PIPE_BIND_SAMPLER_VIEW) {
+      /* 3 shader stages, as in nv50_context_unreference_resources();
+       * iterating further would index past the per-stage arrays.
+       */
+      for (s = 0; s < 3; ++s) {
+         for (i = 0; i < nv50->num_textures[s]; ++i) {
+            if (nv50->textures[s][i] &&
+                nv50->textures[s][i]->texture == res) {
+               nv50->dirty |= NV50_NEW_TEXTURES;
+               nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+               if (!--ref)
+                  return ref;
+            }
+         }
+      }
+   }
+
+   if (res->bind & PIPE_BIND_CONSTANT_BUFFER) {
+      for (s = 0; s < 3; ++s) {
+         /* walk every constbuf slot of the stage; the vertex buffer count
+          * says nothing about how many constant buffers are bound
+          */
+         for (i = 0; i < NV50_MAX_PIPE_CONSTBUFS; ++i) {
+            if (!nv50->constbuf[s][i].user &&
+                nv50->constbuf[s][i].u.buf == res) {
+               nv50->dirty |= NV50_NEW_CONSTBUF;
+               nv50->constbuf_dirty[s] |= 1 << i;
+               nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i));
+               if (!--ref)
+                  return ref;
+            }
+         }
+      }
+   }
+
+   return ref;
+}
+
+/**
+ * Create an nv50 pipe_context on \p pscreen.
+ *
+ * Allocates the context, creates its blit context and two buffer contexts,
+ * installs the pipe_context and nouveau_context callbacks, selects the video
+ * decoding entry points by chipset and references the screen's buffer
+ * objects in the new buffer contexts.
+ *
+ * \param pscreen  screen the context is created on
+ * \param priv     opaque pointer stored in pipe->priv
+ * \return the new context's pipe_context, or NULL if the allocation, the
+ *         blit context or a buffer context could not be created
+ */
+struct pipe_context *
+nv50_create(struct pipe_screen *pscreen, void *priv)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ struct nv50_context *nv50;
+ struct pipe_context *pipe;
+ int ret;
+ uint32_t flags;
+
+ nv50 = CALLOC_STRUCT(nv50_context);
+ if (!nv50)
+ return NULL;
+ pipe = &nv50->base.pipe;
+
+ if (!nv50_blitctx_create(nv50))
+ goto out_err;
+
+ /* the pushbuf and client are the screen's, not per-context objects */
+ nv50->base.pushbuf = screen->base.pushbuf;
+ nv50->base.client = screen->base.client;
+
+ /* bufctx_3d gets NV50_BIND_COUNT bins, the second bufctx gets 2 */
+ ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT,
+ &nv50->bufctx_3d);
+ if (!ret)
+ ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
+ if (ret)
+ goto out_err;
+
+ nv50->base.screen = &screen->base;
+ nv50->base.copy_data = nv50_m2mf_copy_linear;
+ nv50->base.push_data = nv50_sifc_linear_u8;
+ nv50->base.push_cb = nv50_cb_push;
+
+ nv50->screen = screen;
+ pipe->screen = pscreen;
+ pipe->priv = priv;
+
+ pipe->destroy = nv50_destroy;
+
+ pipe->draw_vbo = nv50_draw_vbo;
+ pipe->clear = nv50_clear;
+
+ pipe->flush = nv50_flush;
+ pipe->texture_barrier = nv50_texture_barrier;
+
+ /* the first context created on a screen becomes its current context and
+ * has its bufctx attached to the pushbuf
+ */
+ if (!screen->cur_ctx) {
+ screen->cur_ctx = nv50;
+ nouveau_pushbuf_bufctx(screen->base.pushbuf, nv50->bufctx);
+ }
+ nv50->base.pushbuf->kick_notify = nv50_default_kick_notify;
+
+ nv50_init_query_functions(nv50);
+ nv50_init_surface_functions(nv50);
+ nv50_init_state_functions(nv50);
+ nv50_init_resource_functions(pipe);
+
+ nv50->base.invalidate_resource_storage = nv50_invalidate_resource_storage;
+
+#ifdef NV50_WITH_DRAW_MODULE
+ /* no software fallbacks implemented */
+ nv50->draw = draw_create(pipe);
+ assert(nv50->draw);
+ draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50));
+#endif
+
+ /* video decoding backend: chosen by chipset, with NOUVEAU_PMPEG forcing
+ * the PMPEG path
+ */
+ if (screen->base.device->chipset < 0x84 ||
+ debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
+ /* PMPEG */
+ nouveau_context_init_vdec(&nv50->base);
+ } else if (screen->base.device->chipset < 0x98 ||
+ screen->base.device->chipset == 0xa0) {
+ /* VP2 */
+ pipe->create_video_codec = nv84_create_decoder;
+ pipe->create_video_buffer = nv84_video_buffer_create;
+ } else {
+ /* VP3/4 */
+ pipe->create_video_codec = nv98_create_decoder;
+ pipe->create_video_buffer = nv98_video_buffer_create;
+ }
+
+ /* screen-owned buffers referenced with VRAM | RD flags */
+ flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+
+ BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->code);
+ BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms);
+ BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo);
+
+ /* the fence buffer is referenced with GART | WR flags in both bufctxs */
+ flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
+
+ BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo);
+ BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo);
+
+ nv50->base.scratch.bo_size = 2 << 20;
+
+ return pipe;
+
+out_err:
+ /* only reached after nv50 was allocated; members that were never set up
+ * are presumably still NULL from CALLOC_STRUCT - TODO confirm
+ */
+ if (nv50) {
+ if (nv50->bufctx_3d)
+ nouveau_bufctx_del(&nv50->bufctx_3d);
+ if (nv50->bufctx)
+ nouveau_bufctx_del(&nv50->bufctx);
+ if (nv50->blit)
+ FREE(nv50->blit);
+ FREE(nv50);
+ }
+ return NULL;
+}
+
+/* Walk one of the bufctx's reference lists - `current` when on_flush is set,
+ * `pending` otherwise - and call nv50_resource_validate() for every entry
+ * that carries a resource in its priv pointer, passing the entry's priv_data
+ * as flags.
+ */
+void
+nv50_bufctx_fence(struct nouveau_bufctx *bufctx, boolean on_flush)
+{
+   struct nouveau_list *head = on_flush ? &bufctx->current : &bufctx->pending;
+   struct nouveau_list *node = head->next;
+
+   while (node != head) {
+      struct nouveau_bufref *ref = (struct nouveau_bufref *)node;
+      struct nv04_resource *res = ref->priv;
+
+      if (res)
+         nv50_resource_validate(res, (unsigned)ref->priv_data);
+
+      node = node->next;
+   }
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
new file mode 100644
index 00000000000..ee6eb0ef715
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -0,0 +1,322 @@
+#ifndef __NV50_CONTEXT_H__
+#define __NV50_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_inlines.h"
+#include "util/u_dynarray.h"
+
+#ifdef NV50_WITH_DRAW_MODULE
+#include "draw/draw_vertex.h"
+#endif
+
+#include "nv50/nv50_debug.h"
+#include "nv50/nv50_winsys.h"
+#include "nv50/nv50_stateobj.h"
+#include "nv50/nv50_screen.h"
+#include "nv50/nv50_program.h"
+#include "nv50/nv50_resource.h"
+#include "nv50/nv50_transfer.h"
+
+#include "nouveau_context.h"
+#include "nv_object.xml.h"
+#include "nv_m2mf.xml.h"
+#include "nv50/nv50_3ddefs.xml.h"
+#include "nv50/nv50_3d.xml.h"
+#include "nv50/nv50_2d.xml.h"
+
+/*
+ * NV50_NEW_* state flags, one bit each.  Bits 4 and 5 are not assigned in
+ * this list.
+ */
+#define NV50_NEW_BLEND (1 << 0)
+#define NV50_NEW_RASTERIZER (1 << 1)
+#define NV50_NEW_ZSA (1 << 2)
+#define NV50_NEW_VERTPROG (1 << 3)
+#define NV50_NEW_GMTYPROG (1 << 6)
+#define NV50_NEW_FRAGPROG (1 << 7)
+#define NV50_NEW_BLEND_COLOUR (1 << 8)
+#define NV50_NEW_STENCIL_REF (1 << 9)
+#define NV50_NEW_CLIP (1 << 10)
+#define NV50_NEW_SAMPLE_MASK (1 << 11)
+#define NV50_NEW_FRAMEBUFFER (1 << 12)
+#define NV50_NEW_STIPPLE (1 << 13)
+#define NV50_NEW_SCISSOR (1 << 14)
+#define NV50_NEW_VIEWPORT (1 << 15)
+#define NV50_NEW_ARRAYS (1 << 16)
+#define NV50_NEW_VERTEX (1 << 17)
+#define NV50_NEW_CONSTBUF (1 << 18)
+#define NV50_NEW_TEXTURES (1 << 19)
+#define NV50_NEW_SAMPLERS (1 << 20)
+#define NV50_NEW_STRMOUT (1 << 21)
+/* Unsigned literal: shifting a signed 1 into bit 31 is undefined in C. */
+#define NV50_NEW_CONTEXT (1u << 31)
+
+/*
+ * NV50_BIND_* indices.  NV50_BIND_FB .. NV50_BIND_TLS are distinct values
+ * below NV50_BIND_COUNT.
+ */
+#define NV50_BIND_FB 0
+#define NV50_BIND_VERTEX 1
+#define NV50_BIND_VERTEX_TMP 2
+#define NV50_BIND_INDEX 3
+#define NV50_BIND_TEXTURES 4
+/* 16 consecutive indices per s, starting at 5.  For s in 0..2 and i in 0..15
+ * this covers 5..52, ending right below NV50_BIND_SO. */
+#define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i))
+#define NV50_BIND_SO 53
+#define NV50_BIND_SCREEN 54
+#define NV50_BIND_TLS 55
+#define NV50_BIND_COUNT 56
+/* These restart at 0 and overlap the values above.
+ * NOTE(review): presumably they index a different buffer context than the
+ * 3D one (nv50_create passes FENCE with nv50->bufctx and SCREEN with
+ * nv50->bufctx_3d) - confirm. */
+#define NV50_BIND_2D 0
+#define NV50_BIND_M2MF 0
+#define NV50_BIND_FENCE 1
+
+#define NV50_CB_TMP 123
+/* fixed constant buffer binding points - low indices for user's constbufs */
+/* PVP/PFP/PGP are 124/125/126, i.e. 124 plus the index that
+ * nv50_context_shader_stage() returns for vertex (0), fragment (1) and
+ * geometry (2); note they are not listed in numeric order here. */
+#define NV50_CB_PVP 124
+#define NV50_CB_PGP 126
+#define NV50_CB_PFP 125
+#define NV50_CB_AUX 127
+
+
+struct nv50_blitctx;
+
+boolean nv50_blitctx_create(struct nv50_context *);
+
+/*
+ * Per-context driver state for nv50.
+ *
+ * `base` is the first member and nv50_context() casts a pipe_context pointer
+ * directly to this struct.
+ * NOTE(review): that presumably relies on nouveau_context itself starting
+ * with a pipe_context - confirm in nouveau_context.h.
+ *
+ * Arrays dimensioned [3] hold one slot per shader stage.
+ * NOTE(review): nv50_context_shader_stage() can return 3 for
+ * PIPE_SHADER_COMPUTE, which would be out of range for these arrays.
+ */
+struct nv50_context {
+ struct nouveau_context base;
+
+ struct nv50_screen *screen;
+
+ struct nouveau_bufctx *bufctx_3d;
+ struct nouveau_bufctx *bufctx;
+
+ /* NOTE(review): presumably a mask of NV50_NEW_* flags - confirm. */
+ uint32_t dirty;
+
+ struct {
+ uint32_t instance_elts; /* bitmask of per-instance elements */
+ uint32_t instance_base;
+ uint32_t interpolant_ctrl;
+ uint32_t semantic_color;
+ uint32_t semantic_psize;
+ int32_t index_bias;
+ boolean uniform_buffer_bound[3];
+ boolean prim_restart;
+ boolean point_sprite;
+ boolean rt_serialize;
+ boolean flushed;
+ boolean rasterizer_discard;
+ uint8_t tls_required;
+ boolean new_tls_space;
+ uint8_t num_vtxbufs;
+ uint8_t num_vtxelts;
+ uint8_t num_textures[3];
+ uint8_t num_samplers[3];
+ uint8_t prim_size;
+ uint16_t scissor;
+ } state;
+
+ struct nv50_blend_stateobj *blend;
+ struct nv50_rasterizer_stateobj *rast;
+ struct nv50_zsa_stateobj *zsa;
+ struct nv50_vertex_stateobj *vertex;
+
+ struct nv50_program *vertprog;
+ struct nv50_program *gmtyprog;
+ struct nv50_program *fragprog;
+
+ struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS];
+ uint16_t constbuf_dirty[3];
+ uint16_t constbuf_valid[3];
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned num_vtxbufs;
+ struct pipe_index_buffer idxbuf;
+ uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
+ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
+ uint32_t vbo_constant; /* bitmask of user buffers with stride 0 */
+ uint32_t vb_elt_first; /* from pipe_draw_info, for vertex upload */
+ uint32_t vb_elt_limit; /* max - min element (count - 1) */
+ uint32_t instance_off; /* base vertex for instanced arrays */
+ uint32_t instance_max; /* max instance for current draw call */
+
+ struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS];
+ unsigned num_textures[3];
+ struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
+ unsigned num_samplers[3];
+
+ uint8_t num_so_targets;
+ uint8_t so_targets_dirty;
+ struct pipe_stream_output_target *so_target[4];
+
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_blend_color blend_colour;
+ struct pipe_stencil_ref stencil_ref;
+ struct pipe_poly_stipple stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ struct pipe_clip_state clip;
+
+ unsigned sample_mask;
+
+ boolean vbo_push_hint;
+
+ struct pipe_query *cond_query;
+ boolean cond_cond;
+ uint cond_mode;
+
+ /* Freed on the error path of nv50_create when non-NULL. */
+ struct nv50_blitctx *blit;
+
+ /* Only present when the draw module is compiled in. */
+#ifdef NV50_WITH_DRAW_MODULE
+ struct draw_context *draw;
+#endif
+};
+
+/* Reinterpret a generic pipe_context pointer as the nv50 context. */
+static INLINE struct nv50_context *
+nv50_context(struct pipe_context *pipe)
+{
+   struct nv50_context *nv50 = (struct nv50_context *)pipe;
+
+   return nv50;
+}
+
+/* Return the nv50 screen reached through the context's base screen pointer. */
+static INLINE struct nv50_screen *
+nv50_context_screen(struct nv50_context *nv50)
+{
+   struct pipe_screen *pscreen = &nv50->base.screen->base;
+
+   return nv50_screen(pscreen);
+}
+
+/*
+ * Map a PIPE_SHADER_* type to the index used in the per-stage arrays of
+ * struct nv50_context: vertex 0, fragment 1, geometry 2, compute 3.
+ * Any other value asserts and yields 0.
+ *
+ * NOTE(review): the per-stage arrays in struct nv50_context are dimensioned
+ * [3], so index 3 (compute) is out of range for them - confirm that no
+ * caller indexes those arrays with a compute stage.
+ */
+static INLINE unsigned
+nv50_context_shader_stage(unsigned pipe)
+{
+   if (pipe == PIPE_SHADER_VERTEX)
+      return 0;
+   if (pipe == PIPE_SHADER_FRAGMENT)
+      return 1;
+   if (pipe == PIPE_SHADER_GEOMETRY)
+      return 2;
+   if (pipe == PIPE_SHADER_COMPUTE)
+      return 3;
+
+   assert(!"invalid/unhandled shader type");
+   return 0;
+}
+
+/* nv50_context.c */
+struct pipe_context *nv50_create(struct pipe_screen *, void *);
+
+void nv50_bufctx_fence(struct nouveau_bufctx *, boolean on_flush);
+
+void nv50_default_kick_notify(struct nouveau_pushbuf *);
+
+/* nv50_draw.c */
+extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
+
+/* nv50_query.c */
+void nv50_init_query_functions(struct nv50_context *);
+void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
+ struct pipe_query *, unsigned result_offset);
+void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
+/* Parameter name spelling fixed (was "seralize"); names in a prototype do
+ * not affect callers. */
+void nva0_so_target_save_offset(struct pipe_context *,
+ struct pipe_stream_output_target *,
+ unsigned index, boolean serialize);
+
+#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
+
+/* nv50_shader_state.c */
+void nv50_vertprog_validate(struct nv50_context *);
+void nv50_gmtyprog_validate(struct nv50_context *);
+void nv50_fragprog_validate(struct nv50_context *);
+void nv50_fp_linkage_validate(struct nv50_context *);
+void nv50_gp_linkage_validate(struct nv50_context *);
+void nv50_constbufs_validate(struct nv50_context *);
+void nv50_validate_derived_rs(struct nv50_context *);
+void nv50_stream_output_validate(struct nv50_context *);
+
+/* nv50_state.c */
+extern void nv50_init_state_functions(struct nv50_context *);
+
+/* nv50_state_validate.c */
+/* @space_words: check for space before emitting relocs */
+extern boolean nv50_state_validate(struct nv50_context *, uint32_t state_mask,
+ unsigned space_words);
+
+/* nv50_surface.c */
+extern void nv50_clear(struct pipe_context *, unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil);
+extern void nv50_init_surface_functions(struct nv50_context *);
+
+/* nv50_tex.c */
+void nv50_validate_textures(struct nv50_context *);
+void nv50_validate_samplers(struct nv50_context *);
+
+struct pipe_sampler_view *
+nv50_create_texture_view(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_sampler_view *,
+ uint32_t flags,
+ enum pipe_texture_target);
+struct pipe_sampler_view *
+nv50_create_sampler_view(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_sampler_view *);
+
+/* nv50_transfer.c */
+void
+nv50_m2mf_transfer_rect(struct nv50_context *,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy);
+void
+nv50_sifc_linear_u8(struct nouveau_context *pipe,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data);
+void
+nv50_m2mf_copy_linear(struct nouveau_context *pipe,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size);
+void
+nv50_cb_push(struct nouveau_context *nv,
+ struct nouveau_bo *bo, unsigned domain,
+ unsigned base, unsigned size,
+ unsigned offset, unsigned words, const uint32_t *data);
+
+/* nv50_vbo.c */
+void nv50_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
+
+void *
+nv50_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements);
+void
+nv50_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
+
+void nv50_vertex_arrays_validate(struct nv50_context *nv50);
+
+/* nv50_push.c */
+void nv50_push_vbo(struct nv50_context *, const struct pipe_draw_info *);
+
+/* nv84_video.c */
+struct pipe_video_codec *
+nv84_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ);
+
+struct pipe_video_buffer *
+nv84_video_buffer_create(struct pipe_context *pipe,
+ const struct pipe_video_buffer *template);
+
+int
+nv84_screen_get_video_param(struct pipe_screen *pscreen,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint,
+ enum pipe_video_cap param);
+
+boolean
+nv84_screen_video_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint);
+
+/* nv98_video.c */
+struct pipe_video_codec *
+nv98_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ);
+
+struct pipe_video_buffer *
+nv98_video_buffer_create(struct pipe_context *pipe,
+ const struct pipe_video_buffer *template);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_debug.h b/src/gallium/drivers/nouveau/nv50/nv50_debug.h
new file mode 100644
index 00000000000..f3dee621519
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_debug.h
@@ -0,0 +1,25 @@
+
+#ifndef __NV50_DEBUG_H__
+#define __NV50_DEBUG_H__
+
+#include <stdio.h>
+
+#include "util/u_debug.h"
+
+/* Debug channel bits; NV50_DBGMSG(ch, ...) tests NV50_DEBUG_<ch> against
+ * the NV50_DEBUG mask below. */
+#define NV50_DEBUG_MISC 0x0001
+#define NV50_DEBUG_SHADER 0x0100
+#define NV50_DEBUG_PROG_IR 0x0200
+#define NV50_DEBUG_PROG_RA 0x0400
+#define NV50_DEBUG_PROG_CFLOW 0x0800
+/* Covers the SHADER/PROG_* bits above plus 0x1000, which has no name here. */
+#define NV50_DEBUG_PROG_ALL 0x1f00
+
+/* Compile-time channel mask; 0 makes every NV50_DBGMSG condition false. */
+#define NV50_DEBUG 0
+
+/*
+ * Print a message to stderr, prefixed with the calling function's name and
+ * line number.  fmt must be a string literal because it is concatenated
+ * with the prefix.  Uses the GNU named-variadic (args...) and ##args
+ * extensions.
+ */
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args)
+
+#define NV50_DBGMSG(ch, args...) \
+ if ((NV50_DEBUG) & (NV50_DEBUG_##ch)) \
+ debug_printf(args)
+
+#endif /* __NV50_DEBUG_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h
new file mode 100644
index 00000000000..2e42843fa56
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h
@@ -0,0 +1,200 @@
+#ifndef NV50_DEFS_XML
+#define NV50_DEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/nv50_defs.xml ( 7783 bytes, from 2013-02-14 13:56:25)
+- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nvchipsets.xml ( 3704 bytes, from 2012-08-18 12:48:55)
+
+Copyright (C) 2006-2013 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- imirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_VSTATUS_IDLE 0x00000000
+#define NV50_VSTATUS_BUSY 0x00000001
+#define NV50_VSTATUS_UNK2 0x00000002
+#define NV50_VSTATUS_WAITING 0x00000003
+#define NV50_VSTATUS_BLOCKED 0x00000005
+#define NV50_VSTATUS_FAULTED 0x00000006
+#define NV50_VSTATUS_PAUSED 0x00000007
+#define NV50_SURFACE_FORMAT_BITMAP 0x0000001c
+#define NV50_SURFACE_FORMAT_UNK1D 0x0000001d
+#define NV50_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0
+#define NV50_SURFACE_FORMAT_RGBA32_SINT 0x000000c1
+#define NV50_SURFACE_FORMAT_RGBA32_UINT 0x000000c2
+#define NV50_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3
+#define NV50_SURFACE_FORMAT_RGBX32_SINT 0x000000c4
+#define NV50_SURFACE_FORMAT_RGBX32_UINT 0x000000c5
+#define NV50_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6
+#define NV50_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7
+#define NV50_SURFACE_FORMAT_RGBA16_SINT 0x000000c8
+#define NV50_SURFACE_FORMAT_RGBA16_UINT 0x000000c9
+#define NV50_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca
+#define NV50_SURFACE_FORMAT_RG32_FLOAT 0x000000cb
+#define NV50_SURFACE_FORMAT_RG32_SINT 0x000000cc
+#define NV50_SURFACE_FORMAT_RG32_UINT 0x000000cd
+#define NV50_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce
+#define NV50_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf
+#define NV50_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0
+#define NV50_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1
+#define NV50_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2
+#define NV50_SURFACE_FORMAT_RGBA8_UNORM 0x000000d5
+#define NV50_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6
+#define NV50_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7
+#define NV50_SURFACE_FORMAT_RGBA8_SINT 0x000000d8
+#define NV50_SURFACE_FORMAT_RGBA8_UINT 0x000000d9
+#define NV50_SURFACE_FORMAT_RG16_UNORM 0x000000da
+#define NV50_SURFACE_FORMAT_RG16_SNORM 0x000000db
+#define NV50_SURFACE_FORMAT_RG16_SINT 0x000000dc
+#define NV50_SURFACE_FORMAT_RG16_UINT 0x000000dd
+#define NV50_SURFACE_FORMAT_RG16_FLOAT 0x000000de
+#define NV50_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df
+#define NV50_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0
+#define NV50_SURFACE_FORMAT_R32_SINT 0x000000e3
+#define NV50_SURFACE_FORMAT_R32_UINT 0x000000e4
+#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5
+#define NV50_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6
+#define NV50_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7
+#define NV50_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8
+#define NV50_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9
+#define NV50_SURFACE_FORMAT_RG8_UNORM 0x000000ea
+#define NV50_SURFACE_FORMAT_RG8_SNORM 0x000000eb
+#define NV50_SURFACE_FORMAT_RG8_SINT 0x000000ec
+#define NV50_SURFACE_FORMAT_RG8_UINT 0x000000ed
+#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee
+#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef
+#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0
+#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1
+#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2
+#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3
+#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4
+#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5
+#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6
+#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7
+#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8
+#define NV50_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9
+#define NV50_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa
+#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb
+#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc
+#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd
+#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe
+#define NV50_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff
+#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a
+#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013
+#define NV50_ZETA_FORMAT_S8_Z24_UNORM 0x00000014
+#define NV50_ZETA_FORMAT_Z24_X8_UNORM 0x00000015
+#define NV50_ZETA_FORMAT_Z24_S8_UNORM 0x00000016
+#define NV50_ZETA_FORMAT_Z24_C8_UNORM 0x00000018
+#define NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019
+#define NV50_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d
+#define NV50_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e
+#define NV50_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f
+#define NVE4_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002
+#define NVE4_IMAGE_FORMAT_RGBA32_SINT 0x00000003
+#define NVE4_IMAGE_FORMAT_RGBA32_UINT 0x00000004
+#define NVE4_IMAGE_FORMAT_RGBA16_UNORM 0x00000008
+#define NVE4_IMAGE_FORMAT_RGBA16_SNORM 0x00000009
+#define NVE4_IMAGE_FORMAT_RGBA16_SINT 0x0000000a
+#define NVE4_IMAGE_FORMAT_RGBA16_UINT 0x0000000b
+#define NVE4_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c
+#define NVE4_IMAGE_FORMAT_RG32_FLOAT 0x0000000d
+#define NVE4_IMAGE_FORMAT_RG32_SINT 0x0000000e
+#define NVE4_IMAGE_FORMAT_RG32_UINT 0x0000000f
+#define NVE4_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013
+#define NVE4_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015
+#define NVE4_IMAGE_FORMAT_RGBA8_UNORM 0x00000018
+#define NVE4_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a
+#define NVE4_IMAGE_FORMAT_RGBA8_SINT 0x0000001b
+#define NVE4_IMAGE_FORMAT_RGBA8_UINT 0x0000001c
+#define NVE4_IMAGE_FORMAT_RG16_UNORM 0x0000001d
+#define NVE4_IMAGE_FORMAT_RG16_SNORM 0x0000001e
+#define NVE4_IMAGE_FORMAT_RG16_SINT 0x0000001f
+#define NVE4_IMAGE_FORMAT_RG16_UINT 0x00000020
+#define NVE4_IMAGE_FORMAT_RG16_FLOAT 0x00000021
+#define NVE4_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024
+#define NVE4_IMAGE_FORMAT_R32_SINT 0x00000027
+#define NVE4_IMAGE_FORMAT_R32_UINT 0x00000028
+#define NVE4_IMAGE_FORMAT_R32_FLOAT 0x00000029
+#define NVE4_IMAGE_FORMAT_RG8_UNORM 0x0000002e
+#define NVE4_IMAGE_FORMAT_RG8_SNORM 0x0000002f
+#define NVE4_IMAGE_FORMAT_RG8_SINT 0x00000030
+#define NVE4_IMAGE_FORMAT_RG8_UINT 0x00000031
+#define NVE4_IMAGE_FORMAT_R16_UNORM 0x00000032
+#define NVE4_IMAGE_FORMAT_R16_SNORM 0x00000033
+#define NVE4_IMAGE_FORMAT_R16_SINT 0x00000034
+#define NVE4_IMAGE_FORMAT_R16_UINT 0x00000035
+#define NVE4_IMAGE_FORMAT_R16_FLOAT 0x00000036
+#define NVE4_IMAGE_FORMAT_R8_UNORM 0x00000037
+#define NVE4_IMAGE_FORMAT_R8_SNORM 0x00000038
+#define NVE4_IMAGE_FORMAT_R8_SINT 0x00000039
+#define NVE4_IMAGE_FORMAT_R8_UINT 0x0000003a
+#define NV50_QUERY__SIZE 0x00000010
+#define NV50_QUERY_COUNTER 0x00000000
+
+#define NV50_QUERY_RES 0x00000004
+
+#define NV50_QUERY_TIME 0x00000008
+
+
+#endif /* NV50_DEFS_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_draw.c b/src/gallium/drivers/nouveau/nv50/nv50_draw.c
new file mode 100644
index 00000000000..fa68cd8ee6a
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_draw.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "draw/draw_pipe.h"
+
+#include "nv50/nv50_context.h"
+
+/*
+ * Draw-module stage wrapper carrying a pointer to its nv50 context.
+ * `stage` must stay the first member: nv50_render_stage() casts a
+ * draw_stage pointer directly to this struct, and nv50_render_destroy()
+ * frees the draw_stage pointer it is handed.
+ */
+struct nv50_render_stage {
+ struct draw_stage stage;
+ struct nv50_context *nv50;
+};
+
+/* Reinterpret a draw_stage pointer as the nv50 stage wrapper. */
+static INLINE struct nv50_render_stage *
+nv50_render_stage(struct draw_stage *stage)
+{
+   struct nv50_render_stage *rs = (struct nv50_render_stage *)stage;
+
+   return rs;
+}
+
+/* Point callback of the stage: only logs the function name and line via
+ * NOUVEAU_ERR; neither argument is used. */
+static void
+nv50_render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+   (void)stage;
+   (void)prim;
+
+   NOUVEAU_ERR("\n");
+}
+
+/* Line callback of the stage: only logs the function name and line via
+ * NOUVEAU_ERR; neither argument is used. */
+static void
+nv50_render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+   (void)stage;
+   (void)prim;
+
+   NOUVEAU_ERR("\n");
+}
+
+/* Triangle callback of the stage: only logs the function name and line via
+ * NOUVEAU_ERR; neither argument is used. */
+static void
+nv50_render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+   (void)stage;
+   (void)prim;
+
+   NOUVEAU_ERR("\n");
+}
+
+/* Flush callback of the stage: intentionally does nothing. */
+static void
+nv50_render_flush(struct draw_stage *stage, unsigned flags)
+{
+   (void)stage;
+   (void)flags;
+}
+
+/* Stipple-counter reset callback: only logs the function name and line via
+ * NOUVEAU_ERR; the stage argument is unused. */
+static void
+nv50_render_reset_stipple_counter(struct draw_stage *stage)
+{
+   (void)stage;
+
+   NOUVEAU_ERR("\n");
+}
+
+/*
+ * Destroy callback: releases the stage.  The pointer passed in is the
+ * draw_stage embedded at the start of the nv50_render_stage allocated by
+ * nv50_draw_render_stage(), so freeing it frees the whole wrapper.
+ */
+static void
+nv50_render_destroy(struct draw_stage *stage)
+{
+   struct draw_stage *victim = stage;
+
+   FREE(victim);
+}
+
+/**
+ * Allocate the nv50 stage wrapper for the draw module and fill in its
+ * callbacks.
+ *
+ * @nv50: context the stage belongs to; its draw context is stored in the
+ *        stage.
+ *
+ * Returns the embedded draw_stage, or NULL if the allocation fails.  The
+ * stage is released through its destroy callback (nv50_render_destroy).
+ */
+struct draw_stage *
+nv50_draw_render_stage(struct nv50_context *nv50)
+{
+   struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage);
+
+   /* The allocation can fail; do not write through a NULL pointer. */
+   if (!rs)
+      return NULL;
+
+   rs->nv50 = nv50;
+   rs->stage.draw = nv50->draw;
+   rs->stage.destroy = nv50_render_destroy;
+   rs->stage.point = nv50_render_point;
+   rs->stage.line = nv50_render_line;
+   rs->stage.tri = nv50_render_tri;
+   rs->stage.flush = nv50_render_flush;
+   rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter;
+
+   return &rs->stage;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
new file mode 100644
index 00000000000..0a7e812ba13
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if NOUVEAU_DRIVER == 0xc0
+# include "nvc0/nvc0_screen.h"
+# include "nvc0/nvc0_3d.xml.h"
+#else
+# include "nv50/nv50_screen.h"
+# include "nv50/nv50_3d.xml.h"
+#endif
+#include "nv50/nv50_texture.xml.h"
+#include "nv50/nv50_defs.xml.h"
+
+#include "pipe/p_defines.h"
+
+/* Abbreviated usage masks:
+ * T: texturing
+ * R: render target
+ * B: render target, blendable
+ * C: render target (color), blendable only on nvc0
+ * D: scanout/display target, blendable
+ * Z: depth/stencil
+ * V: vertex fetch
+ * I: image / surface, implies T
+ */
+/*
+ * Usage masks named after the abbreviations listed above.  Every expansion
+ * that contains `|` is parenthesized so the macro stays a single operand
+ * wherever it is used; the plain aliases below expand to one of those.
+ */
+#define U_V (PIPE_BIND_VERTEX_BUFFER)
+#define U_T (PIPE_BIND_SAMPLER_VIEW)
+#define U_I (PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE)
+#define U_TR (PIPE_BIND_RENDER_TARGET | U_T)
+#define U_IR (U_TR | U_I)
+#define U_TB (PIPE_BIND_BLENDABLE | U_TR)
+#define U_IB (PIPE_BIND_BLENDABLE | U_IR)
+#define U_TD (PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB)
+#define U_TZ (PIPE_BIND_DEPTH_STENCIL | U_T)
+#define U_TV (U_V | U_T)
+#define U_TRV (U_V | U_TR)
+#define U_IRV (U_V | U_IR)
+#define U_TBV (U_V | U_TB)
+#define U_IBV (U_V | U_IB)
+#define U_TDV (U_V | U_TD)
+/* "C" variants are blendable only when built for nvc0; U_tV includes
+ * texturing only on nvc0. */
+#if NOUVEAU_DRIVER == 0xc0
+# define U_TC U_TB
+# define U_IC U_IB
+# define U_TCV U_TBV
+# define U_ICV U_IBV
+# define U_tV U_TV
+#else
+# define U_TC U_TR
+# define U_IC U_IR
+# define U_TCV U_TRV
+# define U_ICV U_IRV
+# define U_tV U_V
+#endif
+
+/* "NONE" placeholders so table macros that paste NV50_SURFACE_FORMAT_##n or
+ * NV50_ZETA_FORMAT_##n can be given NONE; both are 0. */
+#define NV50_SURFACE_FORMAT_NONE 0
+#define NV50_ZETA_FORMAT_NONE 0
+
+/* for vertex buffers: */
+/* The three-component sizes alias other TIC format names so the table macros
+ * can paste NV50_TIC_0_FMT_##sz for them.
+ * NOTE(review): the 32_32_32 alias uses an NVC0_-prefixed name even in the
+ * nv50 build - presumably provided by nv50_texture.xml.h; confirm. */
+#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8
+#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16
+#define NV50_TIC_0_FMT_32_32_32 NVC0_TIC_0_FMT_32_32_32
+
+/* Build the driver-specific vertex attribute format token for a size (s) or
+ * type (t) suffix; the prefix depends on whether this file is compiled for
+ * nvc0 or nv50. */
+#if NOUVEAU_DRIVER == 0xc0
+# define NVXX_3D_VAF_SIZE(s) NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##s
+# define NVXX_3D_VAF_TYPE(t) NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t
+#else
+# define NVXX_3D_VAF_SIZE(s) NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_##s
+# define NVXX_3D_VAF_TYPE(t) NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_##t
+#endif
+
+/*
+ * Format table entry, designated by PIPE_FORMAT_<pf>, with four initializers:
+ *   1. sf, passed through unchanged;
+ *   2. the TIC word: component maps r/g/b/a, per-component types t0..t3 and
+ *      the NV50_TIC_0_FMT_<sz> layout;
+ *   3. the vertex attribute word: NVXX_3D_VAF_SIZE(sz) | NVXX_3D_VAF_TYPE(t0),
+ *      with `br` placed in bit 31;
+ *   4. the usage mask U_<u>.
+ * NOTE(review): field meanings are inferred from the macro arguments; confirm
+ * against the nv50_format / nvc0_format struct definition.
+ *
+ * `br` is shifted as unsigned: (1 << 31) on a signed int is undefined in C.
+ */
+#define TBLENT_A_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u, br) \
+   [PIPE_FORMAT_##pf] = { \
+      sf, \
+      (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \
+      (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \
+      (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \
+      (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \
+      (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
+      (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
+      (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
+      (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
+      NV50_TIC_0_FMT_##sz, \
+      NVXX_3D_VAF_SIZE(sz) | \
+      NVXX_3D_VAF_TYPE(t0) | ((unsigned)(br) << 31), \
+      U_##u \
+   }
+
+/*
+ * Same as TBLENT_A_ but for formats without a vertex attribute encoding:
+ * the third initializer is 0 and there is no `br` argument.
+ */
+#define TBLENT_B_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \
+ [PIPE_FORMAT_##pf] = { \
+ sf, \
+ (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \
+ (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \
+ (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \
+ (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \
+ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
+ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
+ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
+ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
+ NV50_TIC_0_FMT_##sz, 0, U_##u \
+ }
+
+/*
+ * Four-channel colour entries: `n` is pasted onto NV50_SURFACE_FORMAT_ and
+ * the single type `t` is used for all four components.  C4A goes through
+ * TBLENT_A_ (takes `br`), C4B through TBLENT_B_.
+ */
+#define C4A(p, n, r, g, b, a, t, s, u, br) \
+ TBLENT_A_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u, br)
+#define C4B(p, n, r, g, b, a, t, s, u) \
+ TBLENT_B_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u)
+
+/*
+ * Depth/stencil entries: `n` is pasted onto NV50_ZETA_FORMAT_.  All three
+ * ignore their `a` argument and force the alpha map to ONE_FLOAT.
+ * ZXB and ZSB expand identically (type `t` in slot 0, UINT in slots 1-3);
+ * SZB puts UINT in slot 0 and `t` in slot 1.
+ */
+#define ZXB(p, n, r, g, b, a, t, s, u) \
+ TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \
+ r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
+#define ZSB(p, n, r, g, b, a, t, s, u) \
+ TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \
+ r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
+#define SZB(p, n, r, g, b, a, t, s, u) \
+ TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \
+ r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u)
+
+/*
+ * Shorthands over C4A/C4B.  Naming, as visible from the expansions:
+ *   F / I  - alpha map forced to ONE_FLOAT / ONE_INT (the `a` argument is
+ *            ignored);
+ *   3/2/1  - number of leading component maps taken from the arguments; the
+ *            remaining colour maps are forced to ZERO;
+ *   A / B  - expands through C4A (with br = 0) or C4B.
+ * A1B is the alpha-only form: r, g and b are forced to ZERO and `a` is used.
+ */
+#define F3A(p, n, r, g, b, a, t, s, u) \
+ C4A(p, n, r, g, b, ONE_FLOAT, t, s, u, 0)
+#define I3A(p, n, r, g, b, a, t, s, u) \
+ C4A(p, n, r, g, b, ONE_INT, t, s, u, 0)
+#define F3B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, r, g, b, ONE_FLOAT, t, s, u)
+#define I3B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, r, g, b, ONE_INT, t, s, u)
+
+#define F2A(p, n, r, g, b, a, t, s, u) \
+ C4A(p, n, r, g, ZERO, ONE_FLOAT, t, s, u, 0)
+#define I2A(p, n, r, g, b, a, t, s, u) \
+ C4A(p, n, r, g, ZERO, ONE_INT, t, s, u, 0)
+#define F2B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, r, g, ZERO, ONE_FLOAT, t, s, u)
+#define I2B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, r, g, ZERO, ONE_INT, t, s, u)
+
+#define F1A(p, n, r, g, b, a, t, s, u) \
+ C4A(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u, 0)
+#define I1A(p, n, r, g, b, a, t, s, u) \
+ C4A(p, n, r, ZERO, ZERO, ONE_INT, t, s, u, 0)
+#define F1B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u)
+#define I1B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, r, ZERO, ZERO, ONE_INT, t, s, u)
+
+#define A1B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, ZERO, ZERO, ZERO, a, t, s, u)
+
+#if NOUVEAU_DRIVER == 0xc0
+const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] =
+#else
+const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
+#endif
+{
+ C4A(B8G8R8A8_UNORM, BGRA8_UNORM, C2, C1, C0, C3, UNORM, 8_8_8_8, TDV, 1),
+ F3A(B8G8R8X8_UNORM, BGRX8_UNORM, C2, C1, C0, xx, UNORM, 8_8_8_8, TD),
+ C4A(B8G8R8A8_SRGB, BGRA8_SRGB, C2, C1, C0, C3, UNORM, 8_8_8_8, TD, 1),
+ F3A(B8G8R8X8_SRGB, BGRX8_SRGB, C2, C1, C0, xx, UNORM, 8_8_8_8, TD),
+ C4A(R8G8B8A8_UNORM, RGBA8_UNORM, C0, C1, C2, C3, UNORM, 8_8_8_8, IBV, 0),
+ F3A(R8G8B8X8_UNORM, RGBX8_UNORM, C0, C1, C2, xx, UNORM, 8_8_8_8, TB),
+ C4A(R8G8B8A8_SRGB, RGBA8_SRGB, C0, C1, C2, C3, UNORM, 8_8_8_8, TB, 0),
+ F3B(R8G8B8X8_SRGB, RGBX8_SRGB, C0, C1, C2, xx, UNORM, 8_8_8_8, TB),
+
+ ZXB(Z16_UNORM, Z16_UNORM, C0, C0, C0, xx, UNORM, Z16, TZ),
+ ZXB(Z32_FLOAT, Z32_FLOAT, C0, C0, C0, xx, FLOAT, Z32, TZ),
+ ZXB(Z24X8_UNORM, Z24_X8_UNORM, C0, C0, C0, xx, UNORM, Z24_X8, TZ),
+ ZSB(Z24_UNORM_S8_UINT, Z24_S8_UNORM, C0, C0, C0, xx, UNORM, Z24_S8, TZ),
+ ZSB(X24S8_UINT, NONE, C1, C1, C1, xx, UNORM, Z24_S8, T),
+ SZB(S8_UINT_Z24_UNORM, S8_Z24_UNORM, C1, C1, C1, xx, UNORM, S8_Z24, TZ),
+ SZB(S8X24_UINT, NONE, C0, C0, C0, xx, UNORM, S8_Z24, T),
+ ZSB(Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, C0, C0, C0, xx, FLOAT,
+ Z32_S8_X24, TZ),
+ ZSB(X32_S8X24_UINT, NONE, C1, C1, C1, xx, FLOAT, Z32_S8_X24, T),
+
+ F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, T),
+ C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TB),
+ F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TB),
+ C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
+ F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
+ F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),
+
+ C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
+ IBV, 0),
+ C4A(B10G10R10A2_UNORM, BGR10_A2_UNORM, C2, C1, C0, C3, UNORM, 10_10_10_2,
+ TBV, 1),
+ C4A(R10G10B10A2_SNORM, NONE, C0, C1, C2, C3, SNORM, 10_10_10_2, TV, 0),
+ C4A(B10G10R10A2_SNORM, NONE, C2, C1, C0, C3, SNORM, 10_10_10_2, TV, 1),
+
+ F3B(R11G11B10_FLOAT, R11G11B10_FLOAT, C0, C1, C2, xx, FLOAT, 11_11_10, IB),
+
+ F3B(L8_UNORM, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB),
+ F3B(L8_SRGB, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB),
+ F3B(L8_SNORM, R8_SNORM, C0, C0, C0, xx, SNORM, 8, TC),
+ I3B(L8_SINT, R8_SINT, C0, C0, C0, xx, SINT, 8, TR),
+ I3B(L8_UINT, R8_UINT, C0, C0, C0, xx, UINT, 8, TR),
+ F3B(L16_UNORM, R16_UNORM, C0, C0, C0, xx, UNORM, 16, TC),
+ F3B(L16_SNORM, R16_SNORM, C0, C0, C0, xx, SNORM, 16, TC),
+ F3B(L16_FLOAT, R16_FLOAT, C0, C0, C0, xx, FLOAT, 16, TB),
+ I3B(L16_SINT, R16_SINT, C0, C0, C0, xx, SINT, 16, TR),
+ I3B(L16_UINT, R16_UINT, C0, C0, C0, xx, UINT, 16, TR),
+ F3B(L32_FLOAT, R32_FLOAT, C0, C0, C0, xx, FLOAT, 32, TB),
+ I3B(L32_SINT, R32_SINT, C0, C0, C0, xx, SINT, 32, TR),
+ I3B(L32_UINT, R32_UINT, C0, C0, C0, xx, UINT, 32, TR),
+
+ C4B(I8_UNORM, R8_UNORM, C0, C0, C0, C0, UNORM, 8, TR),
+ C4B(I8_SNORM, R8_SNORM, C0, C0, C0, C0, SNORM, 8, TR),
+ C4B(I8_SINT, R8_SINT, C0, C0, C0, C0, SINT, 8, TR),
+ C4B(I8_UINT, R8_UINT, C0, C0, C0, C0, UINT, 8, TR),
+ C4B(I16_UNORM, R16_UNORM, C0, C0, C0, C0, UNORM, 16, TR),
+ C4B(I16_SNORM, R16_SNORM, C0, C0, C0, C0, SNORM, 16, TR),
+ C4B(I16_FLOAT, R16_FLOAT, C0, C0, C0, C0, FLOAT, 16, TR),
+ C4B(I16_SINT, R16_SINT, C0, C0, C0, C0, SINT, 16, TR),
+ C4B(I16_UINT, R16_UINT, C0, C0, C0, C0, UINT, 16, TR),
+ C4B(I32_FLOAT, R32_FLOAT, C0, C0, C0, C0, FLOAT, 32, TR),
+ C4B(I32_SINT, R32_SINT, C0, C0, C0, C0, SINT, 32, TR),
+ C4B(I32_UINT, R32_UINT, C0, C0, C0, C0, UINT, 32, TR),
+
+ A1B(A8_UNORM, A8_UNORM, xx, xx, xx, C0, UNORM, 8, TB),
+ A1B(A8_SNORM, R8_SNORM, xx, xx, xx, C0, SNORM, 8, T),
+ A1B(A8_SINT, R8_SINT, xx, xx, xx, C0, SINT, 8, T),
+ A1B(A8_UINT, R8_UINT, xx, xx, xx, C0, UINT, 8, T),
+ A1B(A16_UNORM, R16_UNORM, xx, xx, xx, C0, UNORM, 16, T),
+ A1B(A16_SNORM, R16_SNORM, xx, xx, xx, C0, SNORM, 16, T),
+ A1B(A16_FLOAT, R16_FLOAT, xx, xx, xx, C0, FLOAT, 16, T),
+ A1B(A16_SINT, R16_SINT, xx, xx, xx, C0, SINT, 16, T),
+ A1B(A16_UINT, R16_UINT, xx, xx, xx, C0, UINT, 16, T),
+ A1B(A32_FLOAT, R32_FLOAT, xx, xx, xx, C0, FLOAT, 32, T),
+ A1B(A32_SINT, R32_SINT, xx, xx, xx, C0, SINT, 32, T),
+ A1B(A32_UINT, R32_UINT, xx, xx, xx, C0, UINT, 32, T),
+
+ C4B(L4A4_UNORM, NONE, C0, C0, C0, C1, UNORM, 4_4, T),
+ C4B(L8A8_UNORM, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T),
+ C4B(L8A8_SNORM, RG8_SNORM, C0, C0, C0, C1, SNORM, 8_8, T),
+ C4B(L8A8_SRGB, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T),
+ C4B(L8A8_SINT, RG8_SINT, C0, C0, C0, C1, SINT, 8_8, T),
+ C4B(L8A8_UINT, RG8_UINT, C0, C0, C0, C1, UINT, 8_8, T),
+ C4B(L16A16_UNORM, RG16_UNORM, C0, C0, C0, C1, UNORM, 16_16, T),
+ C4B(L16A16_SNORM, RG16_SNORM, C0, C0, C0, C1, SNORM, 16_16, T),
+ C4B(L16A16_FLOAT, RG16_FLOAT, C0, C0, C0, C1, FLOAT, 16_16, T),
+ C4B(L16A16_SINT, RG16_SINT, C0, C0, C0, C1, SINT, 16_16, T),
+ C4B(L16A16_UINT, RG16_UINT, C0, C0, C0, C1, UINT, 16_16, T),
+ C4B(L32A32_FLOAT, RG32_FLOAT, C0, C0, C0, C1, FLOAT, 32_32, T),
+ C4B(L32A32_SINT, RG32_SINT, C0, C0, C0, C1, SINT, 32_32, T),
+ C4B(L32A32_UINT, RG32_UINT, C0, C0, C0, C1, UINT, 32_32, T),
+
+ F3B(DXT1_RGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T),
+ F3B(DXT1_SRGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T),
+ C4B(DXT1_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T),
+ C4B(DXT1_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T),
+ C4B(DXT3_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T),
+ C4B(DXT3_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T),
+ C4B(DXT5_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T),
+ C4B(DXT5_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T),
+
+ F1B(RGTC1_UNORM, NONE, C0, xx, xx, xx, UNORM, RGTC1, T),
+ F1B(RGTC1_SNORM, NONE, C0, xx, xx, xx, SNORM, RGTC1, T),
+ F2B(RGTC2_UNORM, NONE, C0, C1, xx, xx, UNORM, RGTC2, T),
+ F2B(RGTC2_SNORM, NONE, C0, C1, xx, xx, SNORM, RGTC2, T),
+ F3B(LATC1_UNORM, NONE, C0, C0, C0, xx, UNORM, RGTC1, T),
+ F3B(LATC1_SNORM, NONE, C0, C0, C0, xx, SNORM, RGTC1, T),
+ C4B(LATC2_UNORM, NONE, C0, C0, C0, C1, UNORM, RGTC2, T),
+ C4B(LATC2_SNORM, NONE, C0, C0, C0, C1, SNORM, RGTC2, T),
+
+ C4A(R32G32B32A32_FLOAT, RGBA32_FLOAT, C0, C1, C2, C3, FLOAT, 32_32_32_32,
+ IBV, 0),
+ C4A(R32G32B32A32_UNORM, NONE, C0, C1, C2, C3, UNORM, 32_32_32_32, TV, 0),
+ C4A(R32G32B32A32_SNORM, NONE, C0, C1, C2, C3, SNORM, 32_32_32_32, TV, 0),
+ C4A(R32G32B32A32_SINT, RGBA32_SINT, C0, C1, C2, C3, SINT, 32_32_32_32,
+ IRV, 0),
+ C4A(R32G32B32A32_UINT, RGBA32_UINT, C0, C1, C2, C3, UINT, 32_32_32_32,
+ IRV, 0),
+ F3B(R32G32B32X32_FLOAT, RGBX32_FLOAT, C0, C1, C2, xx, FLOAT, 32_32_32_32, TB),
+ I3B(R32G32B32X32_SINT, RGBX32_SINT, C0, C1, C2, xx, SINT, 32_32_32_32, TR),
+ I3B(R32G32B32X32_UINT, RGBX32_UINT, C0, C1, C2, xx, UINT, 32_32_32_32, TR),
+
+ F2A(R32G32_FLOAT, RG32_FLOAT, C0, C1, xx, xx, FLOAT, 32_32, IBV),
+ F2A(R32G32_UNORM, NONE, C0, C1, xx, xx, UNORM, 32_32, TV),
+ F2A(R32G32_SNORM, NONE, C0, C1, xx, xx, SNORM, 32_32, TV),
+ I2A(R32G32_SINT, RG32_SINT, C0, C1, xx, xx, SINT, 32_32, IRV),
+ I2A(R32G32_UINT, RG32_UINT, C0, C1, xx, xx, UINT, 32_32, IRV),
+
+ F1A(R32_FLOAT, R32_FLOAT, C0, xx, xx, xx, FLOAT, 32, IBV),
+ F1A(R32_UNORM, NONE, C0, xx, xx, xx, UNORM, 32, TV),
+ F1A(R32_SNORM, NONE, C0, xx, xx, xx, SNORM, 32, TV),
+ I1A(R32_SINT, R32_SINT, C0, xx, xx, xx, SINT, 32, IRV),
+ I1A(R32_UINT, R32_UINT, C0, xx, xx, xx, UINT, 32, IRV),
+
+ C4A(R16G16B16A16_FLOAT, RGBA16_FLOAT, C0, C1, C2, C3, FLOAT, 16_16_16_16,
+ IBV, 0),
+ C4A(R16G16B16A16_UNORM, RGBA16_UNORM, C0, C1, C2, C3, UNORM, 16_16_16_16,
+ ICV, 0),
+ C4A(R16G16B16A16_SNORM, RGBA16_SNORM, C0, C1, C2, C3, SNORM, 16_16_16_16,
+ ICV, 0),
+ C4A(R16G16B16A16_SINT, RGBA16_SINT, C0, C1, C2, C3, SINT, 16_16_16_16,
+ IRV, 0),
+ C4A(R16G16B16A16_UINT, RGBA16_UINT, C0, C1, C2, C3, UINT, 16_16_16_16,
+ IRV, 0),
+ F3B(R16G16B16X16_FLOAT, RGBX16_FLOAT, C0, C1, C2, xx, FLOAT, 16_16_16_16, TB),
+ F3B(R16G16B16X16_UNORM, RGBA16_UNORM, C0, C1, C2, xx, UNORM, 16_16_16_16, T),
+ F3B(R16G16B16X16_SNORM, RGBA16_SNORM, C0, C1, C2, xx, SNORM, 16_16_16_16, T),
+ I3B(R16G16B16X16_SINT, RGBA16_SINT, C0, C1, C2, xx, SINT, 16_16_16_16, T),
+ I3B(R16G16B16X16_UINT, RGBA16_UINT, C0, C1, C2, xx, UINT, 16_16_16_16, T),
+
+ F2A(R16G16_FLOAT, RG16_FLOAT, C0, C1, xx, xx, FLOAT, 16_16, IBV),
+ F2A(R16G16_UNORM, RG16_UNORM, C0, C1, xx, xx, UNORM, 16_16, ICV),
+ F2A(R16G16_SNORM, RG16_SNORM, C0, C1, xx, xx, SNORM, 16_16, ICV),
+ I2A(R16G16_SINT, RG16_SINT, C0, C1, xx, xx, SINT, 16_16, IRV),
+ I2A(R16G16_UINT, RG16_UINT, C0, C1, xx, xx, UINT, 16_16, IRV),
+
+ F1A(R16_FLOAT, R16_FLOAT, C0, xx, xx, xx, FLOAT, 16, IBV),
+ F1A(R16_UNORM, R16_UNORM, C0, xx, xx, xx, UNORM, 16, ICV),
+ F1A(R16_SNORM, R16_SNORM, C0, xx, xx, xx, SNORM, 16, ICV),
+ I1A(R16_SINT, R16_SINT, C0, xx, xx, xx, SINT, 16, IRV),
+ I1A(R16_UINT, R16_UINT, C0, xx, xx, xx, UINT, 16, IRV),
+
+ C4A(R8G8B8A8_SNORM, RGBA8_SNORM, C0, C1, C2, C3, SNORM, 8_8_8_8, ICV, 0),
+ C4A(R8G8B8A8_SINT, RGBA8_SINT, C0, C1, C2, C3, SINT, 8_8_8_8, IRV, 0),
+ C4A(R8G8B8A8_UINT, RGBA8_UINT, C0, C1, C2, C3, UINT, 8_8_8_8, IRV, 0),
+ F3B(R8G8B8X8_SNORM, RGBA8_SNORM, C0, C1, C2, xx, SNORM, 8_8_8_8, T),
+ I3B(R8G8B8X8_SINT, RGBA8_SINT, C0, C1, C2, xx, SINT, 8_8_8_8, T),
+ I3B(R8G8B8X8_UINT, RGBA8_UINT, C0, C1, C2, xx, UINT, 8_8_8_8, T),
+
+ F2A(R8G8_UNORM, RG8_UNORM, C0, C1, xx, xx, UNORM, 8_8, IBV),
+ F2A(R8G8_SNORM, RG8_SNORM, C0, C1, xx, xx, SNORM, 8_8, ICV),
+ I2A(R8G8_SINT, RG8_SINT, C0, C1, xx, xx, SINT, 8_8, IRV),
+ I2A(R8G8_UINT, RG8_UINT, C0, C1, xx, xx, UINT, 8_8, IRV),
+
+ F1A(R8_UNORM, R8_UNORM, C0, xx, xx, xx, UNORM, 8, IBV),
+ F1A(R8_SNORM, R8_SNORM, C0, xx, xx, xx, SNORM, 8, ICV),
+ I1A(R8_SINT, R8_SINT, C0, xx, xx, xx, SINT, 8, IRV),
+ I1A(R8_UINT, R8_UINT, C0, xx, xx, xx, UINT, 8, IRV),
+
+ F3B(R8G8_B8G8_UNORM, NONE, C0, C1, C2, xx, UNORM, U8_YA8_V8_YB8, T),
+ F3B(G8R8_B8R8_UNORM, NONE, C1, C0, C2, xx, UNORM, U8_YA8_V8_YB8, T),
+ F3B(G8R8_G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, YA8_U8_YB8_V8, T),
+ F3B(R8G8_R8B8_UNORM, NONE, C1, C0, C2, xx, UNORM, YA8_U8_YB8_V8, T),
+
+ F1B(R1_UNORM, BITMAP, C0, xx, xx, xx, UNORM, BITMAP, T),
+
+ C4B(R4A4_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 4_4, T),
+ C4B(R8A8_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 8_8, T),
+ C4B(A4R4_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 4_4, T),
+ C4B(A8R8_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 8_8, T),
+
+ TBLENT_B_(R8SG8SB8UX8U_NORM, 0,
+ C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, T),
+ TBLENT_B_(R5SG5SB6U_NORM, 0,
+ C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 5_5_6, T),
+
+ /* vertex-only formats: */
+
+ C4A(R32G32B32A32_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 32_32_32_32, V, 0),
+ C4A(R32G32B32A32_USCALED, NONE, C0, C1, C2, C3, USCALED, 32_32_32_32, V, 0),
+ F3A(R32G32B32_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, tV),
+ F3A(R32G32B32_UNORM, NONE, C0, C1, C2, xx, UNORM, 32_32_32, V),
+ F3A(R32G32B32_SNORM, NONE, C0, C1, C2, xx, SNORM, 32_32_32, V),
+ I3A(R32G32B32_SINT, NONE, C0, C1, C2, xx, SINT, 32_32_32, tV),
+ I3A(R32G32B32_UINT, NONE, C0, C1, C2, xx, UINT, 32_32_32, tV),
+ F3A(R32G32B32_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 32_32_32, V),
+ F3A(R32G32B32_USCALED, NONE, C0, C1, C2, xx, USCALED, 32_32_32, V),
+ F2A(R32G32_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 32_32, V),
+ F2A(R32G32_USCALED, NONE, C0, C1, xx, xx, USCALED, 32_32, V),
+ F1A(R32_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 32, V),
+ F1A(R32_USCALED, NONE, C0, xx, xx, xx, USCALED, 32, V),
+
+ C4A(R16G16B16A16_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 16_16_16_16, V, 0),
+ C4A(R16G16B16A16_USCALED, NONE, C0, C1, C2, C3, USCALED, 16_16_16_16, V, 0),
+ F3A(R16G16B16_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 16_16_16, V),
+ F3A(R16G16B16_UNORM, NONE, C0, C1, C2, xx, UNORM, 16_16_16, V),
+ F3A(R16G16B16_SNORM, NONE, C0, C1, C2, xx, SNORM, 16_16_16, V),
+ I3A(R16G16B16_SINT, NONE, C0, C1, C2, xx, SINT, 16_16_16, V),
+ I3A(R16G16B16_UINT, NONE, C0, C1, C2, xx, UINT, 16_16_16, V),
+ F3A(R16G16B16_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 16_16_16, V),
+ F3A(R16G16B16_USCALED, NONE, C0, C1, C2, xx, USCALED, 16_16_16, V),
+ F2A(R16G16_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 16_16, V),
+ F2A(R16G16_USCALED, NONE, C0, C1, xx, xx, USCALED, 16_16, V),
+ F1A(R16_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 16, V),
+ F1A(R16_USCALED, NONE, C0, xx, xx, xx, USCALED, 16, V),
+
+ C4A(R8G8B8A8_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 8_8_8_8, V, 0),
+ C4A(R8G8B8A8_USCALED, NONE, C0, C1, C2, C3, USCALED, 8_8_8_8, V, 0),
+ F3A(R8G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, 8_8_8, V),
+ F3A(R8G8B8_SNORM, NONE, C0, C1, C2, xx, SNORM, 8_8_8, V),
+ I2A(R8G8B8_SINT, NONE, C0, C1, C2, xx, SINT, 8_8_8, V),
+ I2A(R8G8B8_UINT, NONE, C0, C1, C2, xx, UINT, 8_8_8, V),
+ F3A(R8G8B8_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 8_8_8, V),
+ F3A(R8G8B8_USCALED, NONE, C0, C1, C2, xx, USCALED, 8_8_8, V),
+ F2A(R8G8_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 8_8, V),
+ F2A(R8G8_USCALED, NONE, C0, C1, xx, xx, USCALED, 8_8, V),
+ F1A(R8_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 8, V),
+ F1A(R8_USCALED, NONE, C0, xx, xx, xx, USCALED, 8, V),
+
+ /* FIXED types: not supported natively, converted on VBO push */
+
+ C4B(R32G32B32A32_FIXED, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V),
+ F3B(R32G32B32_FIXED, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V),
+ F2B(R32G32_FIXED, NONE, C0, C1, xx, xx, FLOAT, 32_32, V),
+ F1B(R32_FIXED, NONE, C0, xx, xx, xx, FLOAT, 32, V),
+
+ C4B(R64G64B64A64_FLOAT, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V),
+ F3B(R64G64B64_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V),
+ F2B(R64G64_FLOAT, NONE, C0, C1, xx, xx, FLOAT, 32_32, V),
+ F1B(R64_FLOAT, NONE, C0, xx, xx, xx, FLOAT, 32, V),
+};
+
+/* Disabled lookup table: everything between #if 0 and #endif is not compiled.
+ * Going by the identifiers, it maps a PIPE_FORMAT_* index to the matching
+ * NV50_SURFACE_FORMAT_* (colour) or NV50_ZETA_FORMAT_* (depth/stencil) code.
+ * Formats without an explicit entry would be zero-initialized.
+ * NOTE(review): presumably superseded by the per-format table above, which
+ * names the same surface formats - confirm before deleting.
+ */
+#if 0
+const uint8_t nv50_rt_format_map[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_Z16_UNORM] = NV50_ZETA_FORMAT_Z16_UNORM,
+ [PIPE_FORMAT_Z24X8_UNORM] = NV50_ZETA_FORMAT_Z24_X8_UNORM,
+ [PIPE_FORMAT_Z24_UNORM_S8_UINT] = NV50_ZETA_FORMAT_Z24_S8_UNORM,
+ [PIPE_FORMAT_S8_UINT_Z24_UNORM] = NV50_ZETA_FORMAT_S8_Z24_UNORM,
+ [PIPE_FORMAT_Z32_FLOAT] = NV50_ZETA_FORMAT_Z32_FLOAT,
+ [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT,
+
+ [PIPE_FORMAT_R1_UNORM] = NV50_SURFACE_FORMAT_BITMAP,
+
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV50_SURFACE_FORMAT_RGBA32_FLOAT,
+ [PIPE_FORMAT_R32G32B32X32_FLOAT] = NV50_SURFACE_FORMAT_RGBX32_FLOAT,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = NV50_SURFACE_FORMAT_RGBA32_SINT,
+ [PIPE_FORMAT_R32G32B32X32_SINT] = NV50_SURFACE_FORMAT_RGBX32_SINT,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = NV50_SURFACE_FORMAT_RGBA32_UINT,
+ [PIPE_FORMAT_R32G32B32X32_UINT] = NV50_SURFACE_FORMAT_RGBX32_UINT,
+
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV50_SURFACE_FORMAT_RGBA16_FLOAT,
+ [PIPE_FORMAT_R16G16B16X16_FLOAT] = NV50_SURFACE_FORMAT_RGBX16_FLOAT,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = NV50_SURFACE_FORMAT_RGBA16_UNORM,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = NV50_SURFACE_FORMAT_RGBA16_SNORM,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = NV50_SURFACE_FORMAT_RGBA16_SINT,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = NV50_SURFACE_FORMAT_RGBA16_UINT,
+
+ [PIPE_FORMAT_B8G8R8A8_UNORM] = NV50_SURFACE_FORMAT_BGRA8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = NV50_SURFACE_FORMAT_RGBA8_UNORM,
+ [PIPE_FORMAT_B8G8R8X8_UNORM] = NV50_SURFACE_FORMAT_BGRX8_UNORM,
+ [PIPE_FORMAT_R8G8B8X8_UNORM] = NV50_SURFACE_FORMAT_RGBX8_UNORM,
+ [PIPE_FORMAT_B8G8R8A8_SRGB] = NV50_SURFACE_FORMAT_BGRA8_SRGB,
+ [PIPE_FORMAT_R8G8B8A8_SRGB] = NV50_SURFACE_FORMAT_RGBA8_SRGB,
+ [PIPE_FORMAT_B8G8R8X8_SRGB] = NV50_SURFACE_FORMAT_BGRX8_SRGB,
+ [PIPE_FORMAT_R8G8B8X8_SRGB] = NV50_SURFACE_FORMAT_RGBX8_SRGB,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = NV50_SURFACE_FORMAT_RGBA8_SNORM,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = NV50_SURFACE_FORMAT_RGBA8_SINT,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = NV50_SURFACE_FORMAT_RGBA8_UINT,
+
+ [PIPE_FORMAT_R11G11B10_FLOAT] = NV50_SURFACE_FORMAT_R11G11B10_FLOAT,
+
+ [PIPE_FORMAT_B10G10R10A2_UNORM] = NV50_SURFACE_FORMAT_BGR10_A2_UNORM,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = NV50_SURFACE_FORMAT_RGB10_A2_UNORM,
+ [PIPE_FORMAT_R10G10B10A2_UINT] = NV50_SURFACE_FORMAT_RGB10_A2_UINT,
+
+ [PIPE_FORMAT_B5G6R5_UNORM] = NV50_SURFACE_FORMAT_B5G6R5_UNORM,
+
+ [PIPE_FORMAT_B5G5R5A1_UNORM] = NV50_SURFACE_FORMAT_BGR5_A1_UNORM,
+ [PIPE_FORMAT_B5G5R5X1_UNORM] = NV50_SURFACE_FORMAT_BGR5_X1_UNORM,
+
+ [PIPE_FORMAT_R32G32_FLOAT] = NV50_SURFACE_FORMAT_RG32_FLOAT,
+ [PIPE_FORMAT_R32G32_SINT] = NV50_SURFACE_FORMAT_RG32_SINT,
+ [PIPE_FORMAT_R32G32_UINT] = NV50_SURFACE_FORMAT_RG32_UINT,
+
+ [PIPE_FORMAT_R16G16_FLOAT] = NV50_SURFACE_FORMAT_RG16_FLOAT,
+ [PIPE_FORMAT_R16G16_UNORM] = NV50_SURFACE_FORMAT_RG16_UNORM,
+ [PIPE_FORMAT_R16G16_SNORM] = NV50_SURFACE_FORMAT_RG16_SNORM,
+ [PIPE_FORMAT_R16G16_SINT] = NV50_SURFACE_FORMAT_RG16_SINT,
+ [PIPE_FORMAT_R16G16_UINT] = NV50_SURFACE_FORMAT_RG16_UINT,
+
+ [PIPE_FORMAT_R8G8_UNORM] = NV50_SURFACE_FORMAT_RG8_UNORM,
+ [PIPE_FORMAT_R8G8_SNORM] = NV50_SURFACE_FORMAT_RG8_SNORM,
+ [PIPE_FORMAT_R8G8_SINT] = NV50_SURFACE_FORMAT_RG8_SINT,
+ [PIPE_FORMAT_R8G8_UINT] = NV50_SURFACE_FORMAT_RG8_UINT,
+
+ [PIPE_FORMAT_R32_FLOAT] = NV50_SURFACE_FORMAT_R32_FLOAT,
+ [PIPE_FORMAT_R32_SINT] = NV50_SURFACE_FORMAT_R32_SINT,
+ [PIPE_FORMAT_R32_UINT] = NV50_SURFACE_FORMAT_R32_UINT,
+
+ [PIPE_FORMAT_R16_FLOAT] = NV50_SURFACE_FORMAT_R16_FLOAT,
+ [PIPE_FORMAT_R16_UNORM] = NV50_SURFACE_FORMAT_R16_UNORM,
+ [PIPE_FORMAT_R16_SNORM] = NV50_SURFACE_FORMAT_R16_SNORM,
+ [PIPE_FORMAT_R16_SINT] = NV50_SURFACE_FORMAT_R16_SINT,
+ [PIPE_FORMAT_R16_UINT] = NV50_SURFACE_FORMAT_R16_UINT,
+
+ [PIPE_FORMAT_R8_UNORM] = NV50_SURFACE_FORMAT_R8_UNORM,
+ [PIPE_FORMAT_R8_SNORM] = NV50_SURFACE_FORMAT_R8_SNORM,
+ [PIPE_FORMAT_R8_SINT] = NV50_SURFACE_FORMAT_R8_SINT,
+ [PIPE_FORMAT_R8_UINT] = NV50_SURFACE_FORMAT_R8_UINT,
+
+ [PIPE_FORMAT_A8_UNORM] = NV50_SURFACE_FORMAT_A8_UNORM
+};
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
new file mode 100644
index 00000000000..513d8f96aac
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_resource.h"
+
+/* Pick a tile mode from the number of block rows (ny) and slices (nz).
+ *
+ * The result combines a height code in bits 4..7 (0x010 = height 16 tiles
+ * up to 0x040 = height 128 tiles) with a depth code in bits 8..11
+ * (0x100 up to 0x500 = depth 32 tiles).  With nz == 1 only the height code
+ * is returned; otherwise the height code is capped at 0x020 before the
+ * depth code is added.  nx does not take part in the selection.
+ */
+uint32_t
+nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz)
+{
+   uint32_t height_bits;
+   uint32_t depth_bits;
+
+   (void)nx;
+
+   if (ny <= 8)
+      height_bits = 0x000;
+   else if (ny <= 16)
+      height_bits = 0x010; /* height 16 tiles */
+   else if (ny <= 32)
+      height_bits = 0x020; /* height 32 tiles */
+   else if (ny <= 64)
+      height_bits = 0x030; /* height 64 tiles */
+   else
+      height_bits = 0x040; /* height 128 tiles */
+
+   if (nz == 1)
+      return height_bits;
+
+   /* more than one slice: tile height is limited to the 0x020 code */
+   if (height_bits > 0x020)
+      height_bits = 0x020;
+
+   if (nz > 16 && height_bits < 0x020)
+      depth_bits = 0x500; /* depth 32 tiles */
+   else if (nz > 8)
+      depth_bits = 0x400; /* depth 16 tiles */
+   else if (nz > 4)
+      depth_bits = 0x300; /* depth 8 tiles */
+   else if (nz > 2)
+      depth_bits = 0x200; /* depth 4 tiles */
+   else
+      depth_bits = 0x100;
+
+   return height_bits | depth_bits;
+}
+
+/* Tile mode for one miptree level: defers to the helper, passing twice the
+ * number of block rows.
+ */
+static uint32_t
+nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz)
+{
+   const unsigned doubled_rows = ny * 2;
+
+   return nv50_tex_choose_tile_dims_helper(nx, doubled_rows, nz);
+}
+
+/* Select the memory type (tile flags) for a miptree's buffer object.
+ *
+ * Returns 0 for resources flagged NOUVEAU_RESOURCE_FLAG_LINEAR or bound as
+ * cursor, and for non-depth formats whose block size is not 8, 16, 32, 64
+ * or 128 bits.  Depth/stencil formats get a per-format base value plus ms;
+ * other formats are chosen by block size and ms.
+ *
+ * ms is mt->ms_x + mt->ms_y, so those fields have to be set before this is
+ * called for a multisample type to be picked.
+ *
+ * When compressed is FALSE, bits 0x180 are cleared from the result.
+ */
+static uint32_t
+nv50_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed)
+{
+   const unsigned ms = mt->ms_x + mt->ms_y;
+
+   uint32_t tile_flags;
+
+   if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR))
+      return 0;
+   if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR))
+      return 0;
+
+   switch (mt->base.base.format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      tile_flags = 0x6c + ms;
+      break;
+   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+      tile_flags = 0x18 + ms;
+      break;
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      tile_flags = 0x128 + ms;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT:
+      tile_flags = 0x40 + ms;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      tile_flags = 0x60 + ms;
+      break;
+   default:
+      switch (util_format_get_blocksizebits(mt->base.base.format)) {
+      case 128:
+         assert(ms < 3);
+         tile_flags = 0x74;
+         break;
+      case 64:
+         switch (ms) {
+         case 2: tile_flags = 0xfc; break;
+         case 3: tile_flags = 0xfd; break;
+         default:
+            tile_flags = 0x70;
+            break;
+         }
+         break;
+      case 32:
+         if (mt->base.base.bind & PIPE_BIND_SCANOUT) {
+            assert(ms == 0);
+            tile_flags = 0x7a;
+         } else {
+            switch (ms) {
+            case 2: tile_flags = 0xf8; break;
+            case 3: tile_flags = 0xf9; break;
+            default:
+               tile_flags = 0x70;
+               break;
+            }
+         }
+         break;
+      case 16:
+      case 8:
+         tile_flags = 0x70;
+         break;
+      default:
+         return 0;
+      }
+      /* PIPE_BIND_CURSOR needs no second check here: cursor resources
+       * already returned 0 at the top of the function.
+       */
+      break;
+   }
+
+   if (!compressed)
+      tile_flags &= ~0x180;
+
+   return tile_flags;
+}
+
+/* Destroy a miptree resource: drop its buffer object and fence references,
+ * adjust the texture-object driver statistics, then free the structure.
+ */
+void
+nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
+{
+ struct nv50_miptree *mt = nv50_miptree(pt);
+
+ /* passing NULL as the new reference releases the one currently held */
+ nouveau_bo_ref(NULL, &mt->base.bo);
+
+ nouveau_fence_ref(NULL, &mt->base.fence);
+ nouveau_fence_ref(NULL, &mt->base.fence_wr);
+
+ /* one texture object fewer, and total_size fewer bytes accounted for */
+ NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_count, -1);
+ NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_bytes,
+ -(uint64_t)mt->total_size);
+
+ FREE(mt);
+}
+
+/* Export the miptree's buffer object through whandle, reporting the pitch of
+ * level 0 as the stride.  Returns FALSE when there is no miptree or it has no
+ * buffer object; otherwise returns what nouveau_screen_bo_get_handle() returns.
+ */
+boolean
+nv50_miptree_get_handle(struct pipe_screen *pscreen,
+                        struct pipe_resource *pt,
+                        struct winsys_handle *whandle)
+{
+   struct nv50_miptree *mt = nv50_miptree(pt);
+
+   if (mt == NULL || mt->base.bo == NULL)
+      return FALSE;
+
+   return nouveau_screen_bo_get_handle(pscreen, mt->base.bo,
+                                       mt->level[0].pitch, whandle);
+}
+
+/* Resource callback table for miptrees.  The initializers are positional, so
+ * their order must match the member order of struct u_resource_vtbl.  The
+ * flush-region and inline-write slots use the u_default_* implementations.
+ */
+const struct u_resource_vtbl nv50_miptree_vtbl =
+{
+ nv50_miptree_get_handle, /* get_handle */
+ nv50_miptree_destroy, /* resource_destroy */
+ nv50_miptree_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region, /* transfer_flush_region */
+ nv50_miptree_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
+/* Set mt->ms_mode and the per-axis shifts ms_x / ms_y from nr_samples.
+ *
+ * Accepts 0, 1, 2, 4 and 8 samples; anything else logs an error and returns
+ * FALSE.  Fields that a branch does not assign (ms_y for 2 samples, ms_x and
+ * ms_y for 0 or 1 samples) keep whatever value they had on entry.
+ */
+static INLINE boolean
+nv50_miptree_init_ms_mode(struct nv50_miptree *mt)
+{
+   const unsigned samples = mt->base.base.nr_samples;
+
+   if (samples == 8) {
+      mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS8;
+      mt->ms_x = 2;
+      mt->ms_y = 1;
+   } else if (samples == 4) {
+      mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS4;
+      mt->ms_x = 1;
+      mt->ms_y = 1;
+   } else if (samples == 2) {
+      mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS2;
+      mt->ms_x = 1;
+   } else if (samples <= 1) {
+      mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1;
+   } else {
+      NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples);
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+/* Lay out a miptree as a single linear (pitch) surface.
+ *
+ * Returns FALSE, leaving mt untouched, for depth/stencil formats, for
+ * resources with mip levels, depth or array layers, and for multisampled
+ * resources.  Otherwise sets the level 0 pitch (row bytes rounded up to
+ * pitch_align) and total_size, and returns TRUE.
+ */
+boolean
+nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align)
+{
+   const struct pipe_resource *res = &mt->base.base;
+   unsigned rows;
+
+   if (util_format_is_depth_or_stencil(res->format))
+      return FALSE;
+   if (res->last_level > 0 || res->depth0 > 1 || res->array_size > 1)
+      return FALSE;
+   if (mt->ms_x != 0 || mt->ms_y != 0)
+      return FALSE;
+
+   mt->level[0].pitch =
+      align(res->width0 * util_format_get_blocksize(res->format), pitch_align);
+
+   /* Account for very generous prefetch (allocate size as if tiled):
+    * at least 8 rows, rounded up to a power of two.
+    */
+   rows = util_next_power_of_two(MAX2(res->height0, 8));
+
+   mt->total_size = mt->level[0].pitch * rows;
+
+   return TRUE;
+}
+
+/* Layout for resources flagged as video: one level with fixed tile mode 0x20,
+ * pitch aligned to 64 bytes and height aligned to 16 rows.  Asserts that the
+ * resource has no mip levels, is not multisampled and is not compressed.
+ */
+static void
+nv50_miptree_init_layout_video(struct nv50_miptree *mt)
+{
+   const struct pipe_resource *res = &mt->base.base;
+   const unsigned bytes_per_block = util_format_get_blocksize(res->format);
+   unsigned slices;
+
+   assert(res->last_level == 0);
+   assert(mt->ms_x == 0 && mt->ms_y == 0);
+   assert(!util_format_is_compressed(res->format));
+
+   mt->layout_3d = res->target == PIPE_TEXTURE_3D;
+   slices = mt->layout_3d ? res->depth0 : 1;
+
+   mt->level[0].tile_mode = 0x20;
+   mt->level[0].pitch = align(res->width0 * bytes_per_block, 64);
+   mt->total_size = align(res->height0, 16) * mt->level[0].pitch * slices;
+
+   if (res->array_size <= 1)
+      return;
+
+   /* array layers: each one starts on a tile-size boundary */
+   mt->layer_stride = align(mt->total_size, NV50_TILE_SIZE(0x20));
+   mt->total_size = mt->layer_stride * res->array_size;
+}
+
+/* Compute the tiled layout of every mip level: per-level offset, tile mode
+ * and pitch, accumulating the running size in mt->total_size.  For array
+ * resources the accumulated size becomes the (tile-aligned) layer stride and
+ * total_size is multiplied by the number of layers.
+ */
+static void
+nv50_miptree_init_layout_tiled(struct nv50_miptree *mt)
+{
+   const struct pipe_resource *res = &mt->base.base;
+   const unsigned bytes_per_block = util_format_get_blocksize(res->format);
+   unsigned width, height, depth, level;
+
+   mt->layout_3d = res->target == PIPE_TEXTURE_3D;
+
+   /* multisampled surfaces are stored scaled up by the ms shifts */
+   width = res->width0 << mt->ms_x;
+   height = res->height0 << mt->ms_y;
+
+   /* For 3D textures, a mipmap is spanned by all the layers, for array
+    * textures and cube maps, each layer contains its own mipmaps.
+    */
+   depth = mt->layout_3d ? res->depth0 : 1;
+
+   for (level = 0; level <= res->last_level; ++level) {
+      struct nv50_miptree_level *lvl = &mt->level[level];
+      const unsigned blocks_x = util_format_get_nblocksx(res->format, width);
+      const unsigned blocks_y = util_format_get_nblocksy(res->format, height);
+      unsigned tile_w, tile_h, tile_d;
+
+      lvl->offset = mt->total_size;
+      lvl->tile_mode = nv50_tex_choose_tile_dims(blocks_x, blocks_y, depth);
+
+      tile_w = NV50_TILE_SIZE_X(lvl->tile_mode); /* tile row pitch in bytes */
+      tile_h = NV50_TILE_SIZE_Y(lvl->tile_mode);
+      tile_d = NV50_TILE_SIZE_Z(lvl->tile_mode);
+
+      lvl->pitch = align(blocks_x * bytes_per_block, tile_w);
+
+      mt->total_size +=
+         lvl->pitch * align(blocks_y, tile_h) * align(depth, tile_d);
+
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
+   }
+
+   if (res->array_size <= 1)
+      return;
+
+   mt->layer_stride = align(mt->total_size,
+                            NV50_TILE_SIZE(mt->level[0].tile_mode));
+   mt->total_size = mt->layer_stride * res->array_size;
+}
+
+/* Create a miptree resource from a template.
+ *
+ * Copies templ, determines the multisample mode, picks a storage type and a
+ * layout (video, tiled or linear) and allocates the backing buffer object in
+ * VRAM.  Returns NULL on allocation failure, on an invalid sample count, or
+ * when a linear layout is required but not possible.  Resources flagged both
+ * NV50_RESOURCE_FLAG_VIDEO and NV50_RESOURCE_FLAG_NOALLOC are returned
+ * without a buffer object.
+ */
+struct pipe_resource *
+nv50_miptree_create(struct pipe_screen *pscreen,
+                    const struct pipe_resource *templ)
+{
+   struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+   struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
+   struct pipe_resource *pt;
+   int ret;
+   union nouveau_bo_config bo_config;
+   uint32_t bo_flags;
+
+   if (!mt)
+      return NULL;
+   pt = &mt->base.base;
+
+   mt->base.vtbl = &nv50_miptree_vtbl;
+   *pt = *templ;
+   pipe_reference_init(&pt->reference, 1);
+   pt->screen = pscreen;
+
+   if (pt->bind & PIPE_BIND_LINEAR)
+      pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;
+
+   /* The storage type depends on ms_x/ms_y, so the multisample mode has to
+    * be set up first; on the freshly zeroed struct the sample term would
+    * otherwise always be 0.
+    */
+   if (!nv50_miptree_init_ms_mode(mt)) {
+      FREE(mt);
+      return NULL;
+   }
+
+   bo_config.nv50.memtype = nv50_mt_choose_storage_type(mt, TRUE);
+
+   if (unlikely(pt->flags & NV50_RESOURCE_FLAG_VIDEO)) {
+      nv50_miptree_init_layout_video(mt);
+      if (pt->flags & NV50_RESOURCE_FLAG_NOALLOC) {
+         /* BO allocation done by client */
+         return pt;
+      }
+   } else
+   if (bo_config.nv50.memtype != 0) {
+      nv50_miptree_init_layout_tiled(mt);
+   } else
+   if (!nv50_miptree_init_layout_linear(mt, 64)) {
+      FREE(mt);
+      return NULL;
+   }
+   bo_config.nv50.tile_mode = mt->level[0].tile_mode;
+
+   bo_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP;
+   if (mt->base.base.bind & (PIPE_BIND_CURSOR | PIPE_BIND_DISPLAY_TARGET))
+      bo_flags |= NOUVEAU_BO_CONTIG;
+
+   ret = nouveau_bo_new(dev, bo_flags, 4096, mt->total_size, &bo_config,
+                        &mt->base.bo);
+   if (ret) {
+      FREE(mt);
+      return NULL;
+   }
+   mt->base.domain = NOUVEAU_BO_VRAM;
+   mt->base.address = mt->base.bo->offset;
+
+   return pt;
+}
+
+/* Wrap an imported buffer object (winsys handle) in a miptree.
+ *
+ * Only 2D / RECT targets with a single level, depth 1 and at most one array
+ * layer are accepted; anything else returns NULL, as do allocation and
+ * import failures.  Level 0 takes the stride reported by the import and the
+ * tile mode stored in the buffer object's config.
+ */
+struct pipe_resource *
+nv50_miptree_from_handle(struct pipe_screen *pscreen,
+                         const struct pipe_resource *templ,
+                         struct winsys_handle *whandle)
+{
+   struct nv50_miptree *mt;
+   struct pipe_resource *res;
+   unsigned stride;
+
+   /* only supports 2D, non-mipmapped textures for the moment */
+   if (templ->target != PIPE_TEXTURE_2D &&
+       templ->target != PIPE_TEXTURE_RECT)
+      return NULL;
+   if (templ->last_level != 0 || templ->depth0 != 1 || templ->array_size > 1)
+      return NULL;
+
+   mt = CALLOC_STRUCT(nv50_miptree);
+   if (mt == NULL)
+      return NULL;
+
+   mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
+   if (!mt->base.bo) {
+      FREE(mt);
+      return NULL;
+   }
+   mt->base.domain = NOUVEAU_BO_VRAM;
+   mt->base.address = mt->base.bo->offset;
+
+   res = &mt->base.base;
+   *res = *templ;
+   mt->base.vtbl = &nv50_miptree_vtbl;
+   pipe_reference_init(&res->reference, 1);
+   res->screen = pscreen;
+
+   mt->level[0].pitch = stride;
+   mt->level[0].offset = 0;
+   mt->level[0].tile_mode = mt->base.bo->config.nv50.tile_mode;
+
+   /* no need to adjust bo reference count */
+   return res;
+}
+
+
+/* Byte offset of z-slice @z relative to the start of mip level @l.
+ *
+ * The slice index is split into its position inside a 3D tile (low bits,
+ * scaled by the size of one 2D tile slice) and the index of the 3D tile
+ * along z (high bits, scaled by the size of a full row of 3D tiles).
+ */
+INLINE unsigned
+nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z)
+{
+   const struct pipe_resource *res = &mt->base.base;
+   const unsigned depth_shift = NV50_TILE_SHIFT_Z(mt->level[l].tile_mode);
+   const unsigned height_shift = NV50_TILE_SHIFT_Y(mt->level[l].tile_mode);
+   const unsigned blocks_y =
+      util_format_get_nblocksy(res->format, u_minify(res->height0, l));
+
+   /* to next 2D tile slice within a 3D tile */
+   const unsigned slice_stride = NV50_TILE_SIZE_2D(mt->level[l].tile_mode);
+
+   /* to slice in the next (in z direction) 3D tile */
+   const unsigned tile_stride =
+      (align(blocks_y, (1 << height_shift)) * mt->level[l].pitch) << depth_shift;
+
+   const unsigned z_in_tile = z & ((1 << depth_shift) - 1);
+   const unsigned tile_index = z >> depth_shift;
+
+   return z_in_tile * slice_stride + tile_index * tile_stride;
+}
+
+/* Surface functions.
+ */
+
+/* Allocate an nv50_surface for the level and layer range named in templ.
+ *
+ * The surface takes a reference on the miptree.  ns->offset is the start of
+ * the level only; it does not account for first_layer.  The pipe_surface
+ * width/height hold the minified level size, while ns->width/ns->height are
+ * additionally scaled by the multisample shifts.  Returns NULL if the
+ * allocation fails.
+ */
+struct nv50_surface *
+nv50_surface_from_miptree(struct nv50_miptree *mt,
+                          const struct pipe_surface *templ)
+{
+   const unsigned level = templ->u.tex.level;
+   struct nv50_surface *ns = CALLOC_STRUCT(nv50_surface);
+   struct pipe_surface *ps;
+
+   if (ns == NULL)
+      return NULL;
+   ps = &ns->base;
+
+   pipe_reference_init(&ps->reference, 1);
+   pipe_resource_reference(&ps->texture, &mt->base.base);
+
+   ps->format = templ->format;
+   ps->writable = templ->writable;
+   ps->u.tex.level = level;
+   ps->u.tex.first_layer = templ->u.tex.first_layer;
+   ps->u.tex.last_layer = templ->u.tex.last_layer;
+
+   ns->offset = mt->level[level].offset;
+   ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1;
+   ns->width = u_minify(mt->base.base.width0, ps->u.tex.level);
+   ns->height = u_minify(mt->base.base.height0, ps->u.tex.level);
+
+   /* a comment says these are going to be removed, but the st still uses
+    * them; they must be copied before the multisample scaling below
+    */
+   ps->width = ns->width;
+   ps->height = ns->height;
+
+   ns->width <<= mt->ms_x;
+   ns->height <<= mt->ms_y;
+
+   return ns;
+}
+
+/* Create a surface for a miptree and bind it to the given context.
+ *
+ * When first_layer is non-zero the surface offset is advanced to that layer:
+ * via the z-slice offset for 3D layouts, via layer_stride otherwise.
+ * Returns NULL if the surface cannot be allocated.
+ */
+struct pipe_surface *
+nv50_miptree_surface_new(struct pipe_context *pipe,
+                         struct pipe_resource *pt,
+                         const struct pipe_surface *templ)
+{
+   struct nv50_miptree *mt = nv50_miptree(pt);
+   struct nv50_surface *ns = nv50_surface_from_miptree(mt, templ);
+   unsigned level, layer;
+
+   if (ns == NULL)
+      return NULL;
+   ns->base.context = pipe;
+
+   level = ns->base.u.tex.level;
+   layer = ns->base.u.tex.first_layer;
+
+   if (layer == 0)
+      return &ns->base;
+
+   if (!mt->layout_3d) {
+      ns->offset += mt->layer_stride * layer;
+      return &ns->base;
+   }
+
+   ns->offset += nv50_mt_zslice_offset(mt, level, layer);
+
+   /* TODO: switch to depth 1 tiles; but actually this shouldn't happen */
+   if (ns->depth > 1 &&
+       (layer & (NV50_TILE_SIZE_Z(mt->level[level].tile_mode) - 1)))
+      NOUVEAU_ERR("Creating unsupported 3D surface !\n");
+
+   return &ns->base;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
new file mode 100644
index 00000000000..73df71c61e2
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv50_program.h"
+#include "nv50/nv50_context.h"
+
+#include "codegen/nv50_ir_driver.h"
+
+/* Number of set bits in the low four bits of val; higher bits are ignored. */
+static INLINE unsigned
+bitcount4(const uint32_t val)
+{
+   const uint32_t nibble = val & 0xf;
+
+   return (nibble & 1) + ((nibble >> 1) & 1) + ((nibble >> 2) & 1) +
+          (nibble >> 3);
+}
+
+/* Slot assignment callback body for vertex and geometry programs (both are
+ * dispatched here by nv50_program_assign_varying_slots).
+ *
+ * Copies the input/output descriptions from info into prog->in[]/prog->out[],
+ * numbers every enabled component consecutively (written back to
+ * info->in[i].slot[c] / info->out[i].slot[c]) and builds the attribute enable
+ * words in prog->vp.attrs[].  Always returns 0.
+ */
+static int
+nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
+{
+ struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
+ unsigned i, n, c;
+
+ /* n counts input components handed out so far */
+ n = 0;
+ for (i = 0; i < info->numInputs; ++i) {
+ prog->in[i].id = i;
+ prog->in[i].sn = info->in[i].sn;
+ prog->in[i].si = info->in[i].si;
+ prog->in[i].hw = n;
+ prog->in[i].mask = info->in[i].mask;
+
+ /* 4 enable bits per input, i.e. 8 inputs per 32-bit attrs word */
+ prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
+
+ for (c = 0; c < 4; ++c)
+ if (info->in[i].mask & (1 << c))
+ info->in[i].slot[c] = n++;
+ }
+ prog->in_nr = info->numInputs;
+
+ /* system values only set enable bits in the third attrs word here */
+ for (i = 0; i < info->numSysVals; ++i) {
+ switch (info->sv[i].sn) {
+ case TGSI_SEMANTIC_INSTANCEID:
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
+ continue;
+ case TGSI_SEMANTIC_VERTEXID:
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12;
+ continue;
+ default:
+ break;
+ }
+ }
+
+ /*
+ * Corner case: VP has no inputs, but we will still need to submit data to
+ * draw it. HW will shout at us and won't draw anything if we don't enable
+ * any input, so let's just pretend it's the first one.
+ */
+ if (prog->vp.attrs[0] == 0 &&
+ prog->vp.attrs[1] == 0 &&
+ prog->vp.attrs[2] == 0)
+ prog->vp.attrs[0] |= 0xf;
+
+ /* VertexID before InstanceID */
+ /* the slots continue the input numbering; indices >= numSysVals mean the
+ * system value is not present */
+ if (info->io.vertexId < info->numSysVals)
+ info->sv[info->io.vertexId].slot[0] = n++;
+ if (info->io.instanceId < info->numSysVals)
+ info->sv[info->io.instanceId].slot[0] = n++;
+
+ /* restart the numbering for outputs */
+ n = 0;
+ for (i = 0; i < info->numOutputs; ++i) {
+ switch (info->out[i].sn) {
+ case TGSI_SEMANTIC_PSIZE:
+ /* output index for now, converted to a hw slot after the loop */
+ prog->vp.psiz = i;
+ break;
+ case TGSI_SEMANTIC_CLIPDIST:
+ /* n is the slot the first enabled component of this output gets */
+ prog->vp.clpd[info->out[i].si] = n;
+ break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ prog->vp.edgeflag = i;
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ prog->vp.bfc[info->out[i].si] = i;
+ break;
+ default:
+ break;
+ }
+ prog->out[i].id = i;
+ prog->out[i].sn = info->out[i].sn;
+ prog->out[i].si = info->out[i].si;
+ prog->out[i].hw = n;
+ prog->out[i].mask = info->out[i].mask;
+
+ for (c = 0; c < 4; ++c)
+ if (info->out[i].mask & (1 << c))
+ info->out[i].slot[c] = n++;
+ }
+ prog->out_nr = info->numOutputs;
+ prog->max_out = n;
+
+ /* if psiz names a valid output, replace the index by that output's first
+ * hw slot; otherwise it keeps the value it had on entry */
+ if (prog->vp.psiz < info->numOutputs)
+ prog->vp.psiz = prog->out[prog->vp.psiz].hw;
+
+ return 0;
+}
+
+/* Slot assignment callback body for fragment programs.
+ *
+ * Inputs: position components are numbered first, then the remaining inputs
+ * with all non-flat ones ahead of the flat ones; FACE gets the fixed slot 255.
+ * The counts are packed into prog->fp.interp and prog->fp.colors.
+ * Outputs: each output gets 4 slots starting at (semantic index * 4); sample
+ * mask and fragment depth are appended after the highest slot in use.
+ * Always returns 0.
+ */
+static int
+nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
+{
+ struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
+ unsigned i, n, m, c;
+ unsigned nvary;
+ unsigned nflat;
+ unsigned nintp = 0;
+
+ /* count recorded non-flat inputs */
+ /* afterwards m is the prog->in[] index where the flat inputs will start */
+ for (m = 0, i = 0; i < info->numInputs; ++i) {
+ switch (info->in[i].sn) {
+ case TGSI_SEMANTIC_POSITION:
+ case TGSI_SEMANTIC_FACE:
+ continue;
+ default:
+ m += info->in[i].flat ? 0 : 1;
+ break;
+ }
+ }
+ /* careful: id may be != i in info->in[prog->in[i].id] */
+
+ /* Fill prog->in[] so that non-flat inputs are first and
+ * kick out special inputs that don't use the RESULT_MAP.
+ */
+ for (n = 0, i = 0; i < info->numInputs; ++i) {
+ if (info->in[i].sn == TGSI_SEMANTIC_POSITION) {
+ /* position component mask lives in bits 24+ of fp.interp */
+ prog->fp.interp |= info->in[i].mask << 24;
+ for (c = 0; c < 4; ++c)
+ if (info->in[i].mask & (1 << c))
+ info->in[i].slot[c] = nintp++;
+ } else
+ if (info->in[i].sn == TGSI_SEMANTIC_FACE) {
+ info->in[i].slot[0] = 255;
+ } else {
+ /* non-flat inputs fill [0, m0), flat ones follow from m0 upwards */
+ unsigned j = info->in[i].flat ? m++ : n++;
+
+ /* remember where the colors ended up; read again below for fp.colors */
+ if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
+ prog->vp.bfc[info->in[i].si] = j;
+
+ prog->in[j].id = i;
+ prog->in[j].mask = info->in[i].mask;
+ prog->in[j].sn = info->in[i].sn;
+ prog->in[j].si = info->in[i].si;
+ prog->in[j].linear = info->in[i].linear;
+
+ prog->in_nr++;
+ }
+ }
+ /* force bit 3 of the position mask on if the shader did not read that
+ * component, and account for its slot */
+ if (!(prog->fp.interp & (8 << 24))) {
+ ++nintp;
+ prog->fp.interp |= 8 << 24;
+ }
+
+ /* hand out hw slots in prog->in[] order, continuing after the position */
+ for (i = 0; i < prog->in_nr; ++i) {
+ int j = prog->in[i].id;
+
+ prog->in[i].hw = nintp;
+ for (c = 0; c < 4; ++c)
+ if (prog->in[i].mask & (1 << c))
+ info->in[j].slot[c] = nintp++;
+ }
+ /* (n == m) if m never increased, i.e. no flat inputs */
+ nflat = (n < m) ? (nintp - prog->in[n].hw) : 0;
+ nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */
+ nvary = nintp - nflat;
+
+ prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
+ prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
+
+ /* put front/back colors right after HPOS */
+ prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT;
+ /* bfc[] entries still at their 0xff initial value mean "no such color" */
+ for (i = 0; i < 2; ++i)
+ if (prog->vp.bfc[i] < 0xff)
+ prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16;
+
+ /* FP outputs */
+
+ if (info->prop.fp.numColourResults > 1)
+ prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
+
+ for (i = 0; i < info->numOutputs; ++i) {
+ prog->out[i].id = i;
+ prog->out[i].sn = info->out[i].sn;
+ prog->out[i].si = info->out[i].si;
+ prog->out[i].mask = info->out[i].mask;
+
+ /* depth and sample mask get their slots after the loop */
+ if (i == info->io.fragDepth || i == info->io.sampleMask)
+ continue;
+ /* every other output occupies 4 slots selected by its semantic index */
+ prog->out[i].hw = info->out[i].si * 4;
+
+ for (c = 0; c < 4; ++c)
+ info->out[i].slot[c] = prog->out[i].hw + c;
+
+ prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
+ }
+
+ /* sample mask (component 0) first, then depth (component 2) */
+ if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+ info->out[info->io.sampleMask].slot[0] = prog->max_out++;
+
+ if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
+ info->out[info->io.fragDepth].slot[2] = prog->max_out++;
+
+ /* never report zero results */
+ if (!prog->max_out)
+ prog->max_out = 4;
+
+ return 0;
+}
+
+/* Dispatch slot assignment by shader type: vertex and geometry programs share
+ * one routine, fragment programs have their own.  Returns -1 for any other
+ * type, otherwise the result of the selected routine.
+ */
+static int
+nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
+{
+   if (info->type == PIPE_SHADER_FRAGMENT)
+      return nv50_fragprog_assign_slots(info);
+
+   if (info->type == PIPE_SHADER_VERTEX ||
+       info->type == PIPE_SHADER_GEOMETRY)
+      return nv50_vertprog_assign_slots(info);
+
+   return -1;
+}
+
+/* Build the stream output state for a translated program.
+ *
+ * info supplies the hw slot of every output component (info->out[r].slot[]),
+ * pso the gallium description of which components go to which buffer.
+ * Returns a newly allocated state (released with FREE() in
+ * nv50_program_destroy), or NULL if the allocation fails.
+ */
+static struct nv50_stream_output_state *
+nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
+ const struct pipe_stream_output_info *pso)
+{
+ struct nv50_stream_output_state *so;
+ unsigned b, i, c;
+ unsigned base[4];
+
+ so = MALLOC_STRUCT(nv50_stream_output_state);
+ if (!so)
+ return NULL;
+ /* 0xff marks map entries that no output writes */
+ memset(so->map, 0xff, sizeof(so->map));
+
+ /* num_attribs[b] = highest (dst_offset + num_components) in buffer b */
+ for (b = 0; b < 4; ++b)
+ so->num_attribs[b] = 0;
+ for (i = 0; i < pso->num_outputs; ++i) {
+ unsigned end = pso->output[i].dst_offset + pso->output[i].num_components;
+ b = pso->output[i].output_buffer;
+ assert(b < 4);
+ so->num_attribs[b] = MAX2(so->num_attribs[b], end);
+ }
+
+ /* start out interleaved; overwritten below if a buffer other than 0 is
+ * used */
+ so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED;
+
+ so->stride[0] = pso->stride[0] * 4;
+ base[0] = 0;
+ for (b = 1; b < 4; ++b) {
+ assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]);
+ so->stride[b] = so->num_attribs[b] * 4;
+ /* note: plain assignment, this drops the INTERLEAVED flag */
+ if (so->num_attribs[b])
+ so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT;
+ /* each buffer's section of the map starts on a multiple of 4 */
+ base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4);
+ }
+ if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) {
+ assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX);
+ so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT;
+ }
+
+ so->map_size = base[3] + so->num_attribs[3];
+
+ /* map[position in buffer section] = hw slot of the source component */
+ for (i = 0; i < pso->num_outputs; ++i) {
+ const unsigned s = pso->output[i].start_component;
+ const unsigned p = pso->output[i].dst_offset;
+ const unsigned r = pso->output[i].register_index;
+ b = pso->output[i].output_buffer;
+
+ for (c = 0; c < pso->output[i].num_components; ++c)
+ so->map[base[b] + p + c] = info->out[r].slot[s + c];
+ }
+
+ return so;
+}
+
+/* Compile prog->pipe.tokens (TGSI) for the given chipset.
+ *
+ * On success the generated code, its size, relocation data, register count
+ * and TLS requirement are stored in prog, fragment program control flags are
+ * updated and, if the shader has stream outputs, prog->so is created.
+ * Returns TRUE on success, FALSE on allocation or code generation failure.
+ */
+boolean
+nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
+{
+   const uint8_t unmapped = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;
+   struct nv50_ir_prog_info *info = CALLOC_STRUCT(nv50_ir_prog_info);
+   int ret;
+
+   if (!info)
+      return FALSE;
+
+   /* Reset the slot bookkeeping that the assignSlots callback fills in. */
+   prog->vp.psiz = unmapped;
+   prog->vp.clpd[0] = unmapped;
+   prog->vp.clpd[1] = unmapped;
+   prog->vp.edgeflag = 0xff;
+   prog->vp.bfc[0] = 0xff;
+   prog->vp.bfc[1] = 0xff;
+   prog->gp.primid = 0x80;
+
+   /* Describe the job to the code generator. */
+   info->driverPriv = prog;
+   info->assignSlots = nv50_program_assign_varying_slots;
+
+   info->type = prog->type;
+   info->target = chipset;
+   info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
+   info->bin.source = (void *)prog->pipe.tokens;
+
+   info->io.ucpCBSlot = 15;
+   info->io.ucpBase = 0;
+   info->io.genUserClip = prog->vp.clpd_nr;
+
+#ifdef DEBUG
+   info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
+   info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
+#else
+   info->optLevel = 3;
+#endif
+
+   ret = nv50_ir_generate_code(info);
+   if (ret) {
+      NOUVEAU_ERR("shader translation failed: %i\n", ret);
+      FREE(info);
+      return FALSE;
+   }
+   FREE(info->bin.syms);
+
+   /* Take over the generated binary and its properties. */
+   prog->code = info->bin.code;
+   prog->code_size = info->bin.codeSize;
+   prog->fixups = info->bin.relocData;
+   prog->tls_space = info->bin.tlsSpace;
+   prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
+
+   if (prog->type == PIPE_SHADER_FRAGMENT) {
+      if (info->prop.fp.writesDepth) {
+         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
+         prog->fp.flags[1] = 0x11;
+      }
+      if (info->prop.fp.usesDiscard)
+         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
+   }
+
+   if (prog->pipe.stream_output.num_outputs)
+      prog->so = nv50_program_create_strmout_state(info,
+                                                   &prog->pipe.stream_output);
+
+   FREE(info);
+   return TRUE;
+}
+
+/* Upload a translated program's code into the screen's code buffer.
+ *
+ * Reserves space in the code heap that belongs to prog->type (evicting every
+ * program from that heap once if the first attempt fails), grows the TLS area
+ * through nv50_tls_realloc(), applies the relocation records against the
+ * chosen code_base and finally copies the binary with nv50_sifc_linear_u8().
+ *
+ * Returns TRUE on success; FALSE if the program type is invalid, the code
+ * does not fit even after eviction, or nv50_tls_realloc() fails.
+ */
+boolean
+nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
+{
+   struct nouveau_heap *heap;
+   int ret;
+   uint32_t size = align(prog->code_size, 0x40);
+
+   /* Every shader stage suballocates from its own heap; geometry and
+    * fragment programs previously picked each other's heap.
+    */
+   switch (prog->type) {
+   case PIPE_SHADER_VERTEX:   heap = nv50->screen->vp_code_heap; break;
+   case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
+   case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
+   default:
+      assert(!"invalid program type");
+      return FALSE;
+   }
+
+   ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
+   if (ret) {
+      /* Out of space: evict everything to compactify the code segment, hoping
+       * the working set is much smaller and drifts slowly. Improve me !
+       */
+      while (heap->next) {
+         struct nv50_program *evict = heap->next->priv;
+         if (evict)
+            nouveau_heap_free(&evict->mem);
+      }
+      debug_printf("WARNING: out of code space, evicting all shaders.\n");
+      ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
+      if (ret) {
+         NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
+         return FALSE;
+      }
+   }
+   prog->code_base = prog->mem->start;
+
+   /* < 0: failure, > 0: the TLS area changed and state must be re-emitted */
+   ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
+   if (ret < 0)
+      return FALSE;
+   if (ret > 0)
+      nv50->state.new_tls_space = TRUE;
+
+   if (prog->fixups)
+      nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
+
+   /* The destination is selected by the program type, independent of which
+    * heap object did the bookkeeping.
+    */
+   nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
+                       (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
+                       NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+
+   BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
+   PUSH_DATA (nv50->base.pushbuf, 0);
+
+   return TRUE;
+}
+
+/* Release everything a translation/upload attached to the program (code heap
+ * allocation, code, relocation records, stream output state) and zero the
+ * structure, keeping only the gallium shader state and the program type so
+ * the program can be translated again.
+ */
+void
+nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
+{
+   const struct pipe_shader_state saved_pipe = p->pipe;
+   const ubyte saved_type = p->type;
+
+   if (p->mem)
+      nouveau_heap_free(&p->mem);
+
+   FREE(p->so);
+   FREE(p->fixups);
+   FREE(p->code);
+
+   memset(p, 0, sizeof(*p));
+
+   p->type = saved_type;
+   p->pipe = saved_pipe;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
new file mode 100644
index 00000000000..13b9516a3e4
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2010 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NV50_PROG_H__
+#define __NV50_PROG_H__
+
+struct nv50_context;
+
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+
+/* One shader input or output as tracked by the driver; element type of
+ * nv50_program::in[] and nv50_program::out[].
+ */
+struct nv50_varying {
+ uint8_t id; /* tgsi index */
+ uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
+
+ unsigned mask : 4; /* enabled components, one bit per component */
+ unsigned linear : 1; /* copied from the code generator's input info */
+ unsigned pad : 3;
+
+ ubyte sn; /* semantic name */
+ ubyte si; /* semantic index */
+};
+
+/* Stream output (transform feedback) layout derived from a program's
+ * pipe_stream_output_info.
+ */
+struct nv50_stream_output_state
+{
+ uint32_t ctrl; /* STRMOUT_BUFFERS_CTRL value (interleaved/separate, stride) */
+ uint16_t stride[4]; /* per buffer: 4 * number of components */
+ uint8_t num_attribs[4]; /* per buffer: highest dst_offset + num_components */
+ uint8_t map_size; /* number of map[] entries in use */
+ uint8_t map[128]; /* hw output slot per written component, 0xff = unused */
+};
+
+/* Driver-side shader object: the gallium shader state plus everything produced
+ * by translation and upload.  nv50_program_destroy() clears all members except
+ * pipe and type.
+ */
+struct nv50_program {
+ struct pipe_shader_state pipe;
+
+ ubyte type; /* PIPE_SHADER_* */
+ boolean translated;
+
+ uint32_t *code; /* generated binary, owned by the program */
+ unsigned code_size;
+ unsigned code_base; /* start of the code heap allocation (mem->start) */
+ uint32_t *immd;
+ unsigned immd_size;
+ unsigned parm_size; /* size limit of uniform buffer */
+ uint32_t tls_space; /* required local memory per thread */
+
+ ubyte max_gpr; /* REG_ALLOC_TEMP */
+ ubyte max_out; /* REG_ALLOC_RESULT or FP_RESULT_COUNT */
+
+ ubyte in_nr; /* number of valid entries in in[] */
+ ubyte out_nr; /* number of valid entries in out[] */
+ struct nv50_varying in[16];
+ struct nv50_varying out[16];
+
+ struct {
+ uint32_t attrs[3]; /* VP_ATTR_EN_0,1 and VP_GP_BUILTIN_ATTR_EN */
+ ubyte psiz; /* output slot of point size */
+ ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */
+ ubyte edgeflag;
+ ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */
+ ubyte clpd_nr;
+ } vp;
+
+ struct {
+ uint32_t flags[2]; /* 0x19a8, 196c */
+ uint32_t interp; /* 0x1988 */
+ uint32_t colors; /* 0x1904 */
+ } fp;
+
+ struct {
+ ubyte primid; /* primitive id output register */
+ uint8_t vert_count;
+ uint8_t prim_type; /* point, line strip or tri strip */
+ } gp;
+
+ void *fixups; /* relocation records */
+
+ struct nouveau_heap *mem; /* code heap allocation, NULL while not uploaded */
+
+ struct nv50_stream_output_state *so; /* NULL if there are no stream outputs */
+};
+
+/* Compile the program's TGSI for the given chipset; TRUE on success. */
+boolean nv50_program_translate(struct nv50_program *, uint16_t chipset);
+/* Place the compiled code in the screen's code buffer; TRUE on success. */
+boolean nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
+/* Free code, relocations, heap space and stream output state of a program. */
+void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
+
+#endif /* __NV50_PROG_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c
new file mode 100644
index 00000000000..3e9a4096cf0
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c
@@ -0,0 +1,309 @@
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_resource.h"
+
+#include "nv50/nv50_3d.xml.h"
+
+/* Per-draw state shared by the emit_vertices_* helpers. */
+struct push_context {
+ struct nouveau_pushbuf *push; /* command stream the vertices are written to */
+
+ const void *idxbuf; /* CPU pointer to the index data, NULL if not indexed */
+
+ float edgeflag;
+ int edgeflag_attr;
+
+ uint32_t vertex_words; /* 32-bit words emitted per vertex */
+ uint32_t packet_vertex_limit; /* max vertices per VERTEX_DATA packet */
+
+ struct translate *translate; /* converts vertices into the push buffer */
+
+ boolean primitive_restart;
+ uint32_t prim; /* VERTEX_BEGIN_GL value for the current instance */
+ uint32_t restart_index;
+ uint32_t instance_id; /* passed to translate, incremented per instance */
+};
+
+/* Position of the first element equal to index among the first push 8-bit
+ * elements, or push if none matches.
+ */
+static INLINE unsigned
+prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
+{
+   unsigned pos = 0;
+
+   while (pos < push && elts[pos] != index)
+      ++pos;
+   return pos;
+}
+
+/* Position of the first element equal to index among the first push 16-bit
+ * elements, or push if none matches.
+ */
+static INLINE unsigned
+prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
+{
+   unsigned pos = 0;
+
+   while (pos < push && elts[pos] != index)
+      ++pos;
+   return pos;
+}
+
+/* Position of the first element equal to index among the first push 32-bit
+ * elements, or push if none matches.
+ */
+static INLINE unsigned
+prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
+{
+   unsigned pos = 0;
+
+   while (pos < push && elts[pos] != index)
+      ++pos;
+   return pos;
+}
+
+/* Emit count vertices selected by 8-bit indices, beginning at index start.
+ *
+ * Vertices go out in packets of at most packet_vertex_limit.  With primitive
+ * restart enabled a packet ends in front of the restart index, which is then
+ * emitted separately through VB_ELEMENT_U32 and skipped.
+ */
+static void
+emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
+{
+   struct nouveau_pushbuf *pb = ctx->push;
+   uint8_t *idx = (uint8_t *)ctx->idxbuf + start;
+
+   while (count) {
+      const unsigned batch = MIN2(count, ctx->packet_vertex_limit);
+      unsigned run = batch;
+      unsigned words;
+
+      if (ctx->primitive_restart)
+         run = prim_restart_search_i08(idx, batch, ctx->restart_index);
+      words = ctx->vertex_words * run;
+
+      BEGIN_NI04(pb, NV50_3D(VERTEX_DATA), words);
+
+      /* translate writes the vertices straight into the push buffer */
+      ctx->translate->run_elts8(ctx->translate, idx, run, 0, ctx->instance_id,
+                                pb->cur);
+      pb->cur += words;
+
+      if (run == batch) {
+         count -= run;
+         idx += run;
+      } else {
+         /* stopped at a restart index: consume it as well */
+         count -= run + 1;
+         idx += run + 1;
+         BEGIN_NV04(pb, NV50_3D(VB_ELEMENT_U32), 1);
+         PUSH_DATA (pb, ctx->restart_index);
+      }
+   }
+}
+
+/* Emit count vertices selected by 16-bit indices, beginning at index start.
+ *
+ * Vertices go out in packets of at most packet_vertex_limit.  With primitive
+ * restart enabled a packet ends in front of the restart index, which is then
+ * emitted separately through VB_ELEMENT_U32 and skipped.
+ */
+static void
+emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
+{
+   struct nouveau_pushbuf *pb = ctx->push;
+   uint16_t *idx = (uint16_t *)ctx->idxbuf + start;
+
+   while (count) {
+      const unsigned batch = MIN2(count, ctx->packet_vertex_limit);
+      unsigned run = batch;
+      unsigned words;
+
+      if (ctx->primitive_restart)
+         run = prim_restart_search_i16(idx, batch, ctx->restart_index);
+      words = ctx->vertex_words * run;
+
+      BEGIN_NI04(pb, NV50_3D(VERTEX_DATA), words);
+
+      /* translate writes the vertices straight into the push buffer */
+      ctx->translate->run_elts16(ctx->translate, idx, run, 0, ctx->instance_id,
+                                 pb->cur);
+      pb->cur += words;
+
+      if (run == batch) {
+         count -= run;
+         idx += run;
+      } else {
+         /* stopped at a restart index: consume it as well */
+         count -= run + 1;
+         idx += run + 1;
+         BEGIN_NV04(pb, NV50_3D(VB_ELEMENT_U32), 1);
+         PUSH_DATA (pb, ctx->restart_index);
+      }
+   }
+}
+
+/* Emit count vertices selected by 32-bit indices, beginning at index start.
+ *
+ * Vertices go out in packets of at most packet_vertex_limit.  With primitive
+ * restart enabled a packet ends in front of the restart index, which is then
+ * emitted separately through VB_ELEMENT_U32 and skipped.
+ */
+static void
+emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
+{
+   struct nouveau_pushbuf *pb = ctx->push;
+   uint32_t *idx = (uint32_t *)ctx->idxbuf + start;
+
+   while (count) {
+      const unsigned batch = MIN2(count, ctx->packet_vertex_limit);
+      unsigned run = batch;
+      unsigned words;
+
+      if (ctx->primitive_restart)
+         run = prim_restart_search_i32(idx, batch, ctx->restart_index);
+      words = ctx->vertex_words * run;
+
+      BEGIN_NI04(pb, NV50_3D(VERTEX_DATA), words);
+
+      /* translate writes the vertices straight into the push buffer */
+      ctx->translate->run_elts(ctx->translate, idx, run, 0, ctx->instance_id,
+                               pb->cur);
+      pb->cur += words;
+
+      if (run == batch) {
+         count -= run;
+         idx += run;
+      } else {
+         /* stopped at a restart index: consume it as well */
+         count -= run + 1;
+         idx += run + 1;
+         BEGIN_NV04(pb, NV50_3D(VB_ELEMENT_U32), 1);
+         PUSH_DATA (pb, ctx->restart_index);
+      }
+   }
+}
+
+/* Emit count consecutive (non-indexed) vertices beginning at vertex start, in
+ * packets of at most packet_vertex_limit vertices.
+ */
+static void
+emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+   struct nouveau_pushbuf *pb = ctx->push;
+   unsigned left;
+
+   for (left = count; left; ) {
+      const unsigned batch = MIN2(left, ctx->packet_vertex_limit);
+      const unsigned words = ctx->vertex_words * batch;
+
+      BEGIN_NI04(pb, NV50_3D(VERTEX_DATA), words);
+
+      /* translate writes the vertices straight into the push buffer */
+      ctx->translate->run(ctx->translate, start, batch, 0, ctx->instance_id,
+                          pb->cur);
+      pb->cur += words;
+
+      start += batch;
+      left -= batch;
+   }
+}
+
+
+/* Expands to one switch case that maps PIPE_PRIM_<n> to the matching
+ * NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_<n> value.
+ */
+#define NV50_PRIM_GL_CASE(n) \
+   case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+/* Translate a gallium primitive type into the VERTEX_BEGIN_GL primitive
+ * value; anything not listed is drawn as points.
+ */
+static INLINE unsigned
+nv50_prim_gl(unsigned prim)
+{
+   switch (prim) {
+   NV50_PRIM_GL_CASE(POINTS);
+   NV50_PRIM_GL_CASE(LINES);
+   NV50_PRIM_GL_CASE(LINE_LOOP);
+   NV50_PRIM_GL_CASE(LINE_STRIP);
+   NV50_PRIM_GL_CASE(TRIANGLES);
+   NV50_PRIM_GL_CASE(TRIANGLE_STRIP);
+   NV50_PRIM_GL_CASE(TRIANGLE_FAN);
+   NV50_PRIM_GL_CASE(QUADS);
+   NV50_PRIM_GL_CASE(QUAD_STRIP);
+   NV50_PRIM_GL_CASE(POLYGON);
+   NV50_PRIM_GL_CASE(LINES_ADJACENCY);
+   NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+   NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+   NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+   /*
+   NV50_PRIM_GL_CASE(PATCHES); */
+   default:
+      break;
+   }
+
+   return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+}
+
+/* Draw by writing translated vertex data inline into the command stream.
+ *
+ * Sets up the translate object with CPU pointers to all vertex buffers,
+ * resolves the index buffer (if any), programs primitive restart and then
+ * emits one VERTEX_BEGIN_GL ... VERTEX_END_GL sequence per instance.
+ * Returns early without drawing if an indexed draw has no index data or if a
+ * stream-output sourced draw has no query attached to its target.
+ */
+void
+nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
+{
+ struct push_context ctx;
+ unsigned i, index_size;
+ unsigned inst_count = info->instance_count;
+ unsigned vert_count = info->count;
+ boolean apply_bias = info->indexed && info->index_bias;
+
+ /* note: ctx.edgeflag and ctx.edgeflag_attr are not initialized here */
+ ctx.push = nv50->base.pushbuf;
+ ctx.translate = nv50->vertex->translate;
+ ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit;
+ ctx.vertex_words = nv50->vertex->vertex_size;
+
+ for (i = 0; i < nv50->num_vtxbufs; ++i) {
+ const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
+ const uint8_t *data;
+
+ /* resource backed buffers are mapped for reading, otherwise the user
+ * pointer is used directly */
+ if (unlikely(vb->buffer))
+ data = nouveau_resource_map_offset(&nv50->base,
+ nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD);
+ else
+ data = vb->user_buffer;
+
+ /* the index bias is applied by moving the base pointer; buffers flagged
+ * in instance_bufs are left alone */
+ if (apply_bias && likely(!(nv50->vertex->instance_bufs & (1 << i))))
+ data += (ptrdiff_t)info->index_bias * vb->stride;
+
+ ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
+ }
+
+ if (info->indexed) {
+ if (nv50->idxbuf.buffer) {
+ ctx.idxbuf = nouveau_resource_map_offset(&nv50->base,
+ nv04_resource(nv50->idxbuf.buffer), nv50->idxbuf.offset,
+ NOUVEAU_BO_RD);
+ } else {
+ ctx.idxbuf = nv50->idxbuf.user_buffer;
+ }
+ /* nothing to draw without index data */
+ if (!ctx.idxbuf)
+ return;
+ index_size = nv50->idxbuf.index_size;
+ ctx.primitive_restart = info->primitive_restart;
+ ctx.restart_index = info->restart_index;
+ } else {
+ if (unlikely(info->count_from_stream_output)) {
+ struct pipe_context *pipe = &nv50->base.pipe;
+ struct nv50_so_target *targ;
+ targ = nv50_so_target(info->count_from_stream_output);
+ if (!targ->pq) {
+ NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
+ return;
+ }
+ /* blocking read of the target's query; the result is divided by the
+ * target stride to get a vertex count (presumably the query holds a
+ * byte offset -- TODO confirm) */
+ pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+ vert_count /= targ->stride;
+ }
+ ctx.idxbuf = NULL;
+ index_size = 0;
+ ctx.primitive_restart = FALSE;
+ ctx.restart_index = 0;
+ }
+
+ ctx.instance_id = info->start_instance;
+ ctx.prim = nv50_prim_gl(info->mode);
+
+ /* enable restart with its index, or disable it if it was left enabled by
+ * a previous draw */
+ if (info->primitive_restart) {
+ BEGIN_NV04(ctx.push, NV50_3D(PRIM_RESTART_ENABLE), 2);
+ PUSH_DATA (ctx.push, 1);
+ PUSH_DATA (ctx.push, info->restart_index);
+ } else
+ if (nv50->state.prim_restart) {
+ BEGIN_NV04(ctx.push, NV50_3D(PRIM_RESTART_ENABLE), 1);
+ PUSH_DATA (ctx.push, 0);
+ }
+ nv50->state.prim_restart = info->primitive_restart;
+
+ /* one begin/end pair per instance; index_size selects the emitter */
+ while (inst_count--) {
+ BEGIN_NV04(ctx.push, NV50_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (ctx.push, ctx.prim);
+ switch (index_size) {
+ case 0:
+ emit_vertices_seq(&ctx, info->start, vert_count);
+ break;
+ case 1:
+ emit_vertices_i08(&ctx, info->start, vert_count);
+ break;
+ case 2:
+ emit_vertices_i16(&ctx, info->start, vert_count);
+ break;
+ case 4:
+ emit_vertices_i32(&ctx, info->start, vert_count);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ BEGIN_NV04(ctx.push, NV50_3D(VERTEX_END_GL), 1);
+ PUSH_DATA (ctx.push, 0);
+
+ /* every instance after the first is flagged INSTANCE_NEXT */
+ ctx.instance_id++;
+ ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
new file mode 100644
index 00000000000..6f25a0822c4
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -0,0 +1,399 @@
+/*
+ * Copyright 2011 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christoph Bumiller
+ */
+
+#define NV50_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "nv50/nv50_context.h"
+#include "nv_object.xml.h"
+
+/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
+ * (since we use only a single GPU channel per screen) will not work properly.
+ *
+ * The first is not that big of an issue because OpenGL does not allow nested
+ * queries anyway.
+ */
+
+/* Driver-side query object; handed to gallium as an opaque struct pipe_query
+ * pointer (see nv50_query()).
+ */
+struct nv50_query {
+ uint32_t *data; /* CPU pointer to the current result slot in the mapped bo */
+ uint16_t type; /* PIPE_QUERY_* or NVA0_QUERY_* */
+ uint16_t index; /* buffer index for the stream output offset query */
+ uint32_t sequence; /* value the GPU writes back to signal completion */
+ struct nouveau_bo *bo; /* buffer object holding the results */
+ uint32_t base; /* start of this query's allocation inside bo */
+ uint32_t offset; /* base + i * 16 */
+ boolean ready; /* result is known to be available */
+ boolean flushed; /* push buffer was already kicked while polling */
+ boolean is64bit; /* 64-bit counter query; data[0] holds no sequence */
+ struct nouveau_mm_allocation *mm; /* suballocation handle for bo/base */
+};
+
+/* Size requested per query allocation; occlusion queries step through it in
+ * 16 byte slots and reallocate once it is used up. */
+#define NV50_QUERY_ALLOC_SPACE 128
+
+/* Downcast from the opaque gallium query handle to the driver's query. */
+static INLINE struct nv50_query *
+nv50_query(struct pipe_query *pipe)
+{
+   struct nv50_query *q = (struct nv50_query *)pipe;
+
+   return q;
+}
+
+/* (Re)allocate the result storage of a query.
+ *
+ * Any existing storage is released first: immediately when the query result
+ * is ready, otherwise deferred to the screen's current fence.  With size == 0
+ * the function only releases.  Otherwise size bytes are allocated from the
+ * GART allocator and mapped, and q->offset / q->data are pointed at the start
+ * of the new storage.
+ *
+ * Returns TRUE on success (or after a pure release), FALSE if the allocation
+ * or the mapping fails.
+ */
+static boolean
+nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
+{
+   struct nv50_screen *screen = nv50->screen;
+
+   if (q->bo) {
+      nouveau_bo_ref(NULL, &q->bo);
+      if (q->mm && q->ready)
+         nouveau_mm_free(q->mm);
+      else if (q->mm)
+         nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work,
+                            q->mm);
+   }
+
+   if (!size)
+      return TRUE;
+
+   q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
+   if (!q->bo)
+      return FALSE;
+   q->offset = q->base;
+
+   if (nouveau_bo_map(q->bo, 0, screen->base.client)) {
+      /* mapping failed: give the fresh allocation back */
+      nv50_query_allocate(nv50, q, 0);
+      return FALSE;
+   }
+   q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
+
+   return TRUE;
+}
+
+/* pipe_context::destroy_query: release the result storage, then the query. */
+static void
+nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+   struct nv50_query *q = nv50_query(pq);
+
+   nv50_query_allocate(nv50_context(pipe), q, 0);
+   FREE(q);
+}
+
+/* pipe_context::create_query: allocate a query of the given type together
+ * with NV50_QUERY_ALLOC_SPACE bytes of result storage.  Returns NULL if
+ * either allocation fails.
+ */
+static struct pipe_query *
+nv50_query_create(struct pipe_context *pipe, unsigned type)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nv50_query *q = CALLOC_STRUCT(nv50_query);
+
+   if (!q)
+      return NULL;
+
+   if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) {
+      FREE(q);
+      return NULL;
+   }
+
+   q->type = type;
+
+   /* the primitive / stream output counters are 64 bits wide */
+   switch (type) {
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+      q->is64bit = TRUE;
+      break;
+   default:
+      q->is64bit = FALSE;
+      break;
+   }
+
+   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+      /* we advance before query_begin ! */
+      q->data -= 16 / sizeof(*q->data);
+      q->offset -= 16;
+   }
+
+   return (struct pipe_query *)q;
+}
+
+/* Emit a QUERY_GET: ask the GPU to write a report, selected by the get word,
+ * together with q->sequence to the query's buffer at (q->offset + offset).
+ */
+static void
+nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
+               unsigned offset, uint32_t get)
+{
+   const unsigned dst = q->offset + offset;
+
+   PUSH_SPACE(push, 5);
+   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+   BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
+   PUSH_DATAh(push, q->bo->offset + dst);
+   PUSH_DATA (push, q->bo->offset + dst);
+   PUSH_DATA (push, q->sequence);
+   PUSH_DATA (push, get);
+}
+
+/* pipe_context::begin_query: start counting for the given query.
+ *
+ * Occlusion queries move on to a fresh 16 byte result slot (reallocating the
+ * storage once it is used up) and reset/enable the sample counter; the
+ * counter style queries record their start value at a higher offset of the
+ * result storage so nv50_query_result() can subtract it.
+ */
+static void
+nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_query *q = nv50_query(pq);
+
+ /* For occlusion queries we have to change the storage, because a previous
+ * query might set the initial render condition to FALSE even *after* we re-
+ * initialized it to TRUE.
+ */
+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+ q->offset += 16;
+ q->data += 16 / sizeof(*q->data);
+ /* all slots used: get new storage (the return value is not checked) */
+ if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE)
+ nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE);
+
+ /* XXX: can we do this with the GPU, and sync with respect to a previous
+ * query ?
+ */
+ q->data[1] = 1; /* initial render condition = TRUE */
+ }
+ /* make data[0] differ from the sequence the GPU will write on completion,
+ * see nv50_query_ready() */
+ if (!q->is64bit)
+ q->data[0] = q->sequence++; /* the previously used one */
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ PUSH_SPACE(push, 4);
+ BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
+ PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ break;
+ /* start values go 0x10 bytes (0x20/0x30 for SO statistics) into the
+ * storage; the matching end values are written at 0x00 (0x00/0x10) */
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nv50_query_get(push, q, 0x10, 0x06805002);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nv50_query_get(push, q, 0x10, 0x05805002);
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nv50_query_get(push, q, 0x20, 0x05805002);
+ nv50_query_get(push, q, 0x30, 0x06805002);
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ nv50_query_get(push, q, 0x10, 0x00005002);
+ break;
+ default:
+ break;
+ }
+ q->ready = FALSE;
+}
+
+/* pipe_context::end_query: request the final report for the query.
+ *
+ * End values are written at the start of the query's current result slot.
+ * Queries that have no begin (timestamp, GPU finished) bump the sequence
+ * number here instead of in nv50_query_begin().  Also called directly by
+ * nva0_so_target_save_offset() for the stream output offset query.
+ */
+static void
+nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_query *q = nv50_query(pq);
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ /* write the sample count, then stop counting */
+ nv50_query_get(push, q, 0, 0x0100f002);
+ PUSH_SPACE(push, 2);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nv50_query_get(push, q, 0, 0x06805002);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nv50_query_get(push, q, 0, 0x05805002);
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nv50_query_get(push, q, 0x00, 0x05805002);
+ nv50_query_get(push, q, 0x10, 0x06805002);
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ q->sequence++;
+ /* fall through */
+ case PIPE_QUERY_TIME_ELAPSED:
+ nv50_query_get(push, q, 0, 0x00005002);
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ q->sequence++;
+ nv50_query_get(push, q, 0, 0x1000f010);
+ break;
+ case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+ /* q->index (the stream output buffer) is encoded into the get word */
+ nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ /* nothing to ask the GPU for */
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ q->ready = q->flushed = FALSE;
+}
+
+/* Whether the result is available without waiting: either already marked
+ * ready, or (32-bit queries only) the GPU has written the current sequence
+ * number into the first result word.
+ */
+static INLINE boolean
+nv50_query_ready(struct nv50_query *q)
+{
+   if (q->ready)
+      return TRUE;
+   if (q->is64bit)
+      return FALSE;
+
+   return q->data[0] == q->sequence;
+}
+
+/* pipe_context::get_query_result: fetch the result of a query.
+ *
+ * If the result is not available yet and wait is FALSE, the push buffer is
+ * kicked once and FALSE is returned; with wait set the function blocks on the
+ * query's buffer object.  Returns TRUE once result has been filled in, FALSE
+ * if the result is not ready, waiting failed or the query type is unknown.
+ */
+static boolean
+nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, union pipe_query_result *result)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_query *q = nv50_query(pq);
+ /* differently typed views of the caller's result and of the GPU data */
+ uint64_t *res64 = (uint64_t *)result;
+ uint32_t *res32 = (uint32_t *)result;
+ boolean *res8 = (boolean *)result;
+ uint64_t *data64 = (uint64_t *)q->data;
+
+ if (!q->ready) /* update ? */
+ q->ready = nv50_query_ready(q);
+ if (!q->ready) {
+ if (!wait) {
+ /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
+ if (!q->flushed) {
+ q->flushed = TRUE;
+ PUSH_KICK(nv50->base.pushbuf);
+ }
+ return FALSE;
+ }
+ if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
+ return FALSE;
+ }
+ q->ready = TRUE;
+
+ /* begin values were written 0x10 bytes (two 64-bit words) after the end
+ * values, or 0x20 bytes (four words) for SO statistics */
+ switch (q->type) {
+ case PIPE_QUERY_GPU_FINISHED:
+ res8[0] = TRUE;
+ break;
+ case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
+ res64[0] = q->data[1];
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
+ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
+ res64[0] = data64[0] - data64[2];
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ res64[0] = data64[0] - data64[4];
+ res64[1] = data64[2] - data64[6];
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ res64[0] = data64[1];
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ res64[0] = 1000000000;
+ /* element 8 of the boolean view; lands right after the 64-bit value
+ * above assuming boolean is one byte -- TODO confirm */
+ res8[8] = FALSE;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ res64[0] = data64[1] - data64[3];
+ break;
+ case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+ res32[0] = q->data[1];
+ break;
+ default:
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/* Emit a semaphore acquire on the query's result slot: the trigger is
+ * ACQUIRE_EQUAL with the query's current sequence number as the value.
+ */
+void
+nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
+{
+   struct nv50_query *query = nv50_query(pq);
+   const unsigned slot = query->offset;
+
+   PUSH_SPACE(push, 5);
+   PUSH_REFN (push, query->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+
+   BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
+   PUSH_DATAh(push, query->bo->offset + slot);
+   PUSH_DATA (push, query->bo->offset + slot);
+   PUSH_DATA (push, query->sequence);
+   PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
+}
+
+/* pipe_context::render_condition: make rendering conditional on a query.
+ *
+ * The arguments are remembered in the context.  Without a query the condition
+ * mode is set to ALWAYS; otherwise the condition address is pointed at the
+ * query's result slot with mode RES_NON_ZERO, preceded by a graph serialize
+ * for the two waiting modes.
+ */
+static void
+nv50_render_condition(struct pipe_context *pipe,
+                      struct pipe_query *pq,
+                      boolean condition, uint mode)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_query *q;
+
+   nv50->cond_query = pq;
+   nv50->cond_cond = condition;
+   nv50->cond_mode = mode;
+
+   PUSH_SPACE(push, 6);
+
+   if (pq) {
+      q = nv50_query(pq);
+
+      switch (mode) {
+      case PIPE_RENDER_COND_WAIT:
+      case PIPE_RENDER_COND_BY_REGION_WAIT:
+         BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+         PUSH_DATA (push, 0);
+         break;
+      default:
+         break;
+      }
+
+      BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3);
+      PUSH_DATAh(push, q->bo->offset + q->offset);
+      PUSH_DATA (push, q->bo->offset + q->offset);
+      PUSH_DATA (push, NV50_3D_COND_MODE_RES_NON_ZERO);
+   } else {
+      BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+      PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+   }
+}
+
+/* Submit 4 bytes of the query's result storage, taken at result_offset from
+ * the current result slot, as push buffer data, so that a value written by
+ * the GPU is consumed as part of the command stream.
+ */
+void
+nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
+ struct pipe_query *pq, unsigned result_offset)
+{
+ struct nv50_query *q = nv50_query(pq);
+
+ /* XXX: does this exist ? */
+ /* note: as written this expands to 0, so the length below is plain 4 */
+#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
+
+ nouveau_pushbuf_space(push, 0, 0, 1);
+ nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
+ NV50_IB_ENTRY_1_NO_PREFETCH);
+}
+
+/* Record the current write offset of stream output buffer `index` in the
+ * query attached to the target, optionally serializing the graphics engine
+ * first.
+ */
+void
+nva0_so_target_save_offset(struct pipe_context *pipe,
+                           struct pipe_stream_output_target *ptarg,
+                           unsigned index, boolean serialize)
+{
+   struct nv50_so_target *targ = nv50_so_target(ptarg);
+   struct nv50_query *q;
+
+   if (serialize) {
+      struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
+
+      PUSH_SPACE(push, 2);
+      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+      PUSH_DATA (push, 0);
+   }
+
+   /* the buffer index is picked up by nv50_query_end() */
+   q = nv50_query(targ->pq);
+   q->index = index;
+   nv50_query_end(pipe, targ->pq);
+}
+
+/* Install the query and conditional rendering entry points of this file in
+ * the context's pipe_context vtable.
+ */
+void
+nv50_init_query_functions(struct nv50_context *nv50)
+{
+   struct pipe_context *ctx = &nv50->base.pipe;
+
+   /* object lifetime */
+   ctx->create_query = nv50_query_create;
+   ctx->destroy_query = nv50_query_destroy;
+
+   /* measurement */
+   ctx->begin_query = nv50_query_begin;
+   ctx->end_query = nv50_query_end;
+   ctx->get_query_result = nv50_query_result;
+
+   /* conditional rendering */
+   ctx->render_condition = nv50_render_condition;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.c b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
new file mode 100644
index 00000000000..7fbb0a92bf6
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
@@ -0,0 +1,104 @@
+
+#include "pipe/p_context.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nouveau_screen.h"
+
+#include "nv50/nv50_resource.h"
+
+/*
+ * Create a resource from a template: buffers go to the common nouveau
+ * buffer code, every other target is created as a miptree.
+ */
+static struct pipe_resource *
+nv50_resource_create(struct pipe_screen *screen,
+                     const struct pipe_resource *templ)
+{
+   if (templ->target == PIPE_BUFFER)
+      return nouveau_buffer_create(screen, templ);
+
+   return nv50_miptree_create(screen, templ);
+}
+
+/*
+ * Import a resource from a winsys handle.  Only non-buffer targets can be
+ * imported; a PIPE_BUFFER template yields NULL.
+ */
+static struct pipe_resource *
+nv50_resource_from_handle(struct pipe_screen * screen,
+                          const struct pipe_resource *templ,
+                          struct winsys_handle *whandle)
+{
+   if (templ->target != PIPE_BUFFER)
+      return nv50_miptree_from_handle(screen, templ, whandle);
+
+   return NULL;
+}
+
+/*
+ * Build a surface that views a range of elements of a buffer resource.
+ *
+ * The new surface holds a reference on `pbuf`, copies format, writable flag
+ * and element range from `templ`, and is one row high and one slice deep.
+ * Returns NULL if the surface cannot be allocated.
+ */
+struct pipe_surface *
+nv50_surface_from_buffer(struct pipe_context *pipe,
+                         struct pipe_resource *pbuf,
+                         const struct pipe_surface *templ)
+{
+   struct nv50_surface *surf = CALLOC_STRUCT(nv50_surface);
+   struct pipe_surface *ps;
+
+   if (surf == NULL)
+      return NULL;
+   ps = &surf->base;
+
+   pipe_reference_init(&ps->reference, 1);
+   pipe_resource_reference(&ps->texture, pbuf);
+   ps->context = pipe;
+
+   ps->format = templ->format;
+   ps->writable = templ->writable;
+   ps->u.buf.first_element = templ->u.buf.first_element;
+   ps->u.buf.last_element = templ->u.buf.last_element;
+
+   /* Byte offset of the first element, rounded down to a multiple of 128. */
+   surf->offset =
+      templ->u.buf.first_element * util_format_get_blocksize(ps->format);
+   surf->offset &= ~0x7f; /* FIXME: RT_ADDRESS requires 128 byte alignment */
+
+   /* Inclusive element range, laid out as a single row. */
+   surf->width = templ->u.buf.last_element - templ->u.buf.first_element + 1;
+   surf->height = 1;
+   surf->depth = 1;
+
+   ps->width = surf->width;
+   ps->height = surf->height;
+
+   return ps;
+}
+
+/*
+ * pipe_context::create_surface hook: buffer resources (the unlikely case)
+ * get a buffer surface, everything else a miptree surface.
+ */
+static struct pipe_surface *
+nv50_surface_create(struct pipe_context *pipe,
+                    struct pipe_resource *pres,
+                    const struct pipe_surface *templ)
+{
+   return unlikely(pres->target == PIPE_BUFFER) ?
+      nv50_surface_from_buffer(pipe, pres, templ) :
+      nv50_miptree_surface_new(pipe, pres, templ);
+}
+
+/*
+ * Destroy a surface: drop its reference on the backing resource, then free
+ * the nv50_surface that contains it.  `pipe` is not used.
+ */
+void
+nv50_surface_destroy(struct pipe_context *pipe, struct pipe_surface *ps)
+{
+   pipe_resource_reference(&ps->texture, NULL);
+   FREE(nv50_surface(ps));
+}
+
+/*
+ * Install the context-level resource callbacks: transfers are routed
+ * through the u_transfer vtbl helpers, surfaces through the nv50 functions.
+ */
+void
+nv50_init_resource_functions(struct pipe_context *pcontext)
+{
+   /* surfaces */
+   pcontext->create_surface = nv50_surface_create;
+   pcontext->surface_destroy = nv50_surface_destroy;
+
+   /* transfers */
+   pcontext->transfer_map = u_transfer_map_vtbl;
+   pcontext->transfer_unmap = u_transfer_unmap_vtbl;
+   pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
+   pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
+}
+
+/*
+ * Install the screen-level resource callbacks: creation and import are
+ * nv50-specific, handle export and destruction use the u_resource vtbl
+ * helpers.
+ */
+void
+nv50_screen_init_resource_functions(struct pipe_screen *pscreen)
+{
+   pscreen->resource_destroy = u_resource_destroy_vtbl;
+   pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+   pscreen->resource_from_handle = nv50_resource_from_handle;
+   pscreen->resource_create = nv50_resource_create;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
new file mode 100644
index 00000000000..c06daa31c5d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
@@ -0,0 +1,153 @@
+
+#ifndef __NV50_RESOURCE_H__
+#define __NV50_RESOURCE_H__
+
+#include "util/u_transfer.h"
+#include "util/u_double_list.h"
+
+#include "nouveau_winsys.h"
+#include "nouveau_buffer.h"
+
+#ifndef __NVC0_RESOURCE_H__ /* make sure we don't use these in nvc0: */
+
+void
+nv50_init_resource_functions(struct pipe_context *pcontext);
+
+void
+nv50_screen_init_resource_functions(struct pipe_screen *pscreen);
+
+#define NV50_RESOURCE_FLAG_VIDEO (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 0)
+#define NV50_RESOURCE_FLAG_NOALLOC (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 1)
+
+#define NV50_TILE_SHIFT_X(m) 6 /* log2 of NV50_TILE_SIZE_X; independent of the tile mode m */
+#define NV50_TILE_SHIFT_Y(m) ((((m) >> 4) & 0xf) + 2) /* log2 of NV50_TILE_SIZE_Y; uses bits 4..7 of m */
+#define NV50_TILE_SHIFT_Z(m) ((((m) >> 8) & 0xf) + 0) /* log2 of NV50_TILE_SIZE_Z; uses bits 8..11 of m */
+
+#define NV50_TILE_SIZE_X(m) 64
+#define NV50_TILE_SIZE_Y(m) ( 4 << (((m) >> 4) & 0xf))
+#define NV50_TILE_SIZE_Z(m) ( 1 << (((m) >> 8) & 0xf))
+
+#define NV50_TILE_SIZE_2D(m) (NV50_TILE_SIZE_X(m) << NV50_TILE_SHIFT_Y(m)) /* SIZE_X * SIZE_Y */
+
+#define NV50_TILE_SIZE(m) (NV50_TILE_SIZE_2D(m) << NV50_TILE_SHIFT_Z(m)) /* SIZE_X * SIZE_Y * SIZE_Z */
+
+#endif /* __NVC0_RESOURCE_H__ */
+
+uint32_t
+nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz);
+
+struct nv50_miptree_level {
+ uint32_t offset; /* presumably the level's offset inside the miptree storage -- TODO confirm */
+ uint32_t pitch; /* presumably the level's row pitch -- TODO confirm */
+ uint32_t tile_mode; /* presumably the value decoded by the NV50_TILE_* macros -- TODO confirm */
+};
+
+#define NV50_MAX_TEXTURE_LEVELS 16
+
+struct nv50_miptree {
+ struct nv04_resource base; /* nv50_miptree() casts a pipe_resource pointer to this struct, so this presumably has to stay first */
+ struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS]; /* per-level layout; at most NV50_MAX_TEXTURE_LEVELS entries */
+ uint32_t total_size;
+ uint32_t layer_stride;
+ boolean layout_3d; /* TRUE if layer count varies with mip level */
+ uint8_t ms_x; /* log2 of number of samples in x/y dimension */
+ uint8_t ms_y;
+ uint8_t ms_mode;
+};
+
+/* Downcast a pipe_resource pointer to the nv50_miptree it is embedded in. */
+static INLINE struct nv50_miptree *
+nv50_miptree(struct pipe_resource *pt)
+{
+   struct nv50_miptree *mt = (struct nv50_miptree *)pt;
+
+   return mt;
+}
+
+
+#define NV50_TEXVIEW_SCALED_COORDS (1 << 0)
+#define NV50_TEXVIEW_FILTER_MSAA8 (1 << 1)
+#define NV50_TEXVIEW_ACCESS_RESOLVE (1 << 2)
+
+
+/* Internal functions:
+ */
+boolean
+nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align);
+
+struct pipe_resource *
+nv50_miptree_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *tmp);
+
+void
+nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt);
+
+struct pipe_resource *
+nv50_miptree_from_handle(struct pipe_screen *pscreen,
+ const struct pipe_resource *template,
+ struct winsys_handle *whandle);
+
+boolean
+nv50_miptree_get_handle(struct pipe_screen *pscreen,
+ struct pipe_resource *pt,
+ struct winsys_handle *whandle);
+
+struct nv50_surface {
+ struct pipe_surface base; /* nv50_surface() casts a pipe_surface pointer to this struct, so this presumably has to stay first */
+ uint32_t offset; /* buffer surfaces: byte offset of the first element, rounded down to 128 (see nv50_surface_from_buffer) */
+ uint32_t width; /* buffer surfaces: number of elements in the viewed range */
+ uint16_t height; /* buffer surfaces: always 1 */
+ uint16_t depth; /* buffer surfaces: always 1 */
+};
+
+/* Downcast a pipe_surface pointer to the nv50_surface it is embedded in. */
+static INLINE struct nv50_surface *
+nv50_surface(struct pipe_surface *ps)
+{
+   struct nv50_surface *sf = (struct nv50_surface *)ps;
+
+   return sf;
+}
+
+/*
+ * Map a combined depth/stencil format to the matching stencil-only view
+ * format.  Any other format is returned unchanged.
+ */
+static INLINE enum pipe_format
+nv50_zs_to_s_format(enum pipe_format format)
+{
+   if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT)
+      return PIPE_FORMAT_X24S8_UINT;
+   if (format == PIPE_FORMAT_S8_UINT_Z24_UNORM)
+      return PIPE_FORMAT_S8X24_UINT;
+   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+      return PIPE_FORMAT_X32_S8X24_UINT;
+
+   return format;
+}
+
+#ifndef __NVC0_RESOURCE_H__
+
+unsigned
+nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z);
+
+struct pipe_surface *
+nv50_miptree_surface_new(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_surface *templ);
+
+void *
+nv50_miptree_transfer_map(struct pipe_context *pctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **ptransfer);
+void
+nv50_miptree_transfer_unmap(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+
+#endif /* __NVC0_RESOURCE_H__ */
+
+struct nv50_surface *
+nv50_surface_from_miptree(struct nv50_miptree *mt,
+ const struct pipe_surface *templ);
+
+struct pipe_surface *
+nv50_surface_from_buffer(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *templ);
+
+void
+nv50_surface_destroy(struct pipe_context *, struct pipe_surface *);
+
+#endif /* __NV50_RESOURCE_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
new file mode 100644
index 00000000000..f454ec77656
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -0,0 +1,845 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "pipe/p_screen.h"
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_screen.h"
+
+#include "nouveau_vp3_video.h"
+
+#include "nv_object.xml.h"
+#include <errno.h>
+
+#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
+# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
+#endif
+
+/* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */
+#define LOCAL_WARPS_ALLOC 32
+/* affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP */
+#define STACK_WARPS_ALLOC 32
+
+#define THREADS_IN_WARP 32
+
+#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
+
+/*
+ * pipe_screen::is_format_supported hook.
+ *
+ * Accepts sample counts 0, 1, 2, 4 and 8 (8 only for formats narrower than
+ * 128 bits per block), rejects Z16_UNORM on 3D classes older than NVA0, and
+ * otherwise checks the requested bindings against nv50_format_table.
+ * Transfer and shared bindings never restrict the result.
+ */
+static boolean
+nv50_screen_is_format_supported(struct pipe_screen *pscreen,
+                                enum pipe_format format,
+                                enum pipe_texture_target target,
+                                unsigned sample_count,
+                                unsigned bindings)
+{
+   switch (sample_count) {
+   case 0:
+   case 1:
+   case 2:
+   case 4:
+      break;
+   case 8:
+      if (util_format_get_blocksizebits(format) >= 128)
+         return FALSE;
+      break;
+   default:
+      return FALSE;
+   }
+
+   if (!util_format_is_supported(format, bindings))
+      return FALSE;
+
+   if (format == PIPE_FORMAT_Z16_UNORM &&
+       nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
+      return FALSE;
+
+   /* transfers & shared are always supported */
+   bindings &= ~(PIPE_BIND_TRANSFER_READ |
+                 PIPE_BIND_TRANSFER_WRITE |
+                 PIPE_BIND_SHARED);
+
+   return (nv50_format_table[format].usage & bindings) == bindings;
+}
+/*
+ * pipe_screen::get_param hook: report the value of an integer capability.
+ *
+ * Most answers are constants; a few depend on the 3D object class (NVA0 /
+ * NVA3 and newer).  A cap that is not listed is logged through NOUVEAU_ERR
+ * and reported as 0.
+ */
+static int
+nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+ const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
+
+ switch (param) {
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 64;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 14;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 12;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 14;
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return 512;
+ case PIPE_CAP_MIN_TEXEL_OFFSET:
+ return -8;
+ case PIPE_CAP_MAX_TEXEL_OFFSET:
+ return 7;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ case PIPE_CAP_SCALED_RESOLVE:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ return 65536;
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS;
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ return 0;
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ return 0;
+ /*
+ return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS;
+ */
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_SM3:
+ return 1;
+ case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ return 140;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 8;
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+ return 1;
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ return 4;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ return 64;
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ return (class_3d >= NVA0_3D_CLASS) ? 1 : 0;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ return 1;
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return 0;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return 0;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_TEXTURE_BARRIER:
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ case PIPE_CAP_START_INSTANCE:
+ return 1;
+ case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+ return 0; /* state trackers will know better */
+ case PIPE_CAP_USER_CONSTANT_BUFFERS:
+ case PIPE_CAP_USER_INDEX_BUFFERS:
+ case PIPE_CAP_USER_VERTEX_BUFFERS:
+ return 1;
+ case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+ return 256;
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ return 1; /* 256 for binding as RT, but that's not possible in GL */
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_TGSI_TEXCOORD:
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ return 0;
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ return 1;
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ return 0;
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+ return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
+ case PIPE_CAP_ENDIANNESS:
+ return PIPE_ENDIAN_LITTLE;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+/*
+ * pipe_screen::get_shader_param hook: report a per-shader-stage limit.
+ *
+ * Only the vertex, geometry and fragment stages are handled; for any other
+ * stage every cap is reported as 0.  A cap that is not listed is logged
+ * through NOUVEAU_ERR and reported as 0.
+ */
+static int
+nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+ enum pipe_shader_cap param)
+{
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_GEOMETRY:
+ case PIPE_SHADER_FRAGMENT:
+ break;
+ default:
+ return 0;
+ }
+
+ switch (param) {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return 16384;
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return 4;
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ if (shader == PIPE_SHADER_VERTEX)
+ return 32;
+ return 0x300 / 16;
+ case PIPE_SHADER_CAP_MAX_CONSTS:
+ return 65536 / 16;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return NV50_MAX_PIPE_CONSTBUFS;
+ case PIPE_SHADER_CAP_MAX_ADDRS:
+ return 1;
+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ return shader != PIPE_SHADER_FRAGMENT;
+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_PREDS:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE; /* max_tls_space is in bytes; one temp is ONE_TEMP_SIZE bytes */
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 1;
+ case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+ return 0;
+ case PIPE_SHADER_CAP_SUBROUTINES:
+ return 0; /* please inline, or provide function declarations */
+ case PIPE_SHADER_CAP_INTEGERS:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+ return 32;
+ default:
+ NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
+ return 0;
+ }
+}
+
+/*
+ * pipe_screen::get_paramf hook: report the value of a floating-point
+ * capability.  A cap that is not listed is logged through NOUVEAU_ERR and
+ * reported as 0.0f.
+ */
+static float
+nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+   switch (param) {
+   case PIPE_CAPF_MAX_LINE_WIDTH:
+   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+      return 10.0f;
+   case PIPE_CAPF_MAX_POINT_WIDTH:
+   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+      return 64.0f;
+   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+      return 16.0f;
+   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+      return 4.0f;
+   default:
+      /* Name the enum actually being queried, so the log line cannot be
+       * mistaken for one from nv50_screen_get_param(). */
+      NOUVEAU_ERR("unknown PIPE_CAPF %d\n", param);
+      return 0.0f;
+   }
+}
+/*
+ * pipe_screen::destroy hook: tear down everything owned by the screen and
+ * free it.
+ *
+ * nv50_screen_create() also jumps here on failure, so this can run on a
+ * screen that is only partly set up; the fence, pushbuf and blitter are
+ * therefore checked before use.
+ * NOTE(review): the remaining calls presumably accept NULL / unset members
+ * -- confirm for nouveau_heap_destroy() and nouveau_screen_fini().
+ */
+static void
+nv50_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+
+ if (screen->base.fence.current) {
+ nouveau_fence_wait(screen->base.fence.current);
+ nouveau_fence_ref (NULL, &screen->base.fence.current);
+ }
+ if (screen->base.pushbuf)
+ screen->base.pushbuf->user_priv = NULL; /* undo the back-pointer set in nv50_screen_create() */
+
+ if (screen->blitter)
+ nv50_blitter_destroy(screen);
+
+ nouveau_bo_ref(NULL, &screen->code);
+ nouveau_bo_ref(NULL, &screen->tls_bo);
+ nouveau_bo_ref(NULL, &screen->stack_bo);
+ nouveau_bo_ref(NULL, &screen->txc);
+ nouveau_bo_ref(NULL, &screen->uniforms);
+ nouveau_bo_ref(NULL, &screen->fence.bo);
+
+ nouveau_heap_destroy(&screen->vp_code_heap);
+ nouveau_heap_destroy(&screen->gp_code_heap);
+ nouveau_heap_destroy(&screen->fp_code_heap);
+
+ FREE(screen->tic.entries); /* tsc.entries points into this same allocation, so it is not freed separately */
+
+ nouveau_object_del(&screen->tesla);
+ nouveau_object_del(&screen->eng2d);
+ nouveau_object_del(&screen->m2mf);
+ nouveau_object_del(&screen->sync);
+
+ nouveau_screen_fini(&screen->base);
+
+ FREE(screen);
+}
+/*
+ * Fence emit callback: increment the screen's fence sequence counter, hand
+ * the new value back through *sequence, and push a 5-word QUERY packet that
+ * carries the fence BO address, the sequence number and the QUERY_GET flags.
+ * Presumably the GPU then stores the sequence number in fence.bo, where
+ * nv50_screen_fence_update() reads it -- confirm against hardware docs.
+ */
+static void
+nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+
+ /* we need to do it after possible flush in MARK_RING */
+ *sequence = ++screen->base.fence.sequence;
+
+ PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4)); /* NOTE(review): no PUSH_SPACE here; the 5 words pushed match rsvd_kick = 5 set in nv50_screen_create() -- confirm that is what reserves the space */
+ PUSH_DATAh(push, screen->fence.bo->offset);
+ PUSH_DATA (push, screen->fence.bo->offset);
+ PUSH_DATA (push, *sequence);
+ PUSH_DATA (push, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 |
+ NV50_3D_QUERY_GET_UNK4 |
+ NV50_3D_QUERY_GET_UNIT_CROP |
+ NV50_3D_QUERY_GET_TYPE_QUERY |
+ NV50_3D_QUERY_GET_QUERY_SELECT_ZERO |
+ NV50_3D_QUERY_GET_SHORT);
+}
+
+/* Fence update callback: return the first word of the mapped fence buffer. */
+static u32
+nv50_screen_fence_update(struct pipe_screen *pscreen)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+
+   return screen->fence.map[0];
+}
+/*
+ * Emit the one-time initial hardware state on the screen's pushbuf and
+ * kick it.
+ *
+ * In order: bind the M2MF, 2D and 3D objects to their subchannels and set
+ * their DMA objects, program default 3D state, point the hardware at the
+ * code, local (TLS), stack, constant and TIC/TSC buffers, then set clip,
+ * viewport, scissor and rasterizer defaults.  All buffer objects and
+ * hardware objects referenced here must already exist.
+ */
+static void
+nv50_screen_init_hwctx(struct nv50_screen *screen)
+{
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+ struct nv04_fifo *fifo;
+ unsigned i;
+
+ fifo = (struct nv04_fifo *)screen->base.channel->data;
+
+ BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->m2mf->handle);
+ BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3);
+ PUSH_DATA (push, screen->sync->handle);
+ PUSH_DATA (push, fifo->vram);
+ PUSH_DATA (push, fifo->vram);
+
+ BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->eng2d->handle);
+ BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4);
+ PUSH_DATA (push, screen->sync->handle);
+ PUSH_DATA (push, fifo->vram);
+ PUSH_DATA (push, fifo->vram);
+ PUSH_DATA (push, fifo->vram);
+ BEGIN_NV04(push, NV50_2D(OPERATION), 1);
+ PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
+ BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, SUBC_2D(0x0888), 1);
+ PUSH_DATA (push, 1);
+
+ BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->tesla->handle);
+
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
+ BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1);
+ PUSH_DATA (push, screen->sync->handle);
+ BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11);
+ for (i = 0; i < 11; ++i)
+ PUSH_DATA(push, fifo->vram);
+ BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN);
+ for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i)
+ PUSH_DATA(push, fifo->vram);
+
+ BEGIN_NV04(push, NV50_3D(REG_MODE), 1);
+ PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED);
+ BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
+ PUSH_DATA (push, 0xf);
+
+ if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) { /* on unless the debug option turns it off */
+ BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
+ PUSH_DATA (push, 0x18);
+ }
+
+ BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, 1);
+
+ BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
+ PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
+ PUSH_DATA (push, 1);
+
+ if (screen->tesla->oclass >= NVA0_3D_CLASS) { /* same class test as PIPE_CAP_SEAMLESS_CUBE_MAP in nv50_screen_get_param() */
+ BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
+ PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
+ }
+
+ BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1);
+ PUSH_DATA (push, 0x3f);
+
+ BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2); /* code bo segment 0: vertex programs */
+ PUSH_DATAh(push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));
+ PUSH_DATA (push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));
+
+ BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2); /* code bo segment 1: fragment programs */
+ PUSH_DATAh(push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));
+ PUSH_DATA (push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));
+
+ BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2); /* code bo segment 2: geometry programs */
+ PUSH_DATAh(push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));
+ PUSH_DATA (push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));
+
+ BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->tls_bo->offset);
+ PUSH_DATA (push, screen->tls_bo->offset);
+ PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8)); /* same encoding as in nv50_tls_realloc() */
+
+ BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->stack_bo->offset);
+ PUSH_DATA (push, screen->stack_bo->offset);
+ PUSH_DATA (push, 4);
+
+ BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); /* uniforms bo is split into four 64 KiB segments */
+ PUSH_DATAh(push, screen->uniforms->offset + (0 << 16));
+ PUSH_DATA (push, screen->uniforms->offset + (0 << 16));
+ PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000);
+
+ BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->uniforms->offset + (1 << 16));
+ PUSH_DATA (push, screen->uniforms->offset + (1 << 16));
+ PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000);
+
+ BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->uniforms->offset + (2 << 16));
+ PUSH_DATA (push, screen->uniforms->offset + (2 << 16));
+ PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000);
+
+ BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
+ PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
+ PUSH_DATA (push, (NV50_CB_AUX << 16) | 0x0200);
+
+ BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3);
+ PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01);
+ PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21);
+ PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31);
+
+ /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */
+ BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
+ PUSH_DATA (push, ((1 << 9) << 6) | NV50_CB_AUX);
+ BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); /* address inside uniforms segment 3, the one bound as NV50_CB_AUX above */
+ PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + (1 << 9));
+ PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + (1 << 9));
+
+ /* max TIC (bits 4:8) & TSC bindings, per program type */
+ for (i = 0; i < 3; ++i) {
+ BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1);
+ PUSH_DATA (push, 0x54);
+ }
+
+ BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3); /* TIC table starts at offset 0 of the txc bo */
+ PUSH_DATAh(push, screen->txc->offset);
+ PUSH_DATA (push, screen->txc->offset);
+ PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
+
+ BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3); /* TSC table starts 65536 bytes into the txc bo */
+ PUSH_DATAh(push, screen->txc->offset + 65536);
+ PUSH_DATA (push, screen->txc->offset + 65536);
+ PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
+
+ BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1);
+ PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY);
+ BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
+ for (i = 0; i < 8 * 2; ++i)
+ PUSH_DATA(push, 0);
+ BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(0)), 2);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 1.0f);
+
+ BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+#ifdef NV50_SCISSORS_CLIPPING
+ PUSH_DATA (push, 0x0000);
+#else
+ PUSH_DATA (push, 0x1080);
+#endif
+
+ BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1);
+ PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT);
+
+ /* We use scissors instead of exact view volume clipping,
+ * so they're always enabled.
+ */
+ BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 3);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 8192 << 16);
+ PUSH_DATA (push, 8192 << 16);
+
+ BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1);
+ PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL);
+ BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);
+ PUSH_DATA (push, 0x11111111);
+ BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
+ PUSH_DATA (push, 1);
+
+ PUSH_KICK (push);
+}
+
+/*
+ * Allocate the VRAM buffer that backs shader local (temporary) storage.
+ *
+ * tls_space: requested bytes of local storage; the number of temps it
+ *            corresponds to is rounded up to a power of two.
+ * tls_size:  out, total size in bytes of the buffer that was requested.
+ *
+ * Updates screen->cur_tls_space (even if the allocation then fails) and
+ * stores the new buffer in screen->tls_bo.  Returns 0 on success or the
+ * error returned by nouveau_bo_new().
+ */
+static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space,
+                          uint64_t *tls_size)
+{
+   struct nouveau_device *dev = screen->base.device;
+   const unsigned temps = util_next_power_of_two(tls_space / ONE_TEMP_SIZE);
+   int ret;
+
+   screen->cur_tls_space = temps * ONE_TEMP_SIZE;
+   if (nouveau_mesa_debug)
+      debug_printf("allocating space for %u temps\n", temps);
+
+   /* Every factor is a 32-bit quantity; widen the first one so the product
+    * is computed in 64 bits and cannot wrap before it is stored. */
+   *tls_size = (uint64_t)screen->cur_tls_space *
+               util_next_power_of_two(screen->TPs) *
+               screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP;
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
+                        *tls_size, NULL, &screen->tls_bo);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret);
+      return ret;
+   }
+
+   return 0;
+}
+
+/*
+ * Grow the local-storage buffer so that it can hold tls_space bytes.
+ *
+ * Returns 0 if the current buffer is already large enough (nothing is
+ * touched), 1 if a new buffer was allocated and its address pushed to the
+ * hardware, -ENOMEM if the request exceeds screen->max_tls_space, or the
+ * error from nv50_tls_alloc().
+ *
+ * NOTE(review): the old buffer is released before the new one is allocated,
+ * so if the allocation fails screen->tls_bo is left NULL.
+ */
+int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
+{
+   struct nouveau_pushbuf *push = screen->base.pushbuf;
+   int ret;
+   uint64_t tls_size;
+
+   /* A request equal to the current size is already satisfied; do not free
+    * and re-allocate an identically sized buffer for it. */
+   if (tls_space <= screen->cur_tls_space)
+      return 0;
+   if (tls_space > screen->max_tls_space) {
+      /* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC /
+       * LOCAL_WARPS_NO_CLAMP) */
+      NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n",
+                  (unsigned)(tls_space / ONE_TEMP_SIZE),
+                  (unsigned)(screen->max_tls_space / ONE_TEMP_SIZE));
+      return -ENOMEM;
+   }
+
+   nouveau_bo_ref(NULL, &screen->tls_bo);
+   ret = nv50_tls_alloc(screen, tls_space, &tls_size);
+   if (ret)
+      return ret;
+
+   /* Same packet as in nv50_screen_init_hwctx(), now for the new buffer. */
+   BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
+   PUSH_DATAh(push, screen->tls_bo->offset);
+   PUSH_DATA (push, screen->tls_bo->offset);
+   PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
+
+   return 1;
+}
+
+/*
+ * Create and initialise the nv50 pipe_screen for a nouveau device.
+ *
+ * Sets up the base nouveau screen, installs the screen callbacks, creates
+ * the fence buffer, the notifier and the M2MF / 2D / 3D hardware objects,
+ * allocates the code, stack, local-storage, uniform and TIC/TSC buffers,
+ * creates the blitter, emits the initial hardware state and creates the
+ * first fence.
+ *
+ * Returns the new screen, or NULL on any failure; in that case everything
+ * created so far is released through nv50_screen_destroy().
+ */
+struct pipe_screen *
+nv50_screen_create(struct nouveau_device *dev)
+{
+   struct nv50_screen *screen;
+   struct pipe_screen *pscreen;
+   struct nouveau_object *chan;
+   uint64_t value;
+   uint32_t tesla_class;
+   unsigned stack_size;
+   int ret;
+
+   screen = CALLOC_STRUCT(nv50_screen);
+   if (!screen)
+      return NULL;
+   pscreen = &screen->base.base;
+
+   ret = nouveau_screen_init(&screen->base, dev);
+   if (ret) {
+      NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret);
+      goto fail;
+   }
+
+   /* TODO: Prevent FIFO prefetch before transfer of index buffers and
+    *  admit them to VRAM.
+    */
+   screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
+      PIPE_BIND_VERTEX_BUFFER;
+   screen->base.sysmem_bindings |=
+      PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
+
+   screen->base.pushbuf->user_priv = screen;
+   /* nv50_screen_fence_emit() pushes 5 words */
+   screen->base.pushbuf->rsvd_kick = 5;
+
+   chan = screen->base.channel;
+
+   pscreen->destroy = nv50_screen_destroy;
+   pscreen->context_create = nv50_create;
+   pscreen->is_format_supported = nv50_screen_is_format_supported;
+   pscreen->get_param = nv50_screen_get_param;
+   pscreen->get_shader_param = nv50_screen_get_shader_param;
+   pscreen->get_paramf = nv50_screen_get_paramf;
+
+   nv50_screen_init_resource_functions(pscreen);
+
+   /* Pick the video decoding backend by chipset. */
+   if (screen->base.device->chipset < 0x84 ||
+       debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
+      /* PMPEG */
+      nouveau_screen_init_vdec(&screen->base);
+   } else if (screen->base.device->chipset < 0x98 ||
+              screen->base.device->chipset == 0xa0) {
+      /* VP2 */
+      screen->base.base.get_video_param = nv84_screen_get_video_param;
+      screen->base.base.is_video_format_supported = nv84_screen_video_supported;
+   } else {
+      /* VP3/4 */
+      screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
+      screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
+   }
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
+                        NULL, &screen->fence.bo);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret);
+      goto fail;
+   }
+
+   /* nv50_screen_fence_update() dereferences fence.map unconditionally, so
+    * a failed mapping must abort screen creation. */
+   ret = nouveau_bo_map(screen->fence.bo, 0, NULL);
+   if (ret) {
+      NOUVEAU_ERR("Failed to map fence bo: %d\n", ret);
+      goto fail;
+   }
+   screen->fence.map = screen->fence.bo->map;
+   screen->base.fence.emit = nv50_screen_fence_emit;
+   screen->base.fence.update = nv50_screen_fence_update;
+
+   ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS,
+                            &(struct nv04_notify){ .length = 32 },
+                            sizeof(struct nv04_notify), &screen->sync);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate notifier: %d\n", ret);
+      goto fail;
+   }
+
+   ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS,
+                            NULL, 0, &screen->m2mf);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate PGRAPH context for M2MF: %d\n", ret);
+      goto fail;
+   }
+
+   ret = nouveau_object_new(chan, 0xbeef502d, NV50_2D_CLASS,
+                            NULL, 0, &screen->eng2d);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate PGRAPH context for 2D: %d\n", ret);
+      goto fail;
+   }
+
+   /* Select the 3D object class from the chipset id. */
+   switch (dev->chipset & 0xf0) {
+   case 0x50:
+      tesla_class = NV50_3D_CLASS;
+      break;
+   case 0x80:
+   case 0x90:
+      tesla_class = NV84_3D_CLASS;
+      break;
+   case 0xa0:
+      switch (dev->chipset) {
+      case 0xa0:
+      case 0xaa:
+      case 0xac:
+         tesla_class = NVA0_3D_CLASS;
+         break;
+      case 0xaf:
+         tesla_class = NVAF_3D_CLASS;
+         break;
+      default:
+         tesla_class = NVA3_3D_CLASS;
+         break;
+      }
+      break;
+   default:
+      NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", dev->chipset);
+      goto fail;
+   }
+   screen->base.class_3d = tesla_class;
+
+   ret = nouveau_object_new(chan, 0xbeef5097, tesla_class,
+                            NULL, 0, &screen->tesla);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate PGRAPH context for 3D: %d\n", ret);
+      goto fail;
+   }
+
+   /* One code segment each for vertex, fragment and geometry programs. */
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
+                        3 << NV50_CODE_BO_SIZE_LOG2, NULL, &screen->code);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate code bo: %d\n", ret);
+      goto fail;
+   }
+
+   nouveau_heap_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
+   nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
+   nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
+
+   /* NOTE(review): the result of nouveau_getparam() is not checked; if it
+    * fails, `value` is read uninitialised below. */
+   nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
+
+   screen->TPs = util_bitcount(value & 0xffff);
+   screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
+
+   stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
+         STACK_WARPS_ALLOC * 64 * 8;
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL,
+                        &screen->stack_bo);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate stack bo: %d\n", ret);
+      goto fail;
+   }
+
+   uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) *
+         screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP *
+         ONE_TEMP_SIZE;
+   screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE;
+   screen->max_tls_space /= 2; /* half of vram */
+
+   /* hw can address max 64 KiB */
+   screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10);
+
+   uint64_t tls_size;
+   unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE;
+   ret = nv50_tls_alloc(screen, tls_space, &tls_size);
+   if (ret)
+      goto fail;
+
+   if (nouveau_mesa_debug)
+      debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n",
+            screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10);
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, NULL,
+                        &screen->uniforms);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret);
+      goto fail;
+   }
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, NULL,
+                        &screen->txc);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate TIC/TSC bo: %d\n", ret);
+      goto fail;
+   }
+
+   /* One allocation holds both entry tables: TIC entries first, TSC entries
+    * right behind them.  Check it before deriving the TSC pointer. */
+   screen->tic.entries = CALLOC(NV50_TIC_MAX_ENTRIES + NV50_TSC_MAX_ENTRIES,
+                                sizeof(void *));
+   if (!screen->tic.entries) {
+      ret = -ENOMEM;
+      NOUVEAU_ERR("Failed to allocate TIC/TSC entry table: %d\n", ret);
+      goto fail;
+   }
+   screen->tsc.entries = screen->tic.entries + NV50_TIC_MAX_ENTRIES;
+
+   if (!nv50_blitter_create(screen))
+      goto fail;
+
+   nv50_screen_init_hwctx(screen);
+
+   nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+
+   return pscreen;
+
+fail:
+   nv50_screen_destroy(pscreen);
+   return NULL;
+}
+
+/*
+ * Pick a TIC slot for `entry` and return its index.
+ *
+ * Starting at tic.next, the first slot whose lock bit is clear is taken.
+ * If that slot already holds an entry, the old entry is evicted by setting
+ * its id to -1.
+ *
+ * NOTE(review): the search never terminates if every slot is locked.
+ */
+int
+nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry)
+{
+   int i = screen->tic.next;
+
+   /* 1u: slot 31 of each word would otherwise shift a signed 1 into the
+    * sign bit, which is undefined. */
+   while (screen->tic.lock[i / 32] & (1u << (i % 32)))
+      i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
+
+   screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
+
+   if (screen->tic.entries[i])
+      nv50_tic_entry(screen->tic.entries[i])->id = -1;
+
+   screen->tic.entries[i] = entry;
+   return i;
+}
+
+/*
+ * Pick a TSC slot for `entry` and return its index.
+ *
+ * Starting at tsc.next, the first slot whose lock bit is clear is taken.
+ * If that slot already holds an entry, the old entry is evicted by setting
+ * its id to -1.
+ *
+ * NOTE(review): the search never terminates if every slot is locked.
+ */
+int
+nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry)
+{
+   int i = screen->tsc.next;
+
+   /* 1u: slot 31 of each word would otherwise shift a signed 1 into the
+    * sign bit, which is undefined. */
+   while (screen->tsc.lock[i / 32] & (1u << (i % 32)))
+      i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
+
+   screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
+
+   if (screen->tsc.entries[i])
+      nv50_tsc_entry(screen->tsc.entries[i])->id = -1;
+
+   screen->tsc.entries[i] = entry;
+   return i;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
new file mode 100644
index 00000000000..091a3921a4b
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -0,0 +1,153 @@
+#ifndef __NV50_SCREEN_H__
+#define __NV50_SCREEN_H__
+
+#include "nouveau_screen.h"
+#include "nouveau_fence.h"
+#include "nouveau_mm.h"
+#include "nouveau_heap.h"
+
+#include "nv50/nv50_winsys.h"
+#include "nv50/nv50_stateobj.h"
+
+#define NV50_TIC_MAX_ENTRIES 2048
+#define NV50_TSC_MAX_ENTRIES 2048
+
+/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
+#define NV50_MAX_PIPE_CONSTBUFS 14
+
+struct nv50_context;
+
+#define NV50_CODE_BO_SIZE_LOG2 19
+
+#define NV50_SCREEN_RESIDENT_BO_COUNT 5
+
+struct nv50_blitter;
+
+struct nv50_screen { /* Per-screen state of the nv50 driver.
+ * nv50_screen() casts a pipe_screen pointer straight to this type, so
+ * 'base' has to stay the first member (assumes nouveau_screen itself
+ * starts with the pipe_screen - TODO confirm).
+ */
+ struct nouveau_screen base;
+
+ struct nv50_context *cur_ctx;
+
+ struct nouveau_bo *code;
+ struct nouveau_bo *uniforms;
+ struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
+ struct nouveau_bo *stack_bo;
+ struct nouveau_bo *tls_bo;
+
+ unsigned TPs;
+ unsigned MPsInTP;
+ unsigned max_tls_space;
+ unsigned cur_tls_space;
+
+ struct nouveau_heap *vp_code_heap;
+ struct nouveau_heap *gp_code_heap;
+ struct nouveau_heap *fp_code_heap;
+
+ struct nv50_blitter *blitter;
+
+ struct {
+ void **entries; /* slot index -> entry occupying the slot, NULL if none */
+ int next; /* slot at which nv50_screen_tic_alloc() starts scanning */
+ uint32_t lock[NV50_TIC_MAX_ENTRIES / 32]; /* 1 bit per slot; set = skipped by the allocator */
+ } tic;
+
+ struct {
+ void **entries; /* screen creation sets this to tic.entries + 2048 (same allocation) */
+ int next; /* slot at which nv50_screen_tsc_alloc() starts scanning */
+ uint32_t lock[NV50_TSC_MAX_ENTRIES / 32]; /* 1 bit per slot; set = skipped by the allocator */
+ } tsc;
+
+ struct {
+ uint32_t *map;
+ struct nouveau_bo *bo;
+ } fence;
+
+ struct nouveau_object *sync;
+
+ struct nouveau_object *tesla; /* its oclass is compared against NVA3_3D_CLASS by the blend state code */
+ struct nouveau_object *eng2d;
+ struct nouveau_object *m2mf;
+};
+
+/* Downcast a gallium pipe_screen pointer to the driver's nv50_screen. */
+static INLINE struct nv50_screen *
+nv50_screen(struct pipe_screen *screen)
+{
+   struct nv50_screen *const nv50_scr = (struct nv50_screen *)screen;
+
+   return nv50_scr;
+}
+
+boolean nv50_blitter_create(struct nv50_screen *);
+void nv50_blitter_destroy(struct nv50_screen *);
+
+int nv50_screen_tic_alloc(struct nv50_screen *, void *);
+int nv50_screen_tsc_alloc(struct nv50_screen *, void *);
+
+/* Attach the screen's current fence to a resource.
+ * Nothing happens unless res->mm is set; a write access (NOUVEAU_BO_WR in
+ * flags) additionally updates the resource's write fence.
+ */
+static INLINE void
+nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
+{
+   struct nv50_screen *scr;
+
+   if (!res->mm)
+      return;
+
+   scr = nv50_screen(res->base.screen);
+
+   nouveau_fence_ref(scr->base.fence.current, &res->fence);
+   if (flags & NOUVEAU_BO_WR)
+      nouveau_fence_ref(scr->base.fence.current, &res->fence_wr);
+}
+
+/* Record a pending GPU access on a resource that has a buffer object:
+ * update its status bits according to the access flags, then fence it.
+ */
+static INLINE void
+nv50_resource_validate(struct nv04_resource *res, uint32_t flags)
+{
+   if (!likely(res->bo))
+      return;
+
+   if (flags & NOUVEAU_BO_WR) {
+      res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+      res->status |= NOUVEAU_BUFFER_STATUS_DIRTY;
+   }
+   if (flags & NOUVEAU_BO_RD)
+      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+   nv50_resource_fence(res, flags);
+}
+
+struct nv50_format { /* Element type of nv50_format_table[].
+ * NOTE(review): going by the field names only, these look like the
+ * hardware format codes for render-target, texture (TIC) and vertex use
+ * plus a usage bitmask - confirm against the table's definition.
+ */
+ uint32_t rt;
+ uint32_t tic;
+ uint32_t vtx;
+ uint32_t usage;
+};
+
+extern const struct nv50_format nv50_format_table[];
+
+/* Clear the lock bit of the TIC slot owned by 'tic', if it owns one
+ * (id >= 0). The slot's entry pointer is left in place.
+ */
+static INLINE void
+nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic)
+{
+   /* 1u: a signed 1 shifted into bit 31 would be undefined behaviour. */
+   if (tic->id >= 0)
+      screen->tic.lock[tic->id / 32] &= ~(1u << (tic->id % 32));
+}
+
+/* Clear the lock bit of the TSC slot owned by 'tsc', if it owns one
+ * (id >= 0). The slot's entry pointer is left in place.
+ */
+static INLINE void
+nv50_screen_tsc_unlock(struct nv50_screen *screen, struct nv50_tsc_entry *tsc)
+{
+   /* 1u: a signed 1 shifted into bit 31 would be undefined behaviour. */
+   if (tsc->id >= 0)
+      screen->tsc.lock[tsc->id / 32] &= ~(1u << (tsc->id % 32));
+}
+
+/* Release the TIC slot owned by 'tic', if it owns one (id >= 0): the
+ * slot's entry pointer is cleared and its lock bit dropped. tic->id itself
+ * is not modified.
+ */
+static INLINE void
+nv50_screen_tic_free(struct nv50_screen *screen, struct nv50_tic_entry *tic)
+{
+   if (tic->id >= 0) {
+      screen->tic.entries[tic->id] = NULL;
+      /* 1u: a signed 1 shifted into bit 31 would be undefined behaviour. */
+      screen->tic.lock[tic->id / 32] &= ~(1u << (tic->id % 32));
+   }
+}
+
+/* Release the TSC slot owned by 'tsc', if it owns one (id >= 0): the
+ * slot's entry pointer is cleared and its lock bit dropped. tsc->id itself
+ * is not modified.
+ */
+static INLINE void
+nv50_screen_tsc_free(struct nv50_screen *screen, struct nv50_tsc_entry *tsc)
+{
+   if (tsc->id >= 0) {
+      screen->tsc.entries[tsc->id] = NULL;
+      /* 1u: a signed 1 shifted into bit 31 would be undefined behaviour. */
+      screen->tsc.lock[tsc->id / 32] &= ~(1u << (tsc->id % 32));
+   }
+}
+
+extern int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
new file mode 100644
index 00000000000..9144fc48d95
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -0,0 +1,623 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+#include "nv50/nv50_context.h"
+
+void
+nv50_constbufs_validate(struct nv50_context *nv50)
+{ /* Re-emit every constant buffer binding flagged in constbuf_dirty[].
+ * User buffers (accepted in slot 0 only) are copied inline through
+ * CB_ADDR/CB_DATA; resource-backed buffers are described with
+ * CB_DEF_ADDRESS_* and bound with SET_PROGRAM_CB; a slot without a
+ * resource is unbound.
+ */
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ unsigned s;
+
+ for (s = 0; s < 3; ++s) { /* one pass per shader stage */
+ unsigned p;
+
+ if (s == PIPE_SHADER_FRAGMENT)
+ p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT;
+ else
+ if (s == PIPE_SHADER_GEOMETRY)
+ p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY;
+ else
+ p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX;
+
+ while (nv50->constbuf_dirty[s]) {
+ const int i = ffs(nv50->constbuf_dirty[s]) - 1; /* lowest dirty slot */
+ nv50->constbuf_dirty[s] &= ~(1 << i); /* cleared up front, so 'continue' below cannot loop forever */
+
+ if (nv50->constbuf[s][i].user) {
+ const unsigned b = NV50_CB_PVP + s;
+ unsigned start = 0;
+ unsigned words = nv50->constbuf[s][0].size / 4; /* presumably bytes -> 32-bit words */
+ if (i) {
+ NOUVEAU_ERR("user constbufs only supported in slot 0\n");
+ continue;
+ }
+ if (!nv50->state.uniform_buffer_bound[s]) {
+ nv50->state.uniform_buffer_bound[s] = TRUE;
+ BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
+ PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
+ }
+ while (words) { /* upload in chunks limited by push space and packet size */
+ unsigned nr;
+
+ if (!PUSH_SPACE(push, 16))
+ break; /* remaining words are silently not uploaded */
+ nr = PUSH_AVAIL(push);
+ assert(nr >= 16);
+ nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN); /* -3: presumably the CB_ADDR packet (2 words) plus the CB_DATA header */
+
+ BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
+ PUSH_DATA (push, (start << 8) | b);
+ BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
+ PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr);
+
+ start += nr;
+ words -= nr;
+ }
+ } else {
+ struct nv04_resource *res =
+ nv04_resource(nv50->constbuf[s][i].u.buf);
+ if (res) {
+ /* TODO: allocate persistent bindings */
+ const unsigned b = s * 16 + i;
+
+ assert(nouveau_resource_mapped_by_gpu(&res->base));
+
+ BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset);
+ PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset);
+ PUSH_DATA (push, (b << 16) |
+ (nv50->constbuf[s][i].size & 0xffff));
+ BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
+ PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
+
+ BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);
+ } else {
+ BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
+ PUSH_DATA (push, (i << 8) | p | 0); /* low bit 0: unbind the slot */
+ }
+ if (i == 0)
+ nv50->state.uniform_buffer_bound[s] = FALSE; /* slot 0 no longer carries the user-buffer binding */
+ }
+ }
+ }
+}
+
+/* Make a program ready for use.
+ * An untranslated program is translated for the screen's chipset first;
+ * a translated program that already has prog->mem set needs nothing more.
+ * In all remaining cases the code is uploaded.
+ * Returns FALSE if translation fails, otherwise TRUE or the result of the
+ * upload.
+ */
+static boolean
+nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
+{
+   if (prog->translated) {
+      if (prog->mem)
+         return TRUE;
+   } else {
+      prog->translated =
+         nv50_program_translate(prog, nv50->screen->base.device->chipset);
+      if (!prog->translated)
+         return FALSE;
+   }
+
+   return nv50_program_upload_code(nv50, prog);
+}
+
+/* Track which shader stages need the TLS buffer.
+ * state.tls_required holds one bit per stage. The TLS bo is referenced in
+ * the 3D bufctx when the first stage starts needing it or when
+ * state.new_tls_space is set, and the reference is dropped again when the
+ * only stage that needed it stops doing so.
+ */
+static INLINE void
+nv50_program_update_context_state(struct nv50_context *nv50,
+                                  struct nv50_program *prog, int stage)
+{
+   const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+   const int stage_bit = 1 << stage;
+
+   if (!prog || !prog->tls_space) {
+      /* this stage was the only user: drop the TLS reference */
+      if (nv50->state.tls_required == stage_bit)
+         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
+      nv50->state.tls_required &= ~stage_bit;
+      return;
+   }
+
+   if (nv50->state.new_tls_space)
+      nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
+   if (!nv50->state.tls_required || nv50->state.new_tls_space)
+      BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo);
+   nv50->state.new_tls_space = FALSE;
+   nv50->state.tls_required |= stage_bit;
+}
+
+/* Validate the bound vertex program and emit its attribute enables,
+ * register allocation and entry point. Nothing is emitted if the program
+ * cannot be validated.
+ */
+void
+nv50_vertprog_validate(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *const pb = nv50->base.pushbuf;
+   struct nv50_program *const prog = nv50->vertprog;
+
+   if (!nv50_program_validate(nv50, prog))
+      return;
+   nv50_program_update_context_state(nv50, prog, 0);
+
+   BEGIN_NV04(pb, NV50_3D(VP_ATTR_EN(0)), 2);
+   PUSH_DATA (pb, prog->vp.attrs[0]);
+   PUSH_DATA (pb, prog->vp.attrs[1]);
+
+   BEGIN_NV04(pb, NV50_3D(VP_REG_ALLOC_RESULT), 1);
+   PUSH_DATA (pb, prog->max_out);
+
+   BEGIN_NV04(pb, NV50_3D(VP_REG_ALLOC_TEMP), 1);
+   PUSH_DATA (pb, prog->max_gpr);
+
+   BEGIN_NV04(pb, NV50_3D(VP_START_ID), 1);
+   PUSH_DATA (pb, prog->code_base);
+}
+
+/* Validate the bound fragment program and emit its register allocation,
+ * control words and entry point. Nothing is emitted if the program cannot
+ * be validated.
+ */
+void
+nv50_fragprog_validate(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *const pb = nv50->base.pushbuf;
+   struct nv50_program *const prog = nv50->fragprog;
+
+   if (!nv50_program_validate(nv50, prog))
+      return;
+   nv50_program_update_context_state(nv50, prog, 1);
+
+   BEGIN_NV04(pb, NV50_3D(FP_REG_ALLOC_TEMP), 1);
+   PUSH_DATA (pb, prog->max_gpr);
+
+   BEGIN_NV04(pb, NV50_3D(FP_RESULT_COUNT), 1);
+   PUSH_DATA (pb, prog->max_out);
+
+   BEGIN_NV04(pb, NV50_3D(FP_CONTROL), 1);
+   PUSH_DATA (pb, prog->fp.flags[0]);
+
+   BEGIN_NV04(pb, NV50_3D(FP_CTRL_UNK196C), 1);
+   PUSH_DATA (pb, prog->fp.flags[1]);
+
+   BEGIN_NV04(pb, NV50_3D(FP_START_ID), 1);
+   PUSH_DATA (pb, prog->code_base);
+}
+
+/**
+ * Validate the geometry program, if one is bound, and emit its register
+ * allocation, output primitive setup and entry point.
+ *
+ * The program goes through nv50_program_validate() first, exactly like the
+ * vertex and fragment programs. Without that step gp->code_base and the
+ * register counts pushed below could belong to a program that was never
+ * translated or uploaded. If validation fails nothing is emitted and the
+ * TLS tracking is left untouched, as in the other two stages.
+ *
+ * With no geometry program bound, only the TLS tracking for stage 2 is
+ * updated.
+ */
+void
+nv50_gmtyprog_validate(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_program *gp = nv50->gmtyprog;
+
+   if (gp) {
+      if (!nv50_program_validate(nv50, gp))
+         return;
+
+      BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_TEMP), 1);
+      PUSH_DATA (push, gp->max_gpr);
+      BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_RESULT), 1);
+      PUSH_DATA (push, gp->max_out);
+      BEGIN_NV04(push, NV50_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1);
+      PUSH_DATA (push, gp->gp.prim_type);
+      BEGIN_NV04(push, NV50_3D(GP_VERTEX_OUTPUT_COUNT), 1);
+      PUSH_DATA (push, gp->gp.vert_count);
+      BEGIN_NV04(push, NV50_3D(GP_START_ID), 1);
+      PUSH_DATA (push, gp->code_base);
+
+      nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */
+   }
+   nv50_program_update_context_state(nv50, gp, 2);
+
+   /* GP_ENABLE is updated in linkage validation */
+}
+
+/* Program point sprite coordinate replacement.
+ * Without point_quad_rasterization the replace map is zeroed (only if it
+ * had been programmed before). Otherwise every component of a GENERIC
+ * fragment input selected by sprite_coord_enable gets a 4-bit code (c + 1)
+ * at its map slot; slots are counted from the start index stored in bits
+ * 8..15 of state.interpolant_ctrl.
+ */
+static void
+nv50_sprite_coords_validate(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_program *fp = nv50->fragprog;
+   uint32_t replace_map[8];
+   uint32_t ctrl;
+   unsigned slot = (nv50->state.interpolant_ctrl >> 8) & 0xff;
+   unsigned in, comp;
+
+   if (!nv50->rast->pipe.point_quad_rasterization) {
+      if (nv50->state.point_sprite) {
+         BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
+         for (in = 0; in < 8; ++in)
+            PUSH_DATA(push, 0);
+
+         nv50->state.point_sprite = FALSE;
+      }
+      return;
+   }
+   nv50->state.point_sprite = TRUE;
+
+   memset(replace_map, 0, sizeof(replace_map));
+
+   for (in = 0; in < fp->in_nr; in++) {
+      /* inputs that are not replaced only advance the slot counter */
+      if (fp->in[in].sn != TGSI_SEMANTIC_GENERIC ||
+          !(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[in].si))) {
+         slot += util_bitcount(fp->in[in].mask);
+         continue;
+      }
+
+      for (comp = 0; comp < 4; ++comp) {
+         if (!(fp->in[in].mask & (1 << comp)))
+            continue;
+         replace_map[slot / 8] |= (comp + 1) << ((slot % 8) * 4);
+         ++slot;
+      }
+   }
+
+   ctrl = (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
+      ? 0x00 : 0x10;
+
+   BEGIN_NV04(push, NV50_3D(POINT_SPRITE_CTRL), 1);
+   PUSH_DATA (push, ctrl);
+
+   BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
+   PUSH_DATAp(push, replace_map, 8);
+}
+
+/* Emit the state that depends on both the shaders and the rasterizer CSO:
+ * sprite coordinate replacement, RASTERIZE_ENABLE, and the rasterizer
+ * controlled enable bits of SEMANTIC_COLOR / SEMANTIC_PTSZ.
+ */
+void
+nv50_validate_derived_rs(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   uint32_t sem_color, sem_psize;
+
+   nv50_sprite_coords_validate(nv50);
+
+   if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) {
+      nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard;
+      BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
+      PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
+   }
+
+   /* NOTE(review): presumably the linkage validation rewrites both
+    * SEMANTIC registers anyway when the fragment program changed - confirm.
+    */
+   if (nv50->dirty & NV50_NEW_FRAGPROG)
+      return;
+
+   /* vertex colour clamping */
+   sem_color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN;
+   if (nv50->rast->pipe.clamp_vertex_color)
+      sem_color |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
+
+   if (sem_color != nv50->state.semantic_color) {
+      nv50->state.semantic_color = sem_color;
+      BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 1);
+      PUSH_DATA (push, sem_color);
+   }
+
+   /* per-vertex point size */
+   sem_psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
+   if (nv50->rast->pipe.point_size_per_vertex)
+      sem_psize |= NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
+
+   if (sem_psize != nv50->state.semantic_psize) {
+      nv50->state.semantic_psize = sem_psize;
+      BEGIN_NV04(push, NV50_3D(SEMANTIC_PTSZ), 1);
+      PUSH_DATA (push, sem_psize);
+   }
+}
+
+/* Append map entries for one (up to) 4-component input.
+ * For every component read by 'in' one slot is consumed starting at 'mid':
+ * it receives the producer's register when 'out' writes that component;
+ * otherwise the slot keeps its pre-filled value, with bit 0 set for the
+ * 4th component. Slots of linear inputs are flagged in 'lin'.
+ * Returns the index of the next free slot.
+ */
+static int
+nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4],
+              struct nv50_varying *in, struct nv50_varying *out)
+{
+   uint8_t written = out->mask;
+   uint8_t wanted = in->mask;
+   uint8_t reg = out->hw;
+   int comp;
+
+   for (comp = 0; comp < 4; ++comp, wanted >>= 1, written >>= 1) {
+      const int is_written = written & 1;
+
+      if (wanted & 1) {
+         if (in->linear)
+            lin[mid / 32] |= 1 << (mid % 32);
+
+         if (is_written)
+            map[mid] = reg;
+         else if (comp == 3)
+            map[mid] |= 1;
+
+         ++mid;
+      }
+
+      /* every written component occupies one producer register */
+      reg += is_written;
+   }
+
+   return mid;
+}
+
+void
+nv50_fp_linkage_validate(struct nv50_context *nv50)
+{ /* Build the result map that routes the outputs of the last vertex-stage
+ * program (GP if bound, else VP) to the fragment program inputs, and emit
+ * it together with SEMANTIC_*, FP_INTERPOLANT_CTRL, NOPERSPECTIVE_BITMAP,
+ * GP_ENABLE and, when that program has stream output info, STRMOUT_MAP.
+ * Map layout, in order: position, clip distances, back-face colours (only
+ * with two-sided lighting), FP inputs, primitive id, point size, then any
+ * stream outputs not already present.
+ */
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog;
+ struct nv50_program *fp = nv50->fragprog;
+ struct nv50_varying dummy; /* NOTE(review): .hw is never initialised; nv50_vec4_map() copies it but only stores it for components set in .mask */
+ int i, n, c, m;
+ uint32_t primid = 0;
+ uint32_t psiz = 0x000;
+ uint32_t interp = fp->fp.interp;
+ uint32_t colors = fp->fp.colors;
+ uint32_t lin[4];
+ uint8_t map[64];
+ uint8_t so_map[64];
+
+ if (!(nv50->dirty & (NV50_NEW_VERTPROG |
+ NV50_NEW_FRAGPROG |
+ NV50_NEW_GMTYPROG))) { /* no program changed: redo linkage only if light_twoside disagrees with the current FFC0/BFC0 ids */
+ uint8_t bfc, ffc;
+ ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);
+ bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK)
+ >> 8;
+ if (nv50->rast->pipe.light_twoside == ((ffc == bfc) ? 0 : 1))
+ return;
+ }
+
+ memset(lin, 0x00, sizeof(lin));
+
+ /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx
+ * or is it the first byte ?
+ */
+ memset(map, nv50->gmtyprog ? 0x80 : 0x40, sizeof(map));
+
+ dummy.mask = 0xf; /* map all components of HPOS */
+ dummy.linear = 0;
+ m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
+
+ for (c = 0; c < vp->vp.clpd_nr; ++c)
+ map[m++] = vp->vp.clpd[c / 4] + (c % 4);
+
+ colors |= m << 8; /* adjust BFC0 id */
+
+ dummy.mask = 0x0; /* from here on 'dummy' stands in for a missing output */
+
+ /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */
+ if (nv50->rast->pipe.light_twoside) {
+ for (i = 0; i < 2; ++i) {
+ n = vp->vp.bfc[i];
+ if (fp->vp.bfc[i] >= fp->in_nr) /* FP does not read this colour */
+ continue;
+ m = nv50_vec4_map(map, m, lin, &fp->in[fp->vp.bfc[i]],
+ (n < vp->out_nr) ? &vp->out[n] : &dummy);
+ }
+ }
+ colors += m - 4; /* adjust FFC0 id */
+ interp |= m << 8; /* set map id where 'normal' FP inputs start */
+
+ for (i = 0; i < fp->in_nr; ++i) {
+ for (n = 0; n < vp->out_nr; ++n) /* find the output with the same semantic name and index */
+ if (vp->out[n].sn == fp->in[i].sn &&
+ vp->out[n].si == fp->in[i].si)
+ break;
+ m = nv50_vec4_map(map, m, lin,
+ &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
+ }
+
+ /* PrimitiveID either is replaced by the system value, or
+ * written by the geometry shader into an output register
+ */
+ if (fp->gp.primid < 0x80) {
+ primid = m;
+ map[m++] = vp->gp.primid;
+ }
+
+ if (nv50->rast->pipe.point_size_per_vertex) {
+ psiz = (m << 4) | 1;
+ map[m++] = vp->vp.psiz;
+ }
+
+ if (nv50->rast->pipe.clamp_vertex_color)
+ colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
+
+ if (unlikely(vp->so)) {
+ /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP
+ * gets written.
+ *
+ * TODO:
+ * Inverting vp->so->map (output -> offset) would probably speed this up.
+ */
+ memset(so_map, 0, sizeof(so_map));
+ for (i = 0; i < vp->so->map_size; ++i) {
+ if (vp->so->map[i] == 0xff) /* 0xff entries are skipped */
+ continue;
+ for (c = 0; c < m; ++c) /* reuse a result map slot that already carries this output and has no offset yet */
+ if (map[c] == vp->so->map[i] && !so_map[c])
+ break;
+ if (c == m) { /* none found: append the output to the result map */
+ c = m;
+ map[m++] = vp->so->map[i];
+ }
+ so_map[c] = 0x80 | i;
+ }
+ for (c = m; c & 3; ++c) /* zero the tail of the last 32-bit word */
+ so_map[c] = 0;
+ }
+
+ n = (m + 3) / 4; /* 32-bit words needed for m one-byte entries */
+ assert(m <= 64); /* NOTE(review): only checked after map[] has been written */
+
+ if (unlikely(nv50->gmtyprog)) {
+ BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP_SIZE), 1);
+ PUSH_DATA (push, m);
+ BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP(0)), n);
+ PUSH_DATAp(push, map, n);
+ } else {
+ BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
+ PUSH_DATA (push, vp->vp.attrs[2]);
+
+ BEGIN_NV04(push, NV50_3D(SEMANTIC_PRIM_ID), 1);
+ PUSH_DATA (push, primid);
+
+ BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
+ PUSH_DATA (push, m);
+ BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
+ PUSH_DATAp(push, map, n);
+ }
+
+ BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 4);
+ PUSH_DATA (push, colors);
+ PUSH_DATA (push, (vp->vp.clpd_nr << 8) | 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, psiz);
+
+ BEGIN_NV04(push, NV50_3D(FP_INTERPOLANT_CTRL), 1);
+ PUSH_DATA (push, interp);
+
+ nv50->state.interpolant_ctrl = interp; /* cached: read back by the sprite coord and derived-rs validation */
+
+ nv50->state.semantic_color = colors;
+ nv50->state.semantic_psize = psiz;
+
+ BEGIN_NV04(push, NV50_3D(NOPERSPECTIVE_BITMAP(0)), 4);
+ PUSH_DATAp(push, lin, 4);
+
+ BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1);
+ PUSH_DATA (push, nv50->gmtyprog ? 1 : 0);
+
+ if (vp->so) {
+ BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n);
+ PUSH_DATAp(push, so_map, n);
+ }
+}
+
+/* Append to 'map' the routing of vertex program outputs to geometry
+ * program inputs. For each component a GP input reads, the slot gets the
+ * register of the VP output with the same semantic if the VP writes that
+ * component, else 0x40 (0x41 for the 4th component).
+ * Returns the index of the next free slot.
+ */
+static int
+nv50_vp_gp_mapping(uint8_t *map, int m,
+                   struct nv50_program *vp, struct nv50_program *gp)
+{
+   int gi;
+
+   for (gi = 0; gi < gp->in_nr; ++gi) {
+      uint8_t gp_mask = gp->in[gi].mask;
+      uint8_t vp_mask = 0;
+      uint8_t reg = 0;
+      int vo, comp;
+
+      /* look up the VP output feeding this GP input */
+      for (vo = 0; vo < vp->out_nr; ++vo) {
+         if (vp->out[vo].sn != gp->in[gi].sn ||
+             vp->out[vo].si != gp->in[gi].si)
+            continue;
+         vp_mask = vp->out[vo].mask;
+         reg = vp->out[vo].hw;
+         break;
+      }
+
+      for (comp = 0; comp < 4; ++comp) {
+         if (gp_mask & 1) {
+            if (vp_mask & 1)
+               map[m++] = reg;
+            else
+               map[m++] = (comp == 3) ? 0x41 : 0x40;
+         }
+         reg += vp_mask & 1;
+         vp_mask >>= 1;
+         gp_mask >>= 1;
+      }
+   }
+
+   return m;
+}
+
+/* With a geometry program bound, emit the VP result map that feeds the
+ * GP inputs, plus the combined builtin attribute enables of both programs.
+ * Does nothing without a geometry program.
+ */
+void
+nv50_gp_linkage_validate(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_program *vp = nv50->vertprog;
+   struct nv50_program *gp = nv50->gmtyprog;
+   uint8_t map[64];
+   int count, words;
+
+   if (!gp)
+      return;
+
+   memset(map, 0, sizeof(map));
+   count = nv50_vp_gp_mapping(map, 0, vp, gp);
+   words = (count + 3) / 4;
+
+   BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
+   PUSH_DATA (push, vp->vp.attrs[2] | gp->vp.attrs[2]);
+
+   BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
+   PUSH_DATA (push, count);
+   BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), words);
+   PUSH_DATAp(push, map, words);
+}
+
+void
+nv50_stream_output_validate(struct nv50_context *nv50)
+{ /* Program stream output (transform feedback) state.
+ * Stream output is always disabled first. It is set up and re-enabled
+ * only if the last vertex-stage program (GP if bound, else VP) has
+ * stream output info and at least one target is bound.
+ */
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_stream_output_state *so;
+ uint32_t ctrl;
+ unsigned i;
+ unsigned prims = ~0; /* ~0 = no primitive limit computed */
+
+ so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so;
+
+ BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ if (!so || !nv50->num_so_targets) {
+ if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
+ BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
+ PUSH_DATA (push, 0);
+ }
+ BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
+ PUSH_DATA (push, 1);
+ return;
+ }
+
+ /* previous TFB needs to complete */
+ if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
+ BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ ctrl = so->ctrl;
+ if (nv50->screen->base.class_3d >= NVA0_3D_CLASS)
+ ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET;
+
+ BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
+ PUSH_DATA (push, ctrl);
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);
+
+ for (i = 0; i < nv50->num_so_targets; ++i) {
+ struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
+ struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
+
+ const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3; /* NVA0+ takes a 4th word, the buffer size */
+
+ if (n == 4 && !targ->clean)
+ nv84_query_fifo_wait(push, targ->pq);
+ BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
+ PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
+ PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
+ PUSH_DATA (push, so->num_attribs[i]);
+ if (n == 4) {
+ PUSH_DATA(push, targ->pipe.buffer_size);
+
+ BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
+ if (!targ->clean) { /* target was used before: its offset comes from the target's query object */
+ assert(targ->pq);
+ nv50_query_pushbuf_submit(push, targ->pq, 0x4);
+ } else {
+ PUSH_DATA(push, 0);
+ targ->clean = FALSE;
+ }
+ } else { /* pre-NVA0: derive a primitive limit from the smallest target */
+ const unsigned limit = targ->pipe.buffer_size /
+ (so->stride[i] * nv50->state.prim_size); /* NOTE(review): assumes stride and prim_size are non-zero */
+ prims = MIN2(prims, limit);
+ }
+ BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
+ }
+ if (prims != ~0) {
+ BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
+ PUSH_DATA (push, prims);
+ }
+ BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
+ PUSH_DATA (push, 1);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
new file mode 100644
index 00000000000..7dceb51c19e
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -0,0 +1,1110 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_helpers.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+#include "util/u_format_srgb.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv50/nv50_stateobj.h"
+#include "nv50/nv50_context.h"
+
+#include "nv50/nv50_3d.xml.h"
+#include "nv50/nv50_texture.xml.h"
+
+#include "nouveau_gldefs.h"
+
+/* Caveats:
+ * ! pipe_sampler_state.normalized_coords is ignored - rectangle textures will
+ * use non-normalized coordinates, everything else won't
+ * (The relevant bit is in the TIC entry and not the TSC entry.)
+ *
+ * ! pipe_sampler_state.seamless_cube_map is ignored - seamless filtering is
+ * always activated on NVA0 +
+ * (Give me the global bit, otherwise it's not worth the CPU work.)
+ *
+ * ! pipe_sampler_state.border_color is not swizzled according to the texture
+ * swizzle in pipe_sampler_view
+ * (This will be ugly with indirect independent texture/sampler access,
+ * we'd have to emulate the logic in the shader. GL doesn't have that,
+ * D3D doesn't have swizzle, if we knew what we were implementing we'd be
+ * good.)
+ *
+ * ! pipe_rasterizer_state.line_last_pixel is ignored - it is never drawn
+ *
+ * ! pipe_rasterizer_state.flatshade_first also applies to QUADS
+ * (There's a GL query for that, forcing an exception is just ridiculous.)
+ *
+ * ! pipe_rasterizer_state.half_pixel_center is ignored - pixel centers
+ * are always at half integer coordinates and the top-left rule applies
+ * (There does not seem to be a hardware switch for this.)
+ *
+ * ! pipe_rasterizer_state.sprite_coord_enable is masked with 0xff on NVC0
+ * (The hardware only has 8 slots meant for TexCoord and we have to assign
+ * in advance to maintain elegant separate shader objects.)
+ */
+
+/* Expand a PIPE_MASK_* channel mask into the hardware colour mask, which
+ * uses one nibble per channel: R = 0x0001, G = 0x0010, B = 0x0100,
+ * A = 0x1000.
+ */
+static INLINE uint32_t
+nv50_colormask(unsigned mask)
+{
+   uint32_t hw = 0;
+
+   hw |= (mask & PIPE_MASK_R) ? 0x0001 : 0;
+   hw |= (mask & PIPE_MASK_G) ? 0x0010 : 0;
+   hw |= (mask & PIPE_MASK_B) ? 0x0100 : 0;
+   hw |= (mask & PIPE_MASK_A) ? 0x1000 : 0;
+
+   return hw;
+}
+
+#define NV50_BLEND_FACTOR_CASE(a, b) \
+ case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b
+
+/* Translate a PIPE_BLENDFACTOR_* value into the matching
+ * NV50_3D_BLEND_FACTOR_* value; unknown factors map to ZERO.
+ */
+static INLINE uint32_t
+nv50_blend_fac(unsigned factor)
+{
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ONE:
+      return NV50_3D_BLEND_FACTOR_ONE;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      return NV50_3D_BLEND_FACTOR_SRC_COLOR;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      return NV50_3D_BLEND_FACTOR_SRC_ALPHA;
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      return NV50_3D_BLEND_FACTOR_DST_ALPHA;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      return NV50_3D_BLEND_FACTOR_DST_COLOR;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      return NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      return NV50_3D_BLEND_FACTOR_CONSTANT_COLOR;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      return NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+      return NV50_3D_BLEND_FACTOR_SRC1_COLOR;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+      return NV50_3D_BLEND_FACTOR_SRC1_ALPHA;
+   case PIPE_BLENDFACTOR_ZERO:
+      return NV50_3D_BLEND_FACTOR_ZERO;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+      return NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      return NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      return NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+      return NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      return NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      return NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      return NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      return NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
+   default:
+      return NV50_3D_BLEND_FACTOR_ZERO;
+   }
+}
+
+/**
+ * Create the blend CSO: a copy of the gallium state plus the pre-built
+ * command words for blend enables, equations/factors, logic op, colour
+ * masks and the alpha-to-coverage/alpha-to-one multisample controls.
+ *
+ * On NVA3+ classes independent blending uses per-render-target equations;
+ * everywhere else a single common function (taken from rt[0]) is emitted
+ * when any render target has blending enabled.
+ *
+ * \return the new state object, or NULL if the allocation fails.
+ */
+static void *
+nv50_blend_state_create(struct pipe_context *pipe,
+                        const struct pipe_blend_state *cso)
+{
+   struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj);
+   int i;
+   boolean emit_common_func = cso->rt[0].blend_enable;
+   uint32_t ms;
+
+   /* Same failure handling as nv50_rasterizer_state_create(); every
+    * SB_* macro below writes through 'so'.
+    */
+   if (!so)
+      return NULL;
+
+   if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
+      SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1);
+      SB_DATA (so, cso->independent_blend_enable);
+   }
+
+   so->pipe = *cso;
+
+   SB_BEGIN_3D(so, COLOR_MASK_COMMON, 1);
+   SB_DATA (so, !cso->independent_blend_enable);
+
+   SB_BEGIN_3D(so, BLEND_ENABLE_COMMON, 1);
+   SB_DATA (so, !cso->independent_blend_enable);
+
+   if (cso->independent_blend_enable) {
+      SB_BEGIN_3D(so, BLEND_ENABLE(0), 8);
+      for (i = 0; i < 8; ++i) {
+         SB_DATA(so, cso->rt[i].blend_enable);
+         if (cso->rt[i].blend_enable)
+            emit_common_func = TRUE;
+      }
+
+      if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
+         /* per-RT functions replace the common one */
+         emit_common_func = FALSE;
+
+         for (i = 0; i < 8; ++i) {
+            if (!cso->rt[i].blend_enable)
+               continue;
+            SB_BEGIN_3D_(so, NVA3_3D_IBLEND_EQUATION_RGB(i), 6);
+            SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func));
+            SB_DATA (so, nv50_blend_fac(cso->rt[i].rgb_src_factor));
+            SB_DATA (so, nv50_blend_fac(cso->rt[i].rgb_dst_factor));
+            SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func));
+            SB_DATA (so, nv50_blend_fac(cso->rt[i].alpha_src_factor));
+            SB_DATA (so, nv50_blend_fac(cso->rt[i].alpha_dst_factor));
+         }
+      }
+   } else {
+      SB_BEGIN_3D(so, BLEND_ENABLE(0), 1);
+      SB_DATA (so, cso->rt[0].blend_enable);
+   }
+
+   if (emit_common_func) {
+      SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5);
+      SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
+      SB_DATA (so, nv50_blend_fac(cso->rt[0].rgb_src_factor));
+      SB_DATA (so, nv50_blend_fac(cso->rt[0].rgb_dst_factor));
+      SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
+      SB_DATA (so, nv50_blend_fac(cso->rt[0].alpha_src_factor));
+      /* the destination alpha factor is a separate method */
+      SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1);
+      SB_DATA (so, nv50_blend_fac(cso->rt[0].alpha_dst_factor));
+   }
+
+   if (cso->logicop_enable) {
+      SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2);
+      SB_DATA (so, 1);
+      SB_DATA (so, nvgl_logicop_func(cso->logicop_func));
+   } else {
+      SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 1);
+      SB_DATA (so, 0);
+   }
+
+   if (cso->independent_blend_enable) {
+      SB_BEGIN_3D(so, COLOR_MASK(0), 8);
+      for (i = 0; i < 8; ++i)
+         SB_DATA(so, nv50_colormask(cso->rt[i].colormask));
+   } else {
+      SB_BEGIN_3D(so, COLOR_MASK(0), 1);
+      SB_DATA (so, nv50_colormask(cso->rt[0].colormask));
+   }
+
+   ms = 0;
+   if (cso->alpha_to_coverage)
+      ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
+   if (cso->alpha_to_one)
+      ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
+
+   SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1);
+   SB_DATA (so, ms);
+
+   /* the words written above must fit the state array */
+   assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+   return so;
+}
+
+/* Make 'hwcso' the context's current blend state and flag it dirty. */
+static void
+nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+
+   ctx->dirty |= NV50_NEW_BLEND;
+   ctx->blend = hwcso;
+}
+
+/* Free a blend state object created by nv50_blend_state_create(). */
+static void
+nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_blend_stateobj *so = hwcso;
+
+   FREE(so);
+}
+
+/* NOTE: ignoring line_last_pixel, using FALSE (set on screen init) */
+static void *
+nv50_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{ /* Create the rasterizer CSO: a copy of the gallium state plus the
+ * pre-built command words for shading, lines, points, polygons,
+ * culling, polygon offset and view volume clipping.
+ * Returns NULL if the allocation fails.
+ */
+ struct nv50_rasterizer_stateobj *so;
+ uint32_t reg;
+
+ so = CALLOC_STRUCT(nv50_rasterizer_stateobj);
+ if (!so)
+ return NULL;
+ so->pipe = *cso; /* kept because validation code reads nv50->rast->pipe */
+
+#ifndef NV50_SCISSORS_CLIPPING
+ SB_BEGIN_3D(so, SCISSOR_ENABLE(0), 1);
+ SB_DATA (so, cso->scissor);
+#endif
+
+ SB_BEGIN_3D(so, SHADE_MODEL, 1);
+ SB_DATA (so, cso->flatshade ? NV50_3D_SHADE_MODEL_FLAT :
+ NV50_3D_SHADE_MODEL_SMOOTH);
+ SB_BEGIN_3D(so, PROVOKING_VERTEX_LAST, 1);
+ SB_DATA (so, !cso->flatshade_first);
+ SB_BEGIN_3D(so, VERTEX_TWO_SIDE_ENABLE, 1);
+ SB_DATA (so, cso->light_twoside);
+
+ SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1);
+ SB_DATA (so, cso->clamp_fragment_color ? 0x11111111 : 0x00000000);
+
+ SB_BEGIN_3D(so, MULTISAMPLE_ENABLE, 1);
+ SB_DATA (so, cso->multisample);
+
+ SB_BEGIN_3D(so, LINE_WIDTH, 1);
+ SB_DATA (so, fui(cso->line_width));
+ SB_BEGIN_3D(so, LINE_SMOOTH_ENABLE, 1);
+ SB_DATA (so, cso->line_smooth);
+
+ SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1);
+ if (cso->line_stipple_enable) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, LINE_STIPPLE, 1);
+ SB_DATA (so, (cso->line_stipple_pattern << 8) |
+ cso->line_stipple_factor);
+ } else {
+ SB_DATA (so, 0);
+ }
+
+ if (!cso->point_size_per_vertex) { /* fixed size only; the per-vertex case is enabled via SEMANTIC_PTSZ during validation */
+ SB_BEGIN_3D(so, POINT_SIZE, 1);
+ SB_DATA (so, fui(cso->point_size));
+ }
+ SB_BEGIN_3D(so, POINT_SPRITE_ENABLE, 1);
+ SB_DATA (so, cso->point_quad_rasterization);
+ SB_BEGIN_3D(so, POINT_SMOOTH_ENABLE, 1);
+ SB_DATA (so, cso->point_smooth);
+
+ SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 3);
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_front));
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_back));
+ SB_DATA (so, cso->poly_smooth);
+
+ SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3);
+ SB_DATA (so, cso->cull_face != PIPE_FACE_NONE);
+ SB_DATA (so, cso->front_ccw ? NV50_3D_FRONT_FACE_CCW :
+ NV50_3D_FRONT_FACE_CW);
+ switch (cso->cull_face) {
+ case PIPE_FACE_FRONT_AND_BACK:
+ SB_DATA(so, NV50_3D_CULL_FACE_FRONT_AND_BACK);
+ break;
+ case PIPE_FACE_FRONT:
+ SB_DATA(so, NV50_3D_CULL_FACE_FRONT);
+ break;
+ case PIPE_FACE_BACK:
+ default: /* PIPE_FACE_NONE also lands here; culling is already disabled by the first word */
+ SB_DATA(so, NV50_3D_CULL_FACE_BACK);
+ break;
+ }
+
+ SB_BEGIN_3D(so, POLYGON_STIPPLE_ENABLE, 1);
+ SB_DATA (so, cso->poly_stipple_enable);
+ SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3);
+ SB_DATA (so, cso->offset_point);
+ SB_DATA (so, cso->offset_line);
+ SB_DATA (so, cso->offset_tri);
+
+ if (cso->offset_point || cso->offset_line || cso->offset_tri) {
+ SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
+ SB_DATA (so, fui(cso->offset_scale));
+ SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
+ SB_DATA (so, fui(cso->offset_units * 2.0f)); /* NOTE(review): units are doubled; the reason is not visible in this file */
+ SB_BEGIN_3D(so, POLYGON_OFFSET_CLAMP, 1);
+ SB_DATA (so, fui(cso->offset_clamp));
+ }
+
+ if (cso->depth_clip) {
+ reg = 0;
+ } else { /* depth clipping off: clamp depth at both planes instead */
+ reg =
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR |
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR |
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1;
+ }
+#ifndef NV50_SCISSORS_CLIPPING
+ reg |=
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 |
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1;
+#endif
+ SB_BEGIN_3D(so, VIEW_VOLUME_CLIP_CTRL, 1);
+ SB_DATA (so, reg);
+
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); /* NOTE(review): overflow is only detected after the words were written */
+ return (void *)so;
+}
+
+/* Make hwcso the context's current rasterizer state object and flag the
+ * rasterizer state for re-validation. */
+static void
+nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+
+   ctx->dirty |= NV50_NEW_RASTERIZER;
+   ctx->rast = hwcso;
+}
+
+/* Free a rasterizer state object; the pipe argument is not used. */
+static void
+nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/* Build the depth/stencil/alpha state object: a copy of the gallium state
+ * plus the pre-encoded 3D methods stored in so->state[] (so->size words).
+ *
+ * Returns the new object, or NULL if it cannot be allocated.
+ */
+static void *
+nv50_zsa_state_create(struct pipe_context *pipe,
+                      const struct pipe_depth_stencil_alpha_state *cso)
+{
+   struct nv50_zsa_stateobj *so = CALLOC_STRUCT(nv50_zsa_stateobj);
+
+   /* Allocation can fail; do not write through a NULL state object. */
+   if (!so)
+      return NULL;
+
+   so->pipe = *cso;
+
+   SB_BEGIN_3D(so, DEPTH_WRITE_ENABLE, 1);
+   SB_DATA    (so, cso->depth.writemask);
+   SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1);
+   if (cso->depth.enabled) {
+      SB_DATA    (so, 1);
+      /* The comparison function is only emitted while the test is on. */
+      SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1);
+      SB_DATA    (so, nvgl_comparison_op(cso->depth.func));
+   } else {
+      SB_DATA    (so, 0);
+   }
+
+   /* Front-face stencil. */
+   if (cso->stencil[0].enabled) {
+      SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
+      SB_DATA    (so, 1);
+      SB_DATA    (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+      SB_DATA    (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+      SB_DATA    (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+      SB_DATA    (so, nvgl_comparison_op(cso->stencil[0].func));
+      SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2);
+      SB_DATA    (so, cso->stencil[0].writemask);
+      SB_DATA    (so, cso->stencil[0].valuemask);
+   } else {
+      SB_BEGIN_3D(so, STENCIL_ENABLE, 1);
+      SB_DATA    (so, 0);
+   }
+
+   /* Back-face (two-sided) stencil; requires the front face to be enabled. */
+   if (cso->stencil[1].enabled) {
+      assert(cso->stencil[0].enabled);
+      SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
+      SB_DATA    (so, 1);
+      SB_DATA    (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+      SB_DATA    (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+      SB_DATA    (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+      SB_DATA    (so, nvgl_comparison_op(cso->stencil[1].func));
+      SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
+      SB_DATA    (so, cso->stencil[1].writemask);
+      SB_DATA    (so, cso->stencil[1].valuemask);
+   } else {
+      SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 1);
+      SB_DATA    (so, 0);
+   }
+
+   SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1);
+   if (cso->alpha.enabled) {
+      SB_DATA    (so, 1);
+      SB_BEGIN_3D(so, ALPHA_TEST_REF, 2);
+      SB_DATA    (so, fui(cso->alpha.ref_value));
+      SB_DATA    (so, nvgl_comparison_op(cso->alpha.func));
+   } else {
+      SB_DATA    (so, 0);
+   }
+
+   /* The encoded stream must fit into the fixed-size state[] array. */
+   assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+   return (void *)so;
+}
+
+/* Make hwcso the context's current depth/stencil/alpha state object and flag
+ * it for re-validation. */
+static void
+nv50_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+
+   ctx->dirty |= NV50_NEW_ZSA;
+   ctx->zsa = hwcso;
+}
+
+/* Free a depth/stencil/alpha state object; the pipe argument is not used. */
+static void
+nv50_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/* ====================== SAMPLERS AND TEXTURES ================================
+ */
+
+/* Expands to one switch case mapping PIPE_TEX_WRAP_<n> to NV50_TSC_WRAP_<n>. */
+#define NV50_TSC_WRAP_CASE(n) \
+ case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
+
+/* Translate a gallium PIPE_TEX_WRAP_* value into the matching NV50_TSC_WRAP_*
+ * value.  Unknown values are reported through NOUVEAU_ERR and fall back to
+ * NV50_TSC_WRAP_REPEAT.
+ */
+static INLINE unsigned
+nv50_tsc_wrap_mode(unsigned wrap)
+{
+ switch (wrap) {
+ NV50_TSC_WRAP_CASE(REPEAT);
+ NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(CLAMP);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ return NV50_TSC_WRAP_REPEAT;
+ }
+}
+
+/* Encode a gallium sampler state into the eight words of an nv50_tsc_entry.
+ *
+ * The entry starts with id == -1.  Returns the new entry, or NULL if it
+ * cannot be allocated.
+ */
+void *
+nv50_sampler_state_create(struct pipe_context *pipe,
+                          const struct pipe_sampler_state *cso)
+{
+   struct nv50_tsc_entry *so = MALLOC_STRUCT(nv50_tsc_entry);
+   float f[2];
+
+   /* Allocation can fail; do not write through a NULL entry. */
+   if (!so)
+      return NULL;
+
+   so->id = -1;
+
+   /* Word 0: wrap modes for s/t/r, three bits each. */
+   so->tsc[0] = (0x00026000 |
+                 (nv50_tsc_wrap_mode(cso->wrap_s) << 0) |
+                 (nv50_tsc_wrap_mode(cso->wrap_t) << 3) |
+                 (nv50_tsc_wrap_mode(cso->wrap_r) << 6));
+
+   /* Word 1: magnification, minification and mip filters. */
+   switch (cso->mag_img_filter) {
+   case PIPE_TEX_FILTER_LINEAR:
+      so->tsc[1] = NV50_TSC_1_MAGF_LINEAR;
+      break;
+   case PIPE_TEX_FILTER_NEAREST:
+   default:
+      so->tsc[1] = NV50_TSC_1_MAGF_NEAREST;
+      break;
+   }
+
+   switch (cso->min_img_filter) {
+   case PIPE_TEX_FILTER_LINEAR:
+      so->tsc[1] |= NV50_TSC_1_MINF_LINEAR;
+      break;
+   case PIPE_TEX_FILTER_NEAREST:
+   default:
+      so->tsc[1] |= NV50_TSC_1_MINF_NEAREST;
+      break;
+   }
+
+   switch (cso->min_mip_filter) {
+   case PIPE_TEX_MIPFILTER_LINEAR:
+      so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR;
+      break;
+   case PIPE_TEX_MIPFILTER_NEAREST:
+      so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST;
+      break;
+   case PIPE_TEX_MIPFILTER_NONE:
+   default:
+      so->tsc[1] |= NV50_TSC_1_MIPF_NONE;
+      break;
+   }
+
+   /* These two bits are only set for 3D classes >= NVE4_3D_CLASS. */
+   if (nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) {
+      if (cso->seamless_cube_map)
+         so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS;
+      if (!cso->normalized_coords)
+         so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS;
+   }
+
+   /* Anisotropy level goes into bits 20+ of word 0. */
+   if (cso->max_anisotropy >= 16) {
+      so->tsc[0] |= (7 << 20);
+   } else if (cso->max_anisotropy >= 12) {
+      so->tsc[0] |= (6 << 20);
+   } else {
+      so->tsc[0] |= (cso->max_anisotropy >> 1) << 20;
+
+      if (cso->max_anisotropy >= 4)
+         so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35;
+      else if (cso->max_anisotropy >= 2)
+         so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15;
+   }
+
+   if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+      /* NOTE: must be deactivated for non-shadow textures */
+      so->tsc[0] |= (1 << 9);
+      so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
+   }
+
+   /* LOD bias: clamped to [-16, 15], scaled by 256, kept to 13 bits. */
+   f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f);
+   so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12;
+
+   /* Min/max LOD: clamped to [0, 15], scaled by 256, 12 bits each. */
+   f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f);
+   f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f);
+   so->tsc[2] =
+      (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff);
+
+   /* 8-bit sRGB-converted border colour components (first three only). */
+   so->tsc[2] |=
+      util_format_linear_float_to_srgb_8unorm(cso->border_color.f[0]) << 24;
+   so->tsc[3] =
+      util_format_linear_float_to_srgb_8unorm(cso->border_color.f[1]) << 12;
+   so->tsc[3] |=
+      util_format_linear_float_to_srgb_8unorm(cso->border_color.f[2]) << 20;
+
+   /* Border colour as raw float bits. */
+   so->tsc[4] = fui(cso->border_color.f[0]);
+   so->tsc[5] = fui(cso->border_color.f[1]);
+   so->tsc[6] = fui(cso->border_color.f[2]);
+   so->tsc[7] = fui(cso->border_color.f[3]);
+
+   return (void *)so;
+}
+
+/* Destroy a sampler state object: clear every binding slot (within the
+ * current per-stage counts) that still points at it, call
+ * nv50_screen_tsc_free() on its TSC entry, then free the object. */
+static void
+nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+   unsigned stage;
+
+   for (stage = 0; stage < 3; ++stage) {
+      unsigned slot;
+
+      for (slot = 0; slot < ctx->num_samplers[stage]; ++slot) {
+         if (ctx->samplers[stage][slot] == hwcso)
+            ctx->samplers[stage][slot] = NULL;
+      }
+   }
+
+   nv50_screen_tsc_free(ctx->screen, nv50_tsc_entry(hwcso));
+
+   FREE(hwcso);
+}
+
+/* Bind `nr` sampler state objects to shader stage `s`.
+ *
+ * Slots [0, nr) take the new objects and the objects they replace are
+ * unlocked.  Slots [nr, previous count) are unlocked and cleared.
+ */
+static INLINE void
+nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
+                               unsigned nr, void **hwcso)
+{
+   unsigned i;
+
+   for (i = 0; i < nr; ++i) {
+      struct nv50_tsc_entry *old = nv50->samplers[s][i];
+
+      nv50->samplers[s][i] = nv50_tsc_entry(hwcso[i]);
+      if (old)
+         nv50_screen_tsc_unlock(nv50->screen, old);
+   }
+   for (; i < nv50->num_samplers[s]; ++i) {
+      if (nv50->samplers[s][i]) {
+         nv50_screen_tsc_unlock(nv50->screen, nv50->samplers[s][i]);
+         /* Clear the slot: nv50_sampler_state_delete() only scans the first
+          * num_samplers[s] entries, so a pointer left here could outlive its
+          * object and be unlocked again by a later, larger bind. */
+         nv50->samplers[s][i] = NULL;
+      }
+   }
+
+   nv50->num_samplers[s] = nr;
+
+   nv50->dirty |= NV50_NEW_SAMPLERS;
+}
+
+/* Bind sampler states for stage index 0 (installed as
+ * bind_vertex_sampler_states). */
+static void
+nv50_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nv50_stage_sampler_states_bind(nv50_context(pipe), 0, nr, s);
+}
+
+/* Bind sampler states for stage index 2 (installed as
+ * bind_fragment_sampler_states). */
+static void
+nv50_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nv50_stage_sampler_states_bind(nv50_context(pipe), 2, nr, s);
+}
+
+/* Bind sampler states for stage index 1 (installed as
+ * bind_geometry_sampler_states). */
+static void
+nv50_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nv50_stage_sampler_states_bind(nv50_context(pipe), 1, nr, s);
+}
+
+/* Tear down a sampler view: drop its texture reference, call
+ * nv50_screen_tic_free() on its TIC entry and free the entry.
+ *
+ * NOTE: only called when not referenced anywhere, won't be bound
+ */
+static void
+nv50_sampler_view_destroy(struct pipe_context *pipe,
+                          struct pipe_sampler_view *view)
+{
+   struct nv50_tic_entry *const tic = nv50_tic_entry(view);
+
+   pipe_resource_reference(&view->texture, NULL);
+   nv50_screen_tic_free(nv50_context(pipe)->screen, tic);
+   FREE(tic);
+}
+
+/* Install `nr` sampler views for shader stage `s`.
+ *
+ * Every previously bound view is unlocked first.  Slots [0, nr) then
+ * reference the new views and slots [nr, previous count) are released.  The
+ * texture bindings of the 3D buffer context are reset and the texture state
+ * is flagged dirty.
+ */
+static INLINE void
+nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
+                             unsigned nr,
+                             struct pipe_sampler_view **views)
+{
+   unsigned i;
+
+   for (i = 0; i < nr; ++i) {
+      struct nv50_tic_entry *prev = nv50_tic_entry(nv50->textures[s][i]);
+
+      if (prev != NULL)
+         nv50_screen_tic_unlock(nv50->screen, prev);
+      pipe_sampler_view_reference(&nv50->textures[s][i], views[i]);
+   }
+
+   /* i == nr here; walk the slots that are no longer in use. */
+   for (; i < nv50->num_textures[s]; ++i) {
+      struct nv50_tic_entry *prev = nv50_tic_entry(nv50->textures[s][i]);
+
+      if (prev != NULL) {
+         nv50_screen_tic_unlock(nv50->screen, prev);
+         pipe_sampler_view_reference(&nv50->textures[s][i], NULL);
+      }
+   }
+
+   nv50->num_textures[s] = nr;
+
+   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+
+   nv50->dirty |= NV50_NEW_TEXTURES;
+}
+
+/* Set sampler views for stage index 0 (installed as
+ * set_vertex_sampler_views). */
+static void
+nv50_vp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nv50_stage_set_sampler_views(nv50_context(pipe), 0, nr, views);
+}
+
+/* Set sampler views for stage index 2 (installed as
+ * set_fragment_sampler_views). */
+static void
+nv50_fp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nv50_stage_set_sampler_views(nv50_context(pipe), 2, nr, views);
+}
+
+/* Set sampler views for stage index 1 (installed as
+ * set_geometry_sampler_views). */
+static void
+nv50_gp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nv50_stage_set_sampler_views(nv50_context(pipe), 1, nr, views);
+}
+
+/* ============================= SHADERS =======================================
+ */
+
+/* Common constructor for vertex, geometry and fragment shader state objects.
+ *
+ * Allocates a zeroed nv50_program of the given PIPE_SHADER_* type that owns a
+ * private copy of the TGSI tokens.  The stream output description is copied
+ * only when it declares at least one output.
+ *
+ * Returns the program, or NULL if either allocation fails.
+ */
+static void *
+nv50_sp_state_create(struct pipe_context *pipe,
+                     const struct pipe_shader_state *cso, unsigned type)
+{
+   struct nv50_program *prog;
+
+   prog = CALLOC_STRUCT(nv50_program);
+   if (!prog)
+      return NULL;
+
+   prog->type = type;
+   prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+   /* Without tokens the program is unusable; fail the create instead of
+    * handing out an object with a NULL token pointer. */
+   if (!prog->pipe.tokens) {
+      FREE(prog);
+      return NULL;
+   }
+
+   if (cso->stream_output.num_outputs)
+      prog->pipe.stream_output = cso->stream_output;
+
+   return (void *)prog;
+}
+
+/* Destroy a shader state object of any stage: run nv50_program_destroy() on
+ * it, then free its token copy and the program itself. */
+static void
+nv50_sp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_program *const program = hwcso;
+
+   nv50_program_destroy(nv50_context(pipe), program);
+
+   /* tokens is const-qualified in the gallium struct; cast for FREE(). */
+   FREE((void *)program->pipe.tokens);
+   FREE(program);
+}
+
+/* Create a vertex shader state object (type PIPE_SHADER_VERTEX). */
+static void *
+nv50_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nv50_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX);
+}
+
+/* Select hwcso as the current vertex program and flag it for re-validation. */
+static void
+nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+
+   ctx->dirty |= NV50_NEW_VERTPROG;
+   ctx->vertprog = hwcso;
+}
+
+/* Create a fragment shader state object (type PIPE_SHADER_FRAGMENT). */
+static void *
+nv50_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nv50_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT);
+}
+
+/* Select hwcso as the current fragment program and flag it for
+ * re-validation. */
+static void
+nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+
+   ctx->dirty |= NV50_NEW_FRAGPROG;
+   ctx->fragprog = hwcso;
+}
+
+/* Create a geometry shader state object (type PIPE_SHADER_GEOMETRY). */
+static void *
+nv50_gp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nv50_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY);
+}
+
+/* Select hwcso as the current geometry program and flag it for
+ * re-validation. */
+static void
+nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+
+   ctx->dirty |= NV50_NEW_GMTYPROG;
+   ctx->gmtyprog = hwcso;
+}
+
+/* Bind constant buffer slot `index` of shader stage `shader`.
+ *
+ * cb may be NULL, which unbinds the slot.  A non-NULL cb->user_buffer makes
+ * the slot a user (CPU pointer) buffer; otherwise cb->buffer is referenced.
+ * Requests for PIPE_SHADER_COMPUTE are ignored.
+ */
+static void
+nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ struct pipe_constant_buffer *cb)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct pipe_resource *res = cb ? cb->buffer : NULL;
+ const unsigned s = nv50_context_shader_stage(shader);
+ const unsigned i = index;
+
+ if (shader == PIPE_SHADER_COMPUTE)
+ return;
+
+ /* u.buf and u.data are members of one union.  When the slot currently
+ * holds a user pointer it is cleared first, presumably so that the
+ * reference call below does not treat that pointer as a resource. */
+ if (nv50->constbuf[s][i].user)
+ nv50->constbuf[s][i].u.buf = NULL;
+ else
+ if (nv50->constbuf[s][i].u.buf)
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i));
+
+ pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res);
+
+ nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE;
+ if (nv50->constbuf[s][i].user) {
+ /* User buffer: overwrites the union with the data pointer. */
+ nv50->constbuf[s][i].u.data = cb->user_buffer;
+ nv50->constbuf[s][i].size = cb->buffer_size;
+ nv50->constbuf_valid[s] |= 1 << i;
+ } else
+ if (res) {
+ /* Resource-backed buffer: size is rounded up to a multiple of 0x100. */
+ nv50->constbuf[s][i].offset = cb->buffer_offset;
+ nv50->constbuf[s][i].size = align(cb->buffer_size, 0x100);
+ nv50->constbuf_valid[s] |= 1 << i;
+ } else {
+ nv50->constbuf_valid[s] &= ~(1 << i);
+ }
+ nv50->constbuf_dirty[s] |= 1 << i;
+
+ nv50->dirty |= NV50_NEW_CONSTBUF;
+}
+
+/* =============================================================================
+ */
+
+/* Store a copy of the blend colour and flag it for re-validation. */
+static void
+nv50_set_blend_color(struct pipe_context *pipe,
+                     const struct pipe_blend_color *bcol)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+
+   ctx->dirty |= NV50_NEW_BLEND_COLOUR;
+   ctx->blend_colour = *bcol;
+}
+
+/* Store a copy of the stencil reference values and flag them for
+ * re-validation. */
+static void
+nv50_set_stencil_ref(struct pipe_context *pipe,
+                     const struct pipe_stencil_ref *sr)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+
+   ctx->dirty |= NV50_NEW_STENCIL_REF;
+   ctx->stencil_ref = *sr;
+}
+
+/* Copy the user clip planes into the context and flag the clip state for
+ * re-validation. */
+static void
+nv50_set_clip_state(struct pipe_context *pipe,
+                    const struct pipe_clip_state *clip)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+
+   ctx->dirty |= NV50_NEW_CLIP;
+
+   memcpy(ctx->clip.ucp, clip->ucp, sizeof(clip->ucp));
+}
+
+/* Record the multisample mask and flag it for re-validation. */
+static void
+nv50_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+
+   ctx->dirty |= NV50_NEW_SAMPLE_MASK;
+   ctx->sample_mask = sample_mask;
+}
+
+
+/* Take over a new framebuffer configuration: reference the new colour and
+ * depth/stencil surfaces, release colour surfaces beyond the new count, copy
+ * the dimensions and flag the framebuffer for re-validation.
+ */
+static void
+nv50_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ unsigned i;
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+
+ for (i = 0; i < fb->nr_cbufs; ++i)
+ pipe_surface_reference(&nv50->framebuffer.cbufs[i], fb->cbufs[i]);
+ /* Release colour buffers that were bound before but are beyond the new
+ * count. */
+ for (; i < nv50->framebuffer.nr_cbufs; ++i)
+ pipe_surface_reference(&nv50->framebuffer.cbufs[i], NULL);
+
+ nv50->framebuffer.nr_cbufs = fb->nr_cbufs;
+
+ nv50->framebuffer.width = fb->width;
+ nv50->framebuffer.height = fb->height;
+
+ pipe_surface_reference(&nv50->framebuffer.zsbuf, fb->zsbuf);
+
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+}
+
+/* Store a copy of the polygon stipple pattern and flag it for
+ * re-validation. */
+static void
+nv50_set_polygon_stipple(struct pipe_context *pipe,
+                         const struct pipe_poly_stipple *stipple)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+
+   ctx->dirty |= NV50_NEW_STIPPLE;
+   ctx->stipple = *stipple;
+}
+
+/* Store the scissor rectangle and flag it for re-validation.
+ *
+ * Only the first element of `scissor` is copied; start_slot and num_scissors
+ * are not used.
+ */
+static void
+nv50_set_scissor_states(struct pipe_context *pipe,
+ unsigned start_slot,
+ unsigned num_scissors,
+ const struct pipe_scissor_state *scissor)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->scissor = *scissor;
+ nv50->dirty |= NV50_NEW_SCISSOR;
+}
+
+/* Store the viewport transform and flag it for re-validation.
+ *
+ * Only the first element of `vpt` is copied; start_slot and num_viewports
+ * are not used.
+ */
+static void
+nv50_set_viewport_states(struct pipe_context *pipe,
+ unsigned start_slot,
+ unsigned num_viewports,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->viewport = *vpt;
+ nv50->dirty |= NV50_NEW_VIEWPORT;
+}
+
+/* Update vertex buffer slots [start_slot, start_slot + count) and keep the
+ * vbo_user / vbo_constant bitmasks in sync.
+ *
+ * A slot is marked "user" when it has no buffer resource but a user pointer;
+ * a user slot with stride 0 is additionally marked "constant".  A NULL `vb`
+ * clears both bits for the whole range.
+ */
+static void
+nv50_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned start_slot, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ unsigned i;
+
+ util_set_vertex_buffers_count(nv50->vtxbuf, &nv50->num_vtxbufs, vb,
+ start_slot, count);
+
+ if (!vb) {
+ /* NOTE(review): this early return skips the bufctx reset and the
+ * NV50_NEW_ARRAYS dirty flag set at the end of the function - confirm
+ * that this is intended. */
+ nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot);
+ nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot);
+ return;
+ }
+
+ for (i = 0; i < count; ++i) {
+ unsigned dst_index = start_slot + i;
+
+ if (!vb[i].buffer && vb[i].user_buffer) {
+ nv50->vbo_user |= 1 << dst_index;
+ if (!vb[i].stride)
+ nv50->vbo_constant |= 1 << dst_index;
+ else
+ nv50->vbo_constant &= ~(1 << dst_index);
+ } else {
+ nv50->vbo_user &= ~(1 << dst_index);
+ nv50->vbo_constant &= ~(1 << dst_index);
+ }
+ }
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
+
+ nv50->dirty |= NV50_NEW_ARRAYS;
+}
+
+/* Set or clear the index buffer.
+ *
+ * With a resource-backed buffer the offset is stored and the resource is
+ * added to the 3D buffer context for reading.  Without a resource the user
+ * pointer is stored instead.  A NULL `ib` drops the current buffer reference.
+ * No dirty flag is raised here.
+ */
+static void
+nv50_set_index_buffer(struct pipe_context *pipe,
+ const struct pipe_index_buffer *ib)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ /* Drop the buffer-context entry of the previously bound index buffer. */
+ if (nv50->idxbuf.buffer)
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
+
+ if (ib) {
+ pipe_resource_reference(&nv50->idxbuf.buffer, ib->buffer);
+ nv50->idxbuf.index_size = ib->index_size;
+ if (ib->buffer) {
+ nv50->idxbuf.offset = ib->offset;
+ BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(ib->buffer), RD);
+ } else {
+ nv50->idxbuf.user_buffer = ib->user_buffer;
+ }
+ } else {
+ pipe_resource_reference(&nv50->idxbuf.buffer, NULL);
+ }
+}
+
+/* Select hwcso as the current vertex elements state and flag it for
+ * re-validation. */
+static void
+nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *const ctx = nv50_context(pipe);
+
+   ctx->dirty |= NV50_NEW_VERTEX;
+   ctx->vertex = hwcso;
+}
+
+/* Create a stream output target covering [offset, offset + size) of `res`.
+ *
+ * On 3D classes >= NVA0_3D_CLASS a NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET
+ * query is created alongside the target; otherwise pq is NULL.  The target
+ * starts out "clean" with a reference count of 1 and holds a reference on
+ * `res`.
+ *
+ * Returns NULL if the target or its query cannot be allocated.
+ */
+static struct pipe_stream_output_target *
+nv50_so_target_create(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size)
+{
+ struct nv50_so_target *targ = MALLOC_STRUCT(nv50_so_target);
+ if (!targ)
+ return NULL;
+
+ if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) {
+ targ->pq = pipe->create_query(pipe,
+ NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET);
+ if (!targ->pq) {
+ FREE(targ);
+ return NULL;
+ }
+ } else {
+ targ->pq = NULL;
+ }
+ targ->clean = TRUE;
+
+ targ->pipe.buffer_size = size;
+ targ->pipe.buffer_offset = offset;
+ targ->pipe.context = pipe;
+ /* The struct comes from MALLOC_STRUCT, so the pointer must be NULL before
+ * pipe_resource_reference() reads it. */
+ targ->pipe.buffer = NULL;
+ pipe_resource_reference(&targ->pipe.buffer, res);
+ pipe_reference_init(&targ->pipe.reference, 1);
+
+ /* NOTE(review): targ->stride is not initialised here - confirm it is
+ * written elsewhere before it is read. */
+ return &targ->pipe;
+}
+
+/* Destroy a stream output target: destroy its query if it has one, drop the
+ * buffer reference and free the object. */
+static void
+nv50_so_target_destroy(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg)
+{
+ struct nv50_so_target *targ = nv50_so_target(ptarg);
+ if (targ->pq)
+ pipe->destroy_query(pipe, targ->pq);
+ pipe_resource_reference(&targ->pipe.buffer, NULL);
+ FREE(targ);
+}
+
+/* Bind up to four stream output targets.
+ *
+ * Bit i of append_mask requests that target i be appended to.  A slot that
+ * keeps the same target and is in append mode is left untouched; every other
+ * slot is marked dirty.  On 3D classes >= NVA0_3D_CLASS the offset of a
+ * target being replaced or unbound is saved through
+ * nva0_so_target_save_offset(); only the first such call in one invocation
+ * is passed serialize == TRUE.
+ */
+static void
+nv50_set_stream_output_targets(struct pipe_context *pipe,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ unsigned append_mask)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ unsigned i;
+ boolean serialize = TRUE;
+ const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS;
+
+ assert(num_targets <= 4);
+
+ for (i = 0; i < num_targets; ++i) {
+ const boolean changed = nv50->so_target[i] != targets[i];
+ if (!changed && (append_mask & (1 << i)))
+ continue;
+ nv50->so_targets_dirty |= 1 << i;
+
+ if (can_resume && changed && nv50->so_target[i]) {
+ nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
+ serialize = FALSE;
+ }
+
+ /* Not appending: mark the new target clean again. */
+ if (targets[i] && !(append_mask & (1 << i)))
+ nv50_so_target(targets[i])->clean = TRUE;
+
+ pipe_so_target_reference(&nv50->so_target[i], targets[i]);
+ }
+ /* Unbind the slots beyond the new target count. */
+ for (; i < nv50->num_so_targets; ++i) {
+ if (can_resume && nv50->so_target[i]) {
+ nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
+ serialize = FALSE;
+ }
+ pipe_so_target_reference(&nv50->so_target[i], NULL);
+ nv50->so_targets_dirty |= 1 << i;
+ }
+ nv50->num_so_targets = num_targets;
+
+ if (nv50->so_targets_dirty)
+ nv50->dirty |= NV50_NEW_STRMOUT;
+}
+
+/* Install the state create/bind/delete and set_* entry points into the
+ * context's pipe_context function table. */
+void
+nv50_init_state_functions(struct nv50_context *nv50)
+{
+ struct pipe_context *pipe = &nv50->base.pipe;
+
+ pipe->create_blend_state = nv50_blend_state_create;
+ pipe->bind_blend_state = nv50_blend_state_bind;
+ pipe->delete_blend_state = nv50_blend_state_delete;
+
+ pipe->create_rasterizer_state = nv50_rasterizer_state_create;
+ pipe->bind_rasterizer_state = nv50_rasterizer_state_bind;
+ pipe->delete_rasterizer_state = nv50_rasterizer_state_delete;
+
+ pipe->create_depth_stencil_alpha_state = nv50_zsa_state_create;
+ pipe->bind_depth_stencil_alpha_state = nv50_zsa_state_bind;
+ pipe->delete_depth_stencil_alpha_state = nv50_zsa_state_delete;
+
+ pipe->create_sampler_state = nv50_sampler_state_create;
+ pipe->delete_sampler_state = nv50_sampler_state_delete;
+ pipe->bind_vertex_sampler_states = nv50_vp_sampler_states_bind;
+ pipe->bind_fragment_sampler_states = nv50_fp_sampler_states_bind;
+ pipe->bind_geometry_sampler_states = nv50_gp_sampler_states_bind;
+
+ pipe->create_sampler_view = nv50_create_sampler_view;
+ pipe->sampler_view_destroy = nv50_sampler_view_destroy;
+ pipe->set_vertex_sampler_views = nv50_vp_set_sampler_views;
+ pipe->set_fragment_sampler_views = nv50_fp_set_sampler_views;
+ pipe->set_geometry_sampler_views = nv50_gp_set_sampler_views;
+
+ pipe->create_vs_state = nv50_vp_state_create;
+ pipe->create_fs_state = nv50_fp_state_create;
+ pipe->create_gs_state = nv50_gp_state_create;
+ pipe->bind_vs_state = nv50_vp_state_bind;
+ pipe->bind_fs_state = nv50_fp_state_bind;
+ pipe->bind_gs_state = nv50_gp_state_bind;
+ /* All three shader stages share one delete function. */
+ pipe->delete_vs_state = nv50_sp_state_delete;
+ pipe->delete_fs_state = nv50_sp_state_delete;
+ pipe->delete_gs_state = nv50_sp_state_delete;
+
+ pipe->set_blend_color = nv50_set_blend_color;
+ pipe->set_stencil_ref = nv50_set_stencil_ref;
+ pipe->set_clip_state = nv50_set_clip_state;
+ pipe->set_sample_mask = nv50_set_sample_mask;
+ pipe->set_constant_buffer = nv50_set_constant_buffer;
+ pipe->set_framebuffer_state = nv50_set_framebuffer_state;
+ pipe->set_polygon_stipple = nv50_set_polygon_stipple;
+ pipe->set_scissor_states = nv50_set_scissor_states;
+ pipe->set_viewport_states = nv50_set_viewport_states;
+
+ pipe->create_vertex_elements_state = nv50_vertex_state_create;
+ pipe->delete_vertex_elements_state = nv50_vertex_state_delete;
+ pipe->bind_vertex_elements_state = nv50_vertex_state_bind;
+
+ pipe->set_vertex_buffers = nv50_set_vertex_buffers;
+ pipe->set_index_buffer = nv50_set_index_buffer;
+
+ pipe->create_stream_output_target = nv50_so_target_create;
+ pipe->stream_output_target_destroy = nv50_so_target_destroy;
+ pipe->set_stream_output_targets = nv50_set_stream_output_targets;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
new file mode 100644
index 00000000000..866829ca22d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -0,0 +1,414 @@
+
+#include "nv50/nv50_context.h"
+#include "os/os_time.h"
+
+/* Emit the framebuffer setup: render target control, screen scissor, one
+ * block of methods per colour buffer, the zeta (depth/stencil) buffer, the
+ * multisample mode and the viewport 0 extent.
+ *
+ * Each bound miptree is marked as being written by the GPU and added to the
+ * 3D buffer context for writing.  If it was being read by the GPU,
+ * state.rt_serialize is set so that the caller emits a serialize.
+ */
+static void
+nv50_validate_fb(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+   unsigned i;
+   unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1;
+   uint32_t array_size = 0xffff, array_mode = 0;
+
+   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+
+   BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
+   /* 076543210 is an octal literal, i.e. one 3-bit field per digit. */
+   PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs);
+   BEGIN_NV04(push, NV50_3D(SCREEN_SCISSOR_HORIZ), 2);
+   PUSH_DATA (push, fb->width << 16);
+   PUSH_DATA (push, fb->height << 16);
+
+   for (i = 0; i < fb->nr_cbufs; ++i) {
+      struct nv50_miptree *mt = nv50_miptree(fb->cbufs[i]->texture);
+      struct nv50_surface *sf = nv50_surface(fb->cbufs[i]);
+      struct nouveau_bo *bo = mt->base.bo;
+
+      array_size = MIN2(array_size, sf->depth);
+      if (mt->layout_3d)
+         array_mode = NV50_3D_RT_ARRAY_MODE_MODE_3D; /* 1 << 16 */
+
+      /* can't mix 3D with ARRAY or have RTs of different depth/array_size */
+      assert(mt->layout_3d || !array_mode || array_size == 1);
+
+      BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 5);
+      PUSH_DATAh(push, bo->offset + sf->offset);
+      PUSH_DATA (push, bo->offset + sf->offset);
+      PUSH_DATA (push, nv50_format_table[sf->base.format].rt);
+      if (likely(nouveau_bo_memtype(bo))) {
+         /* Non-zero memtype: tiled surface. */
+         PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+         PUSH_DATA (push, mt->layer_stride >> 2);
+         BEGIN_NV04(push, NV50_3D(RT_HORIZ(i)), 2);
+         PUSH_DATA (push, sf->width);
+         PUSH_DATA (push, sf->height);
+         BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
+         PUSH_DATA (push, array_mode | array_size);
+      } else {
+         /* Zero memtype: linear surface, described by its pitch. */
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 0);
+         BEGIN_NV04(push, NV50_3D(RT_HORIZ(i)), 2);
+         PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | mt->level[0].pitch);
+         PUSH_DATA (push, sf->height);
+         BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
+         PUSH_DATA (push, 0);
+
+         assert(!fb->zsbuf);
+         assert(!mt->ms_mode);
+      }
+
+      ms_mode = mt->ms_mode;
+
+      if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
+         nv50->state.rt_serialize = TRUE;
+      mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+      /* Clear only the READING bit.  Masking with the bit itself would drop
+       * the WRITING flag that was just set. */
+      mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+      /* only register for writing, otherwise we'd always serialize here */
+      BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR);
+   }
+
+   if (fb->zsbuf) {
+      struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
+      struct nv50_surface *sf = nv50_surface(fb->zsbuf);
+      struct nouveau_bo *bo = mt->base.bo;
+      int unk = mt->base.base.target == PIPE_TEXTURE_3D || sf->depth == 1;
+
+      BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5);
+      PUSH_DATAh(push, bo->offset + sf->offset);
+      PUSH_DATA (push, bo->offset + sf->offset);
+      PUSH_DATA (push, nv50_format_table[fb->zsbuf->format].rt);
+      PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+      PUSH_DATA (push, mt->layer_stride >> 2);
+      BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
+      PUSH_DATA (push, 1);
+      BEGIN_NV04(push, NV50_3D(ZETA_HORIZ), 3);
+      PUSH_DATA (push, sf->width);
+      PUSH_DATA (push, sf->height);
+      PUSH_DATA (push, (unk << 16) | sf->depth);
+
+      ms_mode = mt->ms_mode;
+
+      if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
+         nv50->state.rt_serialize = TRUE;
+      mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+      /* Same as for the colour buffers: clear READING, keep WRITING. */
+      mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+      BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR);
+   } else {
+      BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
+      PUSH_DATA (push, 0);
+   }
+
+   /* ms_mode comes from the last surface processed (zeta if present). */
+   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
+   PUSH_DATA (push, ms_mode);
+
+   BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
+   PUSH_DATA (push, fb->width << 16);
+   PUSH_DATA (push, fb->height << 16);
+}
+
+/* Emit the four blend colour components as floats. */
+static void
+nv50_validate_blend_colour(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ BEGIN_NV04(push, NV50_3D(BLEND_COLOR(0)), 4);
+ PUSH_DATAf(push, nv50->blend_colour.color[0]);
+ PUSH_DATAf(push, nv50->blend_colour.color[1]);
+ PUSH_DATAf(push, nv50->blend_colour.color[2]);
+ PUSH_DATAf(push, nv50->blend_colour.color[3]);
+}
+
+/* Emit the front (ref_value[0]) and back (ref_value[1]) stencil reference
+ * values. */
+static void
+nv50_validate_stencil_ref(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ BEGIN_NV04(push, NV50_3D(STENCIL_FRONT_FUNC_REF), 1);
+ PUSH_DATA (push, nv50->stencil_ref.ref_value[0]);
+ BEGIN_NV04(push, NV50_3D(STENCIL_BACK_FUNC_REF), 1);
+ PUSH_DATA (push, nv50->stencil_ref.ref_value[1]);
+}
+
+/* Emit the 32-word polygon stipple pattern; each word is byte-swapped with
+ * util_bswap32() before it is pushed. */
+static void
+nv50_validate_stipple(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ unsigned i;
+
+ BEGIN_NV04(push, NV50_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
+ for (i = 0; i < 32; ++i)
+ PUSH_DATA(push, util_bswap32(nv50->stipple.stipple[i]));
+}
+
+/* Emit scissor rectangle 0.
+ *
+ * With NV50_SCISSORS_CLIPPING defined, the rectangle is the scissor (or the
+ * whole framebuffer when the rasterizer state has scissoring disabled)
+ * intersected with the viewport extent, and nothing is emitted when none of
+ * the inputs changed.  Without it the stored scissor is emitted as is.
+ */
+static void
+nv50_validate_scissor(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct pipe_scissor_state *s = &nv50->scissor;
+#ifdef NV50_SCISSORS_CLIPPING
+ struct pipe_viewport_state *vp = &nv50->viewport;
+ int minx, maxx, miny, maxy;
+
+ /* Skip if scissor, viewport and framebuffer are clean and the rasterizer's
+ * scissor enable matches what was last emitted. */
+ if (!(nv50->dirty &
+ (NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | NV50_NEW_FRAMEBUFFER)) &&
+ nv50->state.scissor == nv50->rast->pipe.scissor)
+ return;
+ nv50->state.scissor = nv50->rast->pipe.scissor;
+
+ if (nv50->state.scissor) {
+ minx = s->minx;
+ maxx = s->maxx;
+ miny = s->miny;
+ maxy = s->maxy;
+ } else {
+ minx = 0;
+ maxx = nv50->framebuffer.width;
+ miny = 0;
+ maxy = nv50->framebuffer.height;
+ }
+
+ /* Intersect with the viewport rectangle: translate +/- |scale|. */
+ minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0])));
+ maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0])));
+ miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1])));
+ maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1])));
+
+ BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2);
+ PUSH_DATA (push, (maxx << 16) | minx);
+ PUSH_DATA (push, (maxy << 16) | miny);
+#else
+ BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2);
+ PUSH_DATA (push, (s->maxx << 16) | s->minx);
+ PUSH_DATA (push, (s->maxy << 16) | s->miny);
+#endif
+}
+
+/* Emit the translate and scale vectors of viewport 0.  The depth range
+ * (translate[2] -/+ |scale[2]|) is only emitted when NV50_SCISSORS_CLIPPING
+ * is defined. */
+static void
+nv50_validate_viewport(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ float zmin, zmax;
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSLATE_X(0)), 3);
+ PUSH_DATAf(push, nv50->viewport.translate[0]);
+ PUSH_DATAf(push, nv50->viewport.translate[1]);
+ PUSH_DATAf(push, nv50->viewport.translate[2]);
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_SCALE_X(0)), 3);
+ PUSH_DATAf(push, nv50->viewport.scale[0]);
+ PUSH_DATAf(push, nv50->viewport.scale[1]);
+ PUSH_DATAf(push, nv50->viewport.scale[2]);
+
+ /* Computed unconditionally, but only used inside the #ifdef below. */
+ zmin = nv50->viewport.translate[2] - fabsf(nv50->viewport.scale[2]);
+ zmax = nv50->viewport.translate[2] + fabsf(nv50->viewport.scale[2]);
+
+#ifdef NV50_SCISSORS_CLIPPING
+ BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(0)), 2);
+ PUSH_DATAf(push, zmin);
+ PUSH_DATAf(push, zmax);
+#endif
+}
+
+/* Make sure program `vp` provides enough clip distances for the clip plane
+ * enable mask `mask`.
+ *
+ * The required count is the index of the highest set bit plus one.  If the
+ * program already has at least that many (vp.clpd_nr), nothing happens.
+ * Otherwise the program is destroyed, clpd_nr is raised, and the vertex or
+ * geometry program plus the fragment program linkage are validated again.
+ */
+static INLINE void
+nv50_check_program_ucps(struct nv50_context *nv50,
+ struct nv50_program *vp, uint8_t mask)
+{
+ const unsigned n = util_logbase2(mask) + 1;
+
+ if (vp->vp.clpd_nr >= n)
+ return;
+ nv50_program_destroy(nv50, vp);
+
+ vp->vp.clpd_nr = n;
+ /* vp is either the bound vertex program or, failing that, treated as the
+ * geometry program. */
+ if (likely(vp == nv50->vertprog)) {
+ nv50->dirty |= NV50_NEW_VERTPROG;
+ nv50_vertprog_validate(nv50);
+ } else {
+ nv50->dirty |= NV50_NEW_GMTYPROG;
+ nv50_gmtyprog_validate(nv50);
+ }
+ nv50_fp_linkage_validate(nv50);
+}
+
+/* Upload the user clip planes (when NV50_NEW_CLIP is dirty) and emit the
+ * clip distance enable mask taken from the rasterizer state.  When any plane
+ * is enabled, the geometry program (or the vertex program if none is bound)
+ * is checked for a sufficient number of clip distances.
+ */
+static void
+nv50_validate_clip(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_program *vp;
+ uint8_t clip_enable;
+
+ if (nv50->dirty & NV50_NEW_CLIP) {
+ /* Write all plane equations (4 words each) through CB_DATA, after
+ * pointing CB_ADDR at offset 0 of NV50_CB_AUX. */
+ BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
+ PUSH_DATA (push, (0 << 8) | NV50_CB_AUX);
+ BEGIN_NI04(push, NV50_3D(CB_DATA(0)), PIPE_MAX_CLIP_PLANES * 4);
+ PUSH_DATAp(push, &nv50->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
+ }
+
+ vp = nv50->gmtyprog;
+ if (likely(!vp))
+ vp = nv50->vertprog;
+
+ clip_enable = nv50->rast->pipe.clip_plane_enable;
+
+ BEGIN_NV04(push, NV50_3D(CLIP_DISTANCE_ENABLE), 1);
+ PUSH_DATA (push, clip_enable);
+
+ if (clip_enable)
+ nv50_check_program_ucps(nv50, vp, clip_enable);
+}
+
+/* Copy the pre-encoded blend state words into the push buffer. */
+static void
+nv50_validate_blend(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ PUSH_SPACE(push, nv50->blend->size);
+ PUSH_DATAp(push, nv50->blend->state, nv50->blend->size);
+}
+
+/* Copy the pre-encoded depth/stencil/alpha state words into the push
+ * buffer. */
+static void
+nv50_validate_zsa(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ PUSH_SPACE(push, nv50->zsa->size);
+ PUSH_DATAp(push, nv50->zsa->state, nv50->zsa->size);
+}
+
+/* Copy the pre-encoded rasterizer state words into the push buffer. */
+static void
+nv50_validate_rasterizer(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ PUSH_SPACE(push, nv50->rast->size);
+ PUSH_DATAp(push, nv50->rast->state, nv50->rast->size);
+}
+
+/* Emit the low 16 bits of the sample mask into all four MSAA_MASK words. */
+static void
+nv50_validate_sample_mask(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   const unsigned mask16 = nv50->sample_mask & 0xffff;
+   unsigned word;
+
+   BEGIN_NV04(push, NV50_3D(MSAA_MASK(0)), 4);
+   for (word = 0; word < 4; ++word)
+      PUSH_DATA (push, mask16);
+}
+
+/* Make ctx_to the screen's current context.
+ *
+ * The cached hardware state of the previous context (if any) is copied over
+ * and everything is marked dirty, except for state groups whose state object
+ * has not been bound in ctx_to yet.
+ */
+static void
+nv50_switch_pipe_context(struct nv50_context *ctx_to)
+{
+ struct nv50_context *ctx_from = ctx_to->screen->cur_ctx;
+
+ if (ctx_from)
+ ctx_to->state = ctx_from->state;
+
+ ctx_to->dirty = ~0;
+
+ /* Do not flag state whose object is NULL; the validate functions for
+ * blend, rasterizer and zsa dereference these pointers. */
+ if (!ctx_to->vertex)
+ ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS);
+
+ if (!ctx_to->vertprog)
+ ctx_to->dirty &= ~NV50_NEW_VERTPROG;
+ if (!ctx_to->fragprog)
+ ctx_to->dirty &= ~NV50_NEW_FRAGPROG;
+
+ if (!ctx_to->blend)
+ ctx_to->dirty &= ~NV50_NEW_BLEND;
+ if (!ctx_to->rast)
+#ifdef NV50_SCISSORS_CLIPPING
+ /* The scissor validation reads the rasterizer state in this mode. */
+ ctx_to->dirty &= ~(NV50_NEW_RASTERIZER | NV50_NEW_SCISSOR);
+#else
+ ctx_to->dirty &= ~NV50_NEW_RASTERIZER;
+#endif
+ if (!ctx_to->zsa)
+ ctx_to->dirty &= ~NV50_NEW_ZSA;
+
+ ctx_to->screen->cur_ctx = ctx_to;
+}
+
+/* Table of validation functions.  Each entry pairs a function with the mask
+ * of NV50_NEW_* dirty bits that trigger it; nv50_state_validate() walks the
+ * table in array order. */
+static struct state_validate {
+ void (*func)(struct nv50_context *);
+ uint32_t states;
+} validate_list[] = {
+ { nv50_validate_fb, NV50_NEW_FRAMEBUFFER },
+ { nv50_validate_blend, NV50_NEW_BLEND },
+ { nv50_validate_zsa, NV50_NEW_ZSA },
+ { nv50_validate_sample_mask, NV50_NEW_SAMPLE_MASK },
+ { nv50_validate_rasterizer, NV50_NEW_RASTERIZER },
+ { nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR },
+ { nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF },
+ { nv50_validate_stipple, NV50_NEW_STIPPLE },
+#ifdef NV50_SCISSORS_CLIPPING
+ { nv50_validate_scissor, NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT |
+ NV50_NEW_RASTERIZER |
+ NV50_NEW_FRAMEBUFFER },
+#else
+ { nv50_validate_scissor, NV50_NEW_SCISSOR },
+#endif
+ { nv50_validate_viewport, NV50_NEW_VIEWPORT },
+ { nv50_vertprog_validate, NV50_NEW_VERTPROG },
+ { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG },
+ { nv50_fragprog_validate, NV50_NEW_FRAGPROG },
+ { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
+ NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER },
+ { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
+ { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
+ NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
+ { nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
+ NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
+ { nv50_constbufs_validate, NV50_NEW_CONSTBUF },
+ { nv50_validate_textures, NV50_NEW_TEXTURES },
+ { nv50_validate_samplers, NV50_NEW_SAMPLERS },
+ { nv50_stream_output_validate, NV50_NEW_STRMOUT |
+ NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
+ { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }
+};
+/* Number of entries in validate_list. */
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
+
+/* Bring the hardware state up to date for the state groups selected by
+ * `mask`.
+ *
+ * Switches the screen's current context if needed, runs every validate_list
+ * entry whose trigger mask intersects the dirty bits selected by `mask`,
+ * clears those bits, emits a serialize if one was requested, then validates
+ * the push buffer against the 3D buffer context.
+ *
+ * `words` is not used in this function.  Returns TRUE when
+ * nouveau_pushbuf_validate() returns 0.
+ */
+boolean
+nv50_state_validate(struct nv50_context *nv50, uint32_t mask, unsigned words)
+{
+ uint32_t state_mask;
+ int ret;
+ unsigned i;
+
+ if (nv50->screen->cur_ctx != nv50)
+ nv50_switch_pipe_context(nv50);
+
+ state_mask = nv50->dirty & mask;
+
+ if (state_mask) {
+ for (i = 0; i < validate_list_len; ++i) {
+ struct state_validate *validate = &validate_list[i];
+
+ if (state_mask & validate->states)
+ validate->func(nv50);
+ }
+ nv50->dirty &= ~state_mask;
+
+ /* Set by nv50_validate_fb() when a render target was being read. */
+ if (nv50->state.rt_serialize) {
+ nv50->state.rt_serialize = FALSE;
+ BEGIN_NV04(nv50->base.pushbuf, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (nv50->base.pushbuf, 0);
+ }
+
+ nv50_bufctx_fence(nv50->bufctx_3d, FALSE);
+ }
+ nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d);
+ ret = nouveau_pushbuf_validate(nv50->base.pushbuf);
+
+ if (unlikely(nv50->state.flushed)) {
+ nv50->state.flushed = FALSE;
+ nv50_bufctx_fence(nv50->bufctx_3d, TRUE);
+ }
+ return !ret;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
new file mode 100644
index 00000000000..238951733cf
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
@@ -0,0 +1,78 @@
+
+#ifndef __NV50_STATEOBJ_H__
+#define __NV50_STATEOBJ_H__
+
+#include "pipe/p_state.h"
+
+/* Compile-time switch, defined unconditionally here; driver code tests it
+ * with #ifndef to leave out an explicit SCISSOR_ENABLE write.
+ */
+#define NV50_SCISSORS_CLIPPING
+
+/* Append a packet header for 3D method `m` with `s` data words to the
+ * state object's word array. `so` is evaluated twice and so->size is
+ * post-incremented; no bounds check is made against the array length.
+ */
+#define SB_BEGIN_3D(so, m, s) \
+ (so)->state[(so)->size++] = NV50_FIFO_PKHDR(NV50_3D(m), s)
+
+/* Same as SB_BEGIN_3D, but `m` is passed through SUBC_3D() instead of
+ * NV50_3D().
+ */
+#define SB_BEGIN_3D_(so, m, s) \
+ (so)->state[(so)->size++] = NV50_FIFO_PKHDR(SUBC_3D(m), s)
+
+/* Append one data word `u` to the state object's word array. */
+#define SB_DATA(so, u) (so)->state[(so)->size++] = (u)
+
+#include "nv50/nv50_stateobj_tex.h"
+
+/* Blend state object: the gallium blend state plus a size/state[] pair
+ * with the layout the SB_* macros append to.
+ */
+struct nv50_blend_stateobj {
+ struct pipe_blend_state pipe;
+ int size; /* index of the next free word in state[] when filled via SB_* */
+ uint32_t state[84]; // TODO: allocate less if !independent_blend_enable
+};
+
+/* Rasterizer state object: the gallium rasterizer state plus a size/state[]
+ * pair with the layout the SB_* macros append to.
+ */
+struct nv50_rasterizer_stateobj {
+ struct pipe_rasterizer_state pipe;
+ int size; /* index of the next free word in state[] when filled via SB_* */
+ uint32_t state[48];
+};
+
+/* Depth/stencil/alpha state object: the gallium state plus a size/state[]
+ * pair with the layout the SB_* macros append to.
+ */
+struct nv50_zsa_stateobj {
+ struct pipe_depth_stencil_alpha_state pipe;
+ int size; /* index of the next free word in state[] when filled via SB_* */
+ uint32_t state[29];
+};
+
+/* One constant buffer binding. The source is either a pipe_resource or a
+ * raw data pointer; the two share storage in the union, and `user` says
+ * which member is meant to be read.
+ */
+struct nv50_constbuf {
+ union {
+ struct pipe_resource *buf;
+ const uint8_t *data;
+ } u;
+ uint32_t size; /* max 65536 */
+ uint32_t offset;
+ boolean user; /* should only be TRUE if u.data is valid and non-NULL */
+};
+
+/* A gallium vertex element together with one 32-bit driver state word
+ * (presumably the hardware encoding of the element -- confirm at the
+ * place where it is filled in).
+ */
+struct nv50_vertex_element {
+ struct pipe_vertex_element pipe;
+ uint32_t state;
+};
+
+/* Vertex elements state object. The element array is declared with zero
+ * length at the end of the struct, so storage for the elements has to be
+ * allocated past sizeof(struct nv50_vertex_stateobj) by whoever creates
+ * the object.
+ */
+struct nv50_vertex_stateobj {
+ uint32_t min_instance_div[PIPE_MAX_ATTRIBS];
+ uint16_t vb_access_size[PIPE_MAX_ATTRIBS];
+ struct translate *translate;
+ unsigned num_elements; /* presumably the number of entries in element[] */
+ uint32_t instance_elts;
+ uint32_t instance_bufs;
+ boolean need_conversion;
+ unsigned vertex_size;
+ unsigned packet_vertex_limit;
+ struct nv50_vertex_element element[0]; /* trailing variable-length array */
+};
+
+/* Driver wrapper around a gallium stream output target. `pipe` is the
+ * first member, which is what lets nv50_so_target() convert by casting
+ * the pointer.
+ */
+struct nv50_so_target {
+ struct pipe_stream_output_target pipe;
+ struct pipe_query *pq;
+ unsigned stride;
+ boolean clean;
+};
+
+/* Convert a gallium stream output target pointer to the driver's wrapper
+ * type. This is a plain pointer cast; nothing is checked.
+ */
+static INLINE struct nv50_so_target *
+nv50_so_target(struct pipe_stream_output_target *ptarg)
+{
+   struct nv50_so_target *const targ = (struct nv50_so_target *)ptarg;
+
+   return targ;
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
new file mode 100644
index 00000000000..99548cbdb42
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
@@ -0,0 +1,34 @@
+
+#ifndef __NV50_STATEOBJ_TEX_H__
+#define __NV50_STATEOBJ_TEX_H__
+
+#include "pipe/p_state.h"
+
+/* Sampler state entry: an id plus eight 32-bit TSC words. The blitter in
+ * nv50_surface.c initializes id to -1 for samplers it builds itself;
+ * presumably -1 means "no slot assigned yet" -- confirm in nv50_tex.c.
+ */
+struct nv50_tsc_entry {
+ int id;
+ uint32_t tsc[8];
+};
+
+/* Interpret an opaque sampler CSO pointer as an nv50_tsc_entry.
+ * Plain pointer cast; nothing is checked.
+ */
+static INLINE struct nv50_tsc_entry *
+nv50_tsc_entry(void *hwcso)
+{
+   struct nv50_tsc_entry *const tsc = (struct nv50_tsc_entry *)hwcso;
+
+   return tsc;
+}
+
+/* Texture view entry: the gallium sampler view (first member, so that
+ * nv50_tic_entry() can convert by casting), an id and eight 32-bit TIC
+ * words.
+ */
+struct nv50_tic_entry {
+ struct pipe_sampler_view pipe;
+ int id;
+ uint32_t tic[8];
+};
+
+/* Convert a gallium sampler view pointer to the driver's texture view
+ * entry. Plain pointer cast; nothing is checked.
+ */
+static INLINE struct nv50_tic_entry *
+nv50_tic_entry(struct pipe_sampler_view *view)
+{
+   struct nv50_tic_entry *const tic = (struct nv50_tic_entry *)view;
+
+   return tic;
+}
+
+/* Defined outside this header. Takes a pipe context and a gallium sampler
+ * state template and returns an opaque pointer (presumably one that
+ * nv50_tsc_entry() can convert -- confirm at the definition).
+ */
+extern void *
+nv50_sampler_state_create(struct pipe_context *,
+ const struct pipe_sampler_state *);
+
+#endif /* __NV50_STATEOBJ_TEX_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
new file mode 100644
index 00000000000..dcc1fce41c5
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -0,0 +1,1353 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include "pipe/p_defines.h"
+
+#include "util/u_inlines.h"
+#include "util/u_pack_color.h"
+#include "util/u_format.h"
+#include "util/u_surface.h"
+
+#include "tgsi/tgsi_ureg.h"
+
+#include "os/os_thread.h"
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_resource.h"
+
+#include "nv50/nv50_defs.xml.h"
+#include "nv50/nv50_texture.xml.h"
+
+/* these are used in nv50_blit.h */
+/* 64-bit masks. nv50_2d_format() in this file tests bit (id - 0xc0) of
+ * NV50_ENG2D_SUPPORTED_FORMATS for a hardware surface format id; the other
+ * masks are presumably indexed the same way -- confirm in nv50_blit.h.
+ */
+#define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL
+#define NV50_ENG2D_NOCONVERT_FORMATS 0x0008402000000000ULL
+#define NV50_ENG2D_LUMINANCE_FORMATS 0x0008402000000000ULL
+#define NV50_ENG2D_INTENSITY_FORMATS 0x0000000000000000ULL
+#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000608000ULL
+
+#define NOUVEAU_DRIVER 0x50
+#include "nv50/nv50_blit.h"
+
+/* Pick the surface format id to program into the 2D engine for `format`.
+ *
+ * If the render-target id from nv50_format_table is one the 2D engine
+ * supports, it is returned as is. Otherwise a same-sized stand-in format is
+ * chosen by block size (1, 2 or 4 bytes); this path asserts that source and
+ * destination formats are equal. Returns 0 when no stand-in exists.
+ * `dst` is not used by this function.
+ */
+static INLINE uint8_t
+nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
+{
+   const uint8_t rt = nv50_format_table[format].rt;
+   unsigned bytes;
+
+   /* Hardware values for color formats range from 0xc0 to 0xff,
+    * but the 2D engine doesn't support all of them.
+    */
+   if (rt >= 0xc0) {
+      const uint64_t bit = 1ULL << (rt - 0xc0);
+
+      if (NV50_ENG2D_SUPPORTED_FORMATS & bit)
+         return rt;
+   }
+   assert(dst_src_equal);
+
+   bytes = util_format_get_blocksize(format);
+   if (bytes == 1)
+      return NV50_SURFACE_FORMAT_R8_UNORM;
+   if (bytes == 2)
+      return NV50_SURFACE_FORMAT_R16_UNORM;
+   if (bytes == 4)
+      return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+
+   return 0;
+}
+
+/*
+ * Emit the 2D engine surface setup for one level/layer of a miptree.
+ *
+ * push:    pushbuf to emit into.
+ * dst:     non-zero programs the destination surface (NV50_2D_DST_FORMAT
+ *          method group), zero the source (NV50_2D_SRC_FORMAT group).
+ * mt:      miptree to describe.
+ * level:   mip level.
+ * layer:   array layer or z slice.
+ * pformat: format to view the surface as.
+ * dst_src_pformat_equal: passed on to nv50_2d_format().
+ *
+ * Returns 0 on success and 1 if nv50_2d_format() yields no usable format.
+ */
+static int
+nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst,
+ struct nv50_miptree *mt, unsigned level, unsigned layer,
+ enum pipe_format pformat, boolean dst_src_pformat_equal)
+{
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t width, height, depth;
+ uint32_t format;
+ uint32_t mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT;
+ uint32_t offset = mt->level[level].offset;
+
+ format = nv50_2d_format(pformat, dst, dst_src_pformat_equal);
+ if (!format) {
+ NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
+ util_format_name(pformat));
+ return 1;
+ }
+
+ /* Width and height are scaled up by the miptree's ms_x / ms_y shifts. */
+ width = u_minify(mt->base.base.width0, level) << mt->ms_x;
+ height = u_minify(mt->base.base.height0, level) << mt->ms_y;
+ depth = u_minify(mt->base.base.depth0, level);
+
+ /* NOTE(review): repeats the initializer above; offset has not changed
+ * in between.
+ */
+ offset = mt->level[level].offset;
+ if (!mt->layout_3d) {
+ /* Layered layout: address the layer directly, present it as a
+ * single-slice surface.
+ */
+ offset += mt->layer_stride * layer;
+ depth = 1;
+ layer = 0;
+ } else
+ if (!dst) {
+ /* 3D layout, source surface: fold the slice into the offset. For
+ * a 3D destination, depth and layer are passed on unchanged.
+ */
+ offset += nv50_mt_zslice_offset(mt, level, layer);
+ layer = 0;
+ }
+
+ if (!nouveau_bo_memtype(bo)) {
+ /* memtype 0: format + 1, then pitch/size/address at mthd + 0x14. */
+ BEGIN_NV04(push, SUBC_2D(mthd), 2);
+ PUSH_DATA (push, format);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, SUBC_2D(mthd + 0x14), 5);
+ PUSH_DATA (push, mt->level[level].pitch);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ PUSH_DATAh(push, bo->offset + offset);
+ PUSH_DATA (push, bo->offset + offset);
+ } else {
+ /* non-zero memtype: format + 0, tile mode, depth and layer, then
+ * size/address at mthd + 0x18 (no pitch word).
+ */
+ BEGIN_NV04(push, SUBC_2D(mthd), 5);
+ PUSH_DATA (push, format);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, mt->level[level].tile_mode);
+ PUSH_DATA (push, depth);
+ PUSH_DATA (push, layer);
+ BEGIN_NV04(push, SUBC_2D(mthd + 0x18), 4);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ PUSH_DATAh(push, bo->offset + offset);
+ PUSH_DATA (push, bo->offset + offset);
+ }
+
+ /* Disabled: would set the 2D clip rectangle to the full destination. */
+#if 0
+ if (dst) {
+ BEGIN_NV04(push, SUBC_2D(NV50_2D_CLIP_X), 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ }
+#endif
+ return 0;
+}
+
+/*
+ * Emit a point-sampled 1:1 2D engine blit of a w x h region from one
+ * level/slice of `src` to one level/slice of `dst`.
+ *
+ * dx, dy, dz / sx, sy, sz: destination / source position; dz and sz are
+ * passed to nv50_2d_texture_set() as the layer argument.
+ *
+ * Returns 0 on success, PIPE_ERROR if pushbuf space cannot be reserved,
+ * or the non-zero result of nv50_2d_texture_set() if surface setup fails.
+ */
+static int
+nv50_2d_texture_do_copy(struct nouveau_pushbuf *push,
+ struct nv50_miptree *dst, unsigned dst_level,
+ unsigned dx, unsigned dy, unsigned dz,
+ struct nv50_miptree *src, unsigned src_level,
+ unsigned sx, unsigned sy, unsigned sz,
+ unsigned w, unsigned h)
+{
+ const enum pipe_format dfmt = dst->base.base.format;
+ const enum pipe_format sfmt = src->base.base.format;
+ int ret;
+ boolean eqfmt = dfmt == sfmt;
+
+ if (!PUSH_SPACE(push, 2 * 16 + 32))
+ return PIPE_ERROR;
+
+ ret = nv50_2d_texture_set(push, 1, dst, dst_level, dz, dfmt, eqfmt);
+ if (ret)
+ return ret;
+
+ ret = nv50_2d_texture_set(push, 0, src, src_level, sz, sfmt, eqfmt);
+ if (ret)
+ return ret;
+
+ BEGIN_NV04(push, NV50_2D(BLIT_CONTROL), 1);
+ PUSH_DATA (push, NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE);
+ /* Destination rectangle; coordinates and size are scaled by the
+ * destination's ms_x / ms_y shifts.
+ */
+ BEGIN_NV04(push, NV50_2D(BLIT_DST_X), 4);
+ PUSH_DATA (push, dx << dst->ms_x);
+ PUSH_DATA (push, dy << dst->ms_y);
+ PUSH_DATA (push, w << dst->ms_x);
+ PUSH_DATA (push, h << dst->ms_y);
+ /* Step factors as (fraction, integer) pairs: 0/1 in both directions,
+ * i.e. no scaling.
+ */
+ BEGIN_NV04(push, NV50_2D(BLIT_DU_DX_FRACT), 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ /* Source origin as (fraction, integer) pairs, scaled by the source's
+ * ms_x / ms_y shifts.
+ */
+ BEGIN_NV04(push, NV50_2D(BLIT_SRC_X_FRACT), 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, sx << src->ms_x);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, sy << src->ms_y);
+
+ return 0;
+}
+
+/*
+ * Copy a box from `src` to `dst`.
+ *
+ * Three paths:
+ *  - buffer to buffer: handed to nouveau_copy_buffer();
+ *  - same format, or formats with the same block size in bits: one
+ *    nv50_m2mf_transfer_rect() per slice of the box;
+ *  - anything else: one 2D engine copy per layer.
+ *
+ * Nothing is returned; a failing 2D copy only ends the layer loop early.
+ */
+static void
+nv50_resource_copy_region(struct pipe_context *pipe,
+ struct pipe_resource *dst, unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ int ret;
+ boolean m2mf;
+ unsigned dst_layer = dstz, src_layer = src_box->z;
+
+ if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
+ nouveau_copy_buffer(&nv50->base,
+ nv04_resource(dst), dstx,
+ nv04_resource(src), src_box->x, src_box->width);
+ return;
+ }
+
+ /* 0 and 1 are equal, only supporting 0/1, 2, 4 and 8 */
+ assert((src->nr_samples | 1) == (dst->nr_samples | 1));
+
+ m2mf = (src->format == dst->format) ||
+ (util_format_get_blocksizebits(src->format) ==
+ util_format_get_blocksizebits(dst->format));
+
+ nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+
+ if (m2mf) {
+ struct nv50_m2mf_rect drect, srect;
+ unsigned i;
+ /* Transfer size in format blocks, derived from the source format. */
+ unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
+ unsigned ny = util_format_get_nblocksy(src->format, src_box->height);
+
+ nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz);
+ nv50_m2mf_rect_setup(&srect, src, src_level,
+ src_box->x, src_box->y, src_box->z);
+
+ for (i = 0; i < src_box->depth; ++i) {
+ nv50_m2mf_transfer_rect(nv50, &drect, &srect, nx, ny);
+
+ /* Step to the next slice: bump z for 3D layouts, otherwise
+ * advance the base address by one layer.
+ */
+ if (nv50_miptree(dst)->layout_3d)
+ drect.z++;
+ else
+ drect.base += nv50_miptree(dst)->layer_stride;
+
+ if (nv50_miptree(src)->layout_3d)
+ srect.z++;
+ else
+ srect.base += nv50_miptree(src)->layer_stride;
+ }
+ return;
+ }
+
+ assert((src->format == dst->format) ||
+ (nv50_2d_src_format_faithful(src->format) &&
+ nv50_2d_dst_format_faithful(dst->format)));
+
+ /* Reference both resources in the 2D bin of the buffer context and
+ * validate. NOTE(review): the result of nouveau_pushbuf_validate() is
+ * not checked here.
+ */
+ BCTX_REFN(nv50->bufctx, 2D, nv04_resource(src), RD);
+ BCTX_REFN(nv50->bufctx, 2D, nv04_resource(dst), WR);
+ nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx);
+ nouveau_pushbuf_validate(nv50->base.pushbuf);
+
+ for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) {
+ ret = nv50_2d_texture_do_copy(nv50->base.pushbuf,
+ nv50_miptree(dst), dst_level,
+ dstx, dsty, dst_layer,
+ nv50_miptree(src), src_level,
+ src_box->x, src_box->y, src_layer,
+ src_box->width, src_box->height);
+ if (ret)
+ break;
+ }
+ nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D);
+}
+
+/*
+ * Clear a rectangle of a colour surface that need not be part of the
+ * bound framebuffer.
+ *
+ * The surface is programmed as render target 0, the viewport is set to
+ * the requested rectangle and one CLEAR_BUFFERS word is emitted per layer
+ * of the surface. NV50_NEW_FRAMEBUFFER is marked dirty afterwards because
+ * the render target setup was overwritten.
+ *
+ * Returns without clearing if pushbuf space cannot be reserved; by then
+ * the clear colour has already been emitted.
+ */
+static void
+nv50_clear_render_target(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const union pipe_color_union *color,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_miptree *mt = nv50_miptree(dst->texture);
+ struct nv50_surface *sf = nv50_surface(dst);
+ struct nouveau_bo *bo = mt->base.bo;
+ unsigned z;
+
+ BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
+ PUSH_DATAf(push, color->f[0]);
+ PUSH_DATAf(push, color->f[1]);
+ PUSH_DATAf(push, color->f[2]);
+ PUSH_DATAf(push, color->f[3]);
+
+ /* Non-zero result is treated as failure. */
+ if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
+ return;
+
+ PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
+
+ BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5);
+ PUSH_DATAh(push, bo->offset + sf->offset);
+ PUSH_DATA (push, bo->offset + sf->offset);
+ PUSH_DATA (push, nv50_format_table[dst->format].rt);
+ PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
+ /* Non-zero memtype: width. memtype 0: LINEAR flag ORed with the pitch.
+ * NOTE(review): the pitch is read from mt->level[0], not from the
+ * surface's own level -- confirm this is intended.
+ */
+ if (nouveau_bo_memtype(bo))
+ PUSH_DATA(push, sf->width);
+ else
+ PUSH_DATA(push, NV50_3D_RT_HORIZ_LINEAR | mt->level[0].pitch);
+ PUSH_DATA (push, sf->height);
+ BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
+ PUSH_DATA (push, 1);
+
+ /* The depth buffer is only switched off for memtype 0 targets. */
+ if (!nouveau_bo_memtype(bo)) {
+ BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
+ PUSH_DATA (push, (width << 16) | dstx);
+ PUSH_DATA (push, (height << 16) | dsty);
+
+ /* sf->depth words to the same method, one per layer: 0x3c ORed with
+ * the layer index in the LAYER field.
+ */
+ BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth);
+ for (z = 0; z < sf->depth; ++z) {
+ PUSH_DATA (push, 0x3c |
+ (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
+ }
+
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+}
+
+/*
+ * Clear a rectangle of a depth/stencil surface that need not be part of
+ * the bound framebuffer.
+ *
+ * clear_flags selects depth (PIPE_CLEAR_DEPTH) and/or stencil
+ * (PIPE_CLEAR_STENCIL); only the low 8 bits of `stencil` are used. The
+ * surface is programmed as the zeta buffer, the viewport is set to the
+ * requested rectangle and one CLEAR_BUFFERS word is emitted per layer.
+ * NV50_NEW_FRAMEBUFFER is marked dirty afterwards.
+ *
+ * Returns without clearing if pushbuf space cannot be reserved; by then
+ * the clear values have already been emitted.
+ */
+static void
+nv50_clear_depth_stencil(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_miptree *mt = nv50_miptree(dst->texture);
+ struct nv50_surface *sf = nv50_surface(dst);
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t mode = 0;
+ unsigned z;
+
+ assert(nouveau_bo_memtype(bo)); /* ZETA cannot be linear */
+
+ if (clear_flags & PIPE_CLEAR_DEPTH) {
+ BEGIN_NV04(push, NV50_3D(CLEAR_DEPTH), 1);
+ PUSH_DATAf(push, depth);
+ mode |= NV50_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (clear_flags & PIPE_CLEAR_STENCIL) {
+ BEGIN_NV04(push, NV50_3D(CLEAR_STENCIL), 1);
+ PUSH_DATA (push, stencil & 0xff);
+ mode |= NV50_3D_CLEAR_BUFFERS_S;
+ }
+
+ /* Non-zero result is treated as failure. */
+ if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
+ return;
+
+ PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
+
+ BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5);
+ PUSH_DATAh(push, bo->offset + sf->offset);
+ PUSH_DATA (push, bo->offset + sf->offset);
+ PUSH_DATA (push, nv50_format_table[dst->format].rt);
+ PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(ZETA_HORIZ), 3);
+ PUSH_DATA (push, sf->width);
+ PUSH_DATA (push, sf->height);
+ PUSH_DATA (push, (1 << 16) | 1);
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
+ PUSH_DATA (push, (width << 16) | dstx);
+ PUSH_DATA (push, (height << 16) | dsty);
+
+ /* sf->depth words to the same method, one per layer: the Z/S mode bits
+ * ORed with the layer index in the LAYER field.
+ */
+ BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth);
+ for (z = 0; z < sf->depth; ++z) {
+ PUSH_DATA (push, mode |
+ (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
+ }
+
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+}
+
+/*
+ * Clear the buffers of the currently bound framebuffer.
+ *
+ * buffers: mask of PIPE_CLEAR_COLOR / PIPE_CLEAR_DEPTH / PIPE_CLEAR_STENCIL.
+ * color:   clear colour, used when PIPE_CLEAR_COLOR is set and at least
+ *          one colour buffer is bound.
+ * depth:   clear depth, used when PIPE_CLEAR_DEPTH is set.
+ * stencil: clear stencil, only the low 8 bits are used.
+ *
+ * Validates the framebuffer state first and returns without clearing if
+ * that fails. The first CLEAR_BUFFERS covers colour buffer 0 together with
+ * depth/stencil; the remaining colour buffers each get their own
+ * CLEAR_BUFFERS, but only when a colour clear was requested.
+ */
+void
+nv50_clear(struct pipe_context *pipe, unsigned buffers,
+           const union pipe_color_union *color,
+           double depth, unsigned stencil)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+   unsigned i;
+   uint32_t mode = 0;
+
+   /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
+   if (!nv50_state_validate(nv50, NV50_NEW_FRAMEBUFFER, 9 + (fb->nr_cbufs * 2)))
+      return;
+
+   if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
+      BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
+      PUSH_DATAf(push, color->f[0]);
+      PUSH_DATAf(push, color->f[1]);
+      PUSH_DATAf(push, color->f[2]);
+      PUSH_DATAf(push, color->f[3]);
+      mode =
+         NV50_3D_CLEAR_BUFFERS_R | NV50_3D_CLEAR_BUFFERS_G |
+         NV50_3D_CLEAR_BUFFERS_B | NV50_3D_CLEAR_BUFFERS_A;
+   }
+
+   if (buffers & PIPE_CLEAR_DEPTH) {
+      BEGIN_NV04(push, NV50_3D(CLEAR_DEPTH), 1);
+      PUSH_DATA (push, fui(depth));
+      mode |= NV50_3D_CLEAR_BUFFERS_Z;
+   }
+
+   if (buffers & PIPE_CLEAR_STENCIL) {
+      BEGIN_NV04(push, NV50_3D(CLEAR_STENCIL), 1);
+      PUSH_DATA (push, stencil & 0xff);
+      mode |= NV50_3D_CLEAR_BUFFERS_S;
+   }
+
+   BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1);
+   PUSH_DATA (push, mode);
+
+   /* Additional colour buffers: 0x3c selects all four channels and the
+    * buffer index goes in at bit 6. Skipped unless a colour clear was
+    * requested; otherwise a depth/stencil-only clear would also wipe
+    * colour attachments 1..n-1.
+    */
+   if (buffers & PIPE_CLEAR_COLOR) {
+      for (i = 1; i < fb->nr_cbufs; i++) {
+         BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1);
+         PUSH_DATA (push, (i << 6) | 0x3c);
+      }
+   }
+}
+
+
+/* =============================== BLIT CODE ===================================
+ */
+
+/* Blitter data reached through the screen (nv50->screen->blitter). */
+struct nv50_blitter
+{
+ /* Fragment programs indexed by [texture type][blit mode]; entries are
+ * created on first use by nv50_blit_select_fp() while holding `mutex`.
+ */
+ struct nv50_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES];
+ /* Vertex program, filled in by nv50_blitter_make_vp(). */
+ struct nv50_program vp;
+
+ struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */
+
+ /* Taken around fragment program creation. */
+ pipe_mutex mutex;
+};
+
+/* Per-context blit state: parameters of the blit in progress plus a
+ * snapshot of the context state that the blit overrides.
+ */
+struct nv50_blitctx
+{
+ struct nv50_context *nv50;
+ struct nv50_program *fp; /* chosen by nv50_blit_select_fp() */
+ uint8_t mode; /* an NV50_BLIT_MODE_* value */
+ uint16_t color_mask; /* written to COLOR_MASK(0) for the blit */
+ uint8_t filter; /* index into the blitter's sampler[] array */
+ enum pipe_texture_target target;
+ /* Filled by nv50_blitctx_pre_blit() and written back by
+ * nv50_blitctx_post_blit().
+ */
+ struct {
+ struct pipe_framebuffer_state fb;
+ struct nv50_rasterizer_stateobj *rast;
+ struct nv50_program *vp;
+ struct nv50_program *gp;
+ struct nv50_program *fp;
+ unsigned num_textures[3];
+ unsigned num_samplers[3];
+ struct pipe_sampler_view *texture[2];
+ struct nv50_tsc_entry *sampler[2];
+ uint32_t dirty;
+ } saved;
+ /* nv50->rast points here while a blit is set up. */
+ struct nv50_rasterizer_stateobj rast;
+};
+
+/*
+ * Fill in the blitter's vertex program from a hard-coded shader binary.
+ *
+ * Per the disassembly comments, the shader copies five 32-bit inputs
+ * straight to outputs: position x/y and texture coordinate x/y/z. The
+ * program is marked as already translated, and its code pointer refers to
+ * the static const array below.
+ */
+static void
+nv50_blitter_make_vp(struct nv50_blitter *blit)
+{
+ static const uint32_t code[] =
+ {
+ 0x10000001, 0x0423c788, /* mov b32 o[0x00] s[0x00] */ /* HPOS.x */
+ 0x10000205, 0x0423c788, /* mov b32 o[0x04] s[0x04] */ /* HPOS.y */
+ 0x10000409, 0x0423c788, /* mov b32 o[0x08] s[0x08] */ /* TEXC.x */
+ 0x1000060d, 0x0423c788, /* mov b32 o[0x0c] s[0x0c] */ /* TEXC.y */
+ 0x10000811, 0x0423c789, /* mov b32 o[0x10] s[0x10] */ /* TEXC.z */
+ };
+
+ blit->vp.type = PIPE_SHADER_VERTEX;
+ blit->vp.translated = TRUE;
+ /* The const array is never meant to be written through this pointer. */
+ blit->vp.code = (uint32_t *)code; /* const_cast */
+ blit->vp.code_size = sizeof(code);
+ blit->vp.max_gpr = 4;
+ blit->vp.max_out = 5;
+ /* Two outputs: position (mask 0x3, x/y) and generic 0 (mask 0x7,
+ * x/y/z) placed at hw slot 2.
+ */
+ blit->vp.out_nr = 2;
+ blit->vp.out[0].mask = 0x3;
+ blit->vp.out[0].sn = TGSI_SEMANTIC_POSITION;
+ blit->vp.out[1].hw = 2;
+ blit->vp.out[1].mask = 0x7;
+ blit->vp.out[1].sn = TGSI_SEMANTIC_GENERIC;
+ blit->vp.out[1].si = 0;
+ blit->vp.vp.attrs[0] = 0x73;
+ /* presumably 0x40 means "not written" for psiz/edgeflag -- confirm
+ * against nv50_program.c
+ */
+ blit->vp.vp.psiz = 0x40;
+ blit->vp.vp.edgeflag = 0x40;
+}
+
+/*
+ * Build the blit fragment shader for one blit mode and texture target.
+ *
+ * pipe:  context the shader is created on.
+ * mode:  an NV50_BLIT_MODE_* value; decides which samplers are read and
+ *        whether the data is repacked into 8-bit UNORM channels.
+ * ptarg: pipe texture target of the source.
+ *
+ * Returns NULL if the ureg program cannot be created, otherwise whatever
+ * ureg_create_shader_and_destroy() returns.
+ */
+void *
+nv50_blitter_make_fp(struct pipe_context *pipe,
+ unsigned mode,
+ enum pipe_texture_target ptarg)
+{
+ struct ureg_program *ureg;
+ struct ureg_src tc;
+ struct ureg_dst out;
+ struct ureg_dst data;
+
+ const unsigned target = nv50_blit_get_tgsi_texture_target(ptarg);
+
+ boolean tex_rgbaz = FALSE;
+ boolean tex_s = FALSE;
+ boolean cvt_un8 = FALSE;
+
+ /* tex_s: read sampler 1. Set for every mode except PASS, Z24X8 and
+ * X8Z24.
+ */
+ if (mode != NV50_BLIT_MODE_PASS &&
+ mode != NV50_BLIT_MODE_Z24X8 &&
+ mode != NV50_BLIT_MODE_X8Z24)
+ tex_s = TRUE;
+
+ /* tex_rgbaz: read sampler 0. Set for every mode except X24S8, S8X24
+ * and XS.
+ */
+ if (mode != NV50_BLIT_MODE_X24S8 &&
+ mode != NV50_BLIT_MODE_S8X24 &&
+ mode != NV50_BLIT_MODE_XS)
+ tex_rgbaz = TRUE;
+
+ /* cvt_un8: repack into 8-bit channels. Set for every mode except PASS,
+ * ZS and XS.
+ */
+ if (mode != NV50_BLIT_MODE_PASS &&
+ mode != NV50_BLIT_MODE_ZS &&
+ mode != NV50_BLIT_MODE_XS)
+ cvt_un8 = TRUE;
+
+ ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ if (!ureg)
+ return NULL;
+
+ out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+ tc = ureg_DECL_fs_input(
+ ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_LINEAR);
+
+ data = ureg_DECL_temporary(ureg);
+
+ /* Sampler 1 is fetched into data.x and moved to data.y, so the
+ * sampler 0 fetch below can use data.x.
+ */
+ if (tex_s) {
+ ureg_TEX(ureg, ureg_writemask(data, TGSI_WRITEMASK_X),
+ target, tc, ureg_DECL_sampler(ureg, 1));
+ ureg_MOV(ureg, ureg_writemask(data, TGSI_WRITEMASK_Y),
+ ureg_scalar(ureg_src(data), TGSI_SWIZZLE_X));
+ }
+ /* Sampler 0: all four channels in PASS mode, only x otherwise. */
+ if (tex_rgbaz) {
+ const unsigned mask = (mode == NV50_BLIT_MODE_PASS) ?
+ TGSI_WRITEMASK_XYZW : TGSI_WRITEMASK_X;
+ ureg_TEX(ureg, ureg_writemask(data, mask),
+ target, tc, ureg_DECL_sampler(ureg, 0));
+ }
+
+ if (cvt_un8) {
+ struct ureg_src mask;
+ struct ureg_src scale;
+ struct ureg_dst outz;
+ struct ureg_dst outs;
+ struct ureg_dst zdst3 = ureg_writemask(data, TGSI_WRITEMASK_XYZ);
+ struct ureg_dst zdst = ureg_writemask(data, TGSI_WRITEMASK_X);
+ struct ureg_dst sdst = ureg_writemask(data, TGSI_WRITEMASK_Y);
+ struct ureg_src zsrc3 = ureg_src(data);
+ struct ureg_src zsrc = ureg_scalar(zsrc3, TGSI_SWIZZLE_X);
+ struct ureg_src ssrc = ureg_scalar(zsrc3, TGSI_SWIZZLE_Y);
+ struct ureg_src zshuf;
+
+ /* mask picks out the three bytes of a 24-bit integer; scale.xyz
+ * turns each masked byte into [0, 1]; scale.w is 2^24 - 1.
+ */
+ mask = ureg_imm3u(ureg, 0x0000ff, 0x00ff00, 0xff0000);
+ scale = ureg_imm4f(ureg,
+ 1.0f / 0x0000ff, 1.0f / 0x00ff00, 1.0f / 0xff0000,
+ (1 << 24) - 1);
+
+ /* Output channel assignment: the three depth bytes go to xyz and
+ * stencil to w for the Z24S8/X24S8/Z24X8 modes; for the remaining
+ * modes depth goes to yzw and stencil to x.
+ */
+ if (mode == NV50_BLIT_MODE_Z24S8 ||
+ mode == NV50_BLIT_MODE_X24S8 ||
+ mode == NV50_BLIT_MODE_Z24X8) {
+ outz = ureg_writemask(out, TGSI_WRITEMASK_XYZ);
+ outs = ureg_writemask(out, TGSI_WRITEMASK_W);
+ zshuf = ureg_src(data);
+ } else {
+ outz = ureg_writemask(out, TGSI_WRITEMASK_YZW);
+ outs = ureg_writemask(out, TGSI_WRITEMASK_X);
+ zshuf = ureg_swizzle(zsrc3, TGSI_SWIZZLE_W,
+ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z);
+ }
+
+ /* Stencil: integer in data.y -> float -> scaled by 1/255. */
+ if (tex_s) {
+ ureg_I2F(ureg, sdst, ssrc);
+ ureg_MUL(ureg, outs, ssrc, ureg_scalar(scale, TGSI_SWIZZLE_X));
+ }
+
+ /* Depth: scale data.x by 2^24 - 1, convert to integer, split into
+ * three bytes with the mask, then convert each byte back to a
+ * float and scale it by the matching scale component.
+ */
+ if (tex_rgbaz) {
+ ureg_MUL(ureg, zdst, zsrc, ureg_scalar(scale, TGSI_SWIZZLE_W));
+ ureg_F2I(ureg, zdst, zsrc);
+ ureg_AND(ureg, zdst3, zsrc, mask);
+ ureg_I2F(ureg, zdst3, zsrc3);
+ ureg_MUL(ureg, zdst3, zsrc3, scale);
+ ureg_MOV(ureg, outz, zshuf);
+ }
+ } else {
+ unsigned mask = TGSI_WRITEMASK_XYZW;
+
+ /* No repacking: copy data to the output. Outside PASS mode only
+ * x (sampler 0) and/or y (sampler 1) are written.
+ */
+ if (mode != NV50_BLIT_MODE_PASS) {
+ mask &= ~TGSI_WRITEMASK_ZW;
+ if (!tex_s)
+ mask = TGSI_WRITEMASK_X;
+ if (!tex_rgbaz)
+ mask = TGSI_WRITEMASK_Y;
+ }
+ ureg_MOV(ureg, ureg_writemask(out, mask), ureg_src(data));
+ }
+ ureg_END(ureg);
+
+ return ureg_create_shader_and_destroy(ureg, pipe);
+}
+
+/*
+ * Initialize the blitter's two sampler entries: [0] uses nearest
+ * filtering, [1] bilinear. Both clamp to edge on all three axes, allow
+ * sRGB conversion and use no mip filter. Only id, tsc[0] and tsc[1] are
+ * written; id is set to -1.
+ */
+static void
+nv50_blitter_make_sampler(struct nv50_blitter *blit)
+{
+   struct nv50_tsc_entry *const nearest = &blit->sampler[0];
+   struct nv50_tsc_entry *const bilinear = &blit->sampler[1];
+
+   /* clamp to edge, min/max lod = 0 -- shared by both samplers */
+   const uint32_t wrap_word =
+      NV50_TSC_0_SRGB_CONVERSION_ALLOWED |
+      (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) |
+      (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) |
+      (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT);
+
+   /* nearest filtering */
+   nearest->id = -1;
+   nearest->tsc[0] = wrap_word;
+   nearest->tsc[1] = NV50_TSC_1_MAGF_NEAREST |
+                     NV50_TSC_1_MINF_NEAREST |
+                     NV50_TSC_1_MIPF_NONE;
+
+   /* bilinear filtering */
+   bilinear->id = -1;
+   bilinear->tsc[0] = wrap_word;
+   bilinear->tsc[1] = NV50_TSC_1_MAGF_LINEAR |
+                      NV50_TSC_1_MINF_LINEAR |
+                      NV50_TSC_1_MIPF_NONE;
+}
+
+/*
+ * Choose the NV50_BLIT_MODE_* value for a blit from the destination
+ * resource's format and the depth/stencil bits of info->mask.
+ *
+ * For the depth/stencil formats handled here, a mask with both Z and S
+ * picks the combined mode, Z alone the depth-only mode, and anything else
+ * (S alone or neither) the stencil-only mode. Every other format yields
+ * NV50_BLIT_MODE_PASS.
+ */
+unsigned
+nv50_blit_select_mode(const struct pipe_blit_info *info)
+{
+   const unsigned zs = info->mask & PIPE_MASK_ZS;
+   const boolean want_both = (zs == PIPE_MASK_ZS);
+   const boolean want_z_only = (zs == PIPE_MASK_Z);
+
+   switch (info->dst.resource->format) {
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+   case PIPE_FORMAT_Z24X8_UNORM:
+      if (want_both)
+         return NV50_BLIT_MODE_Z24S8;
+      return want_z_only ? NV50_BLIT_MODE_Z24X8 : NV50_BLIT_MODE_X24S8;
+
+   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+      if (want_both)
+         return NV50_BLIT_MODE_S8Z24;
+      return want_z_only ? NV50_BLIT_MODE_X8Z24 : NV50_BLIT_MODE_S8X24;
+
+   case PIPE_FORMAT_Z32_FLOAT:
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      if (want_both)
+         return NV50_BLIT_MODE_ZS;
+      return want_z_only ? NV50_BLIT_MODE_PASS : NV50_BLIT_MODE_XS;
+
+   default:
+      break;
+   }
+   return NV50_BLIT_MODE_PASS;
+}
+
+/*
+ * Point ctx->fp at the fragment program for the source's texture type and
+ * ctx->mode, creating it first if the blitter does not have one yet.
+ *
+ * The slot is tested once without the mutex and again with it held, so
+ * creation happens inside the lock.
+ */
+static void
+nv50_blit_select_fp(struct nv50_blitctx *ctx, const struct pipe_blit_info *info)
+{
+   struct nv50_blitter *blitter = ctx->nv50->screen->blitter;
+   const enum pipe_texture_target ptarg =
+      nv50_blit_reinterpret_pipe_texture_target(info->src.resource->target);
+   struct nv50_program **slot =
+      &blitter->fp[nv50_blit_texture_type(ptarg)][ctx->mode];
+
+   if (!*slot) {
+      pipe_mutex_lock(blitter->mutex);
+      if (!*slot)
+         *slot = nv50_blitter_make_fp(&ctx->nv50->base.pipe, ctx->mode, ptarg);
+      pipe_mutex_unlock(blitter->mutex);
+   }
+   ctx->fp = *slot;
+}
+
+/*
+ * Make the blit destination the context's only colour buffer.
+ *
+ * res:    destination resource.
+ * level:  mip level to render to.
+ * layer:  single layer to bind, or -1 for all layers (3D depth or array
+ *         size of the resource).
+ * format: view format; depth/stencil formats are replaced by the colour
+ *         format from nv50_blit_zeta_to_colour_format().
+ *
+ * Overwrites nv50->framebuffer (cbufs[0], nr_cbufs, zsbuf, width, height);
+ * saving and restoring the previous values is done by the pre/post blit
+ * functions.
+ */
+static void
+nv50_blit_set_dst(struct nv50_blitctx *ctx,
+ struct pipe_resource *res, unsigned level, unsigned layer,
+ enum pipe_format format)
+{
+ struct nv50_context *nv50 = ctx->nv50;
+ struct pipe_context *pipe = &nv50->base.pipe;
+ /* Only format and u.tex are filled in; the other members are left
+ * uninitialized.
+ */
+ struct pipe_surface templ;
+
+ if (util_format_is_depth_or_stencil(format))
+ templ.format = nv50_blit_zeta_to_colour_format(format);
+ else
+ templ.format = format;
+
+ templ.u.tex.level = level;
+ templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
+
+ /* `layer` is unsigned, so this compares against (unsigned)-1. */
+ if (layer == -1) {
+ templ.u.tex.first_layer = 0;
+ templ.u.tex.last_layer =
+ (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1;
+ }
+
+ nv50->framebuffer.cbufs[0] = nv50_miptree_surface_new(pipe, res, &templ);
+ nv50->framebuffer.nr_cbufs = 1;
+ nv50->framebuffer.zsbuf = NULL;
+ /* NOTE(review): cbufs[0] is dereferenced without a NULL check. */
+ nv50->framebuffer.width = nv50->framebuffer.cbufs[0]->width;
+ nv50->framebuffer.height = nv50->framebuffer.cbufs[0]->height;
+}
+
+/*
+ * Bind the blit source as texture view(s) in texture slot group 2 (the
+ * group whose sampler slots the pre-blit code also fills).
+ *
+ * res:    source resource.
+ * level:  mip level to read (first and last level of the view).
+ * layer:  single layer, or -1 for all layers.
+ * format: view format for the first view.
+ * filter: non-zero together with an 8-sample resource adds the
+ *         NV50_TEXVIEW_FILTER_MSAA8 flag.
+ *
+ * A second view using nv50_zs_to_s_format(format) is bound in slot 1 when
+ * that format differs from the resource's own format. Texture counts for
+ * groups 0 and 1 are set to 0; the previous bindings are saved and
+ * restored by the pre/post blit functions.
+ */
+static void
+nv50_blit_set_src(struct nv50_blitctx *blit,
+ struct pipe_resource *res, unsigned level, unsigned layer,
+ enum pipe_format format, const uint8_t filter)
+{
+ struct nv50_context *nv50 = blit->nv50;
+ struct pipe_context *pipe = &nv50->base.pipe;
+ /* Only format, u.tex and the swizzles are filled in here. */
+ struct pipe_sampler_view templ;
+ uint32_t flags;
+ enum pipe_texture_target target;
+
+ target = nv50_blit_reinterpret_pipe_texture_target(res->target);
+
+ templ.format = format;
+ templ.u.tex.first_level = templ.u.tex.last_level = level;
+ templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
+ templ.swizzle_r = PIPE_SWIZZLE_RED;
+ templ.swizzle_g = PIPE_SWIZZLE_GREEN;
+ templ.swizzle_b = PIPE_SWIZZLE_BLUE;
+ templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
+
+ /* `layer` is unsigned, so this compares against (unsigned)-1. */
+ if (layer == -1) {
+ templ.u.tex.first_layer = 0;
+ templ.u.tex.last_layer =
+ (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1;
+ }
+
+ /* Scaled coordinates are only requested for resources without
+ * mip levels beyond level 0.
+ */
+ flags = res->last_level ? 0 : NV50_TEXVIEW_SCALED_COORDS;
+ flags |= NV50_TEXVIEW_ACCESS_RESOLVE;
+ if (filter && res->nr_samples == 8)
+ flags |= NV50_TEXVIEW_FILTER_MSAA8;
+
+ nv50->textures[2][0] = nv50_create_texture_view(
+ pipe, res, &templ, flags, target);
+ nv50->textures[2][1] = NULL;
+
+ nv50->num_textures[0] = nv50->num_textures[1] = 0;
+ nv50->num_textures[2] = 1;
+
+ /* Second view for the stencil part. NOTE(review): the comparison is
+ * against res->format rather than the `format` argument -- confirm
+ * this is intended when the two differ.
+ */
+ templ.format = nv50_zs_to_s_format(format);
+ if (templ.format != res->format) {
+ nv50->textures[2][1] = nv50_create_texture_view(
+ pipe, res, &templ, flags, target);
+ nv50->num_textures[2] = 2;
+ }
+}
+
+/*
+ * Emit the fixed-function state used for a blit directly into the
+ * pushbuf: conditional rendering set to ALWAYS if a condition query is
+ * active, the blit's colour mask, and blending, logic op, two-sided
+ * lighting, colour clamp, multisampling, culling, stippling, polygon
+ * offset, depth test, stencil and alpha test all disabled.
+ *
+ * The context's state objects are not modified here; the post-blit code
+ * marks the corresponding states dirty so they get re-emitted.
+ */
+static void
+nv50_blitctx_prepare_state(struct nv50_blitctx *blit)
+{
+ struct nouveau_pushbuf *push = blit->nv50->base.pushbuf;
+
+ if (blit->nv50->cond_query) {
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+ }
+
+ /* blend state */
+ BEGIN_NV04(push, NV50_3D(COLOR_MASK(0)), 1);
+ PUSH_DATA (push, blit->color_mask);
+ BEGIN_NV04(push, NV50_3D(BLEND_ENABLE(0)), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(LOGIC_OP_ENABLE), 1);
+ PUSH_DATA (push, 0);
+
+ /* rasterizer state */
+ /* SCISSOR_ENABLE is only written when NV50_SCISSORS_CLIPPING is not
+ * defined.
+ */
+#ifndef NV50_SCISSORS_CLIPPING
+ BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 1);
+ PUSH_DATA (push, 1);
+#endif
+ BEGIN_NV04(push, NV50_3D(VERTEX_TWO_SIDE_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(MSAA_MASK(0)), 4);
+ PUSH_DATA (push, 0xffff);
+ PUSH_DATA (push, 0xffff);
+ PUSH_DATA (push, 0xffff);
+ PUSH_DATA (push, 0xffff);
+ /* Three consecutive methods: front mode, back mode, then a 0 for the
+ * method that follows them.
+ */
+ BEGIN_NV04(push, NV50_3D(POLYGON_MODE_FRONT), 3);
+ PUSH_DATA (push, NV50_3D_POLYGON_MODE_FRONT_FILL);
+ PUSH_DATA (push, NV50_3D_POLYGON_MODE_BACK_FILL);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(CULL_FACE_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(POLYGON_STIPPLE_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(POLYGON_OFFSET_FILL_ENABLE), 1);
+ PUSH_DATA (push, 0);
+
+ /* zsa state */
+ BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1);
+ PUSH_DATA (push, 0);
+}
+
+/*
+ * Save the context state a blit is about to override into ctx->saved and
+ * install the blitter's own rasterizer, programs and samplers.
+ *
+ * Saved: framebuffer width/height/nr_cbufs/cbufs[0]/zsbuf, rasterizer,
+ * vertex/geometry/fragment programs, texture and sampler counts of all
+ * three groups, the first two textures and samplers of group 2, and the
+ * dirty mask. Framebuffer and texture bindings themselves are replaced
+ * later by nv50_blit_set_dst() / nv50_blit_set_src().
+ */
+static void
+nv50_blitctx_pre_blit(struct nv50_blitctx *ctx)
+{
+ struct nv50_context *nv50 = ctx->nv50;
+ struct nv50_blitter *blitter = nv50->screen->blitter;
+ int s;
+
+ /* Only colour buffer 0 is saved; it is the only one the blit
+ * replaces.
+ */
+ ctx->saved.fb.width = nv50->framebuffer.width;
+ ctx->saved.fb.height = nv50->framebuffer.height;
+ ctx->saved.fb.nr_cbufs = nv50->framebuffer.nr_cbufs;
+ ctx->saved.fb.cbufs[0] = nv50->framebuffer.cbufs[0];
+ ctx->saved.fb.zsbuf = nv50->framebuffer.zsbuf;
+
+ ctx->saved.rast = nv50->rast;
+
+ ctx->saved.vp = nv50->vertprog;
+ ctx->saved.gp = nv50->gmtyprog;
+ ctx->saved.fp = nv50->fragprog;
+
+ nv50->rast = &ctx->rast;
+
+ /* No geometry program during the blit. */
+ nv50->vertprog = &blitter->vp;
+ nv50->gmtyprog = NULL;
+ nv50->fragprog = ctx->fp;
+
+ for (s = 0; s < 3; ++s) {
+ ctx->saved.num_textures[s] = nv50->num_textures[s];
+ ctx->saved.num_samplers[s] = nv50->num_samplers[s];
+ }
+ ctx->saved.texture[0] = nv50->textures[2][0];
+ ctx->saved.texture[1] = nv50->textures[2][1];
+ ctx->saved.sampler[0] = nv50->samplers[2][0];
+ ctx->saved.sampler[1] = nv50->samplers[2][1];
+
+ /* Both sampler slots use the same blitter sampler, picked by filter. */
+ nv50->samplers[2][0] = &blitter->sampler[ctx->filter];
+ nv50->samplers[2][1] = &blitter->sampler[ctx->filter];
+
+ nv50->num_samplers[0] = nv50->num_samplers[1] = 0;
+ nv50->num_samplers[2] = 2;
+
+ ctx->saved.dirty = nv50->dirty;
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+
+ /* The dirty mask is replaced, not ORed: only the states the blit
+ * installs get validated.
+ */
+ nv50->dirty =
+ NV50_NEW_FRAMEBUFFER |
+ NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG |
+ NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS;
+}
+
+/*
+ * Undo nv50_blitctx_pre_blit(): release the blit's surface and texture
+ * views, write the saved state back into the context, re-apply the render
+ * condition if one is active, and mark every state the blit touched as
+ * dirty in addition to the saved dirty bits.
+ */
+static void
+nv50_blitctx_post_blit(struct nv50_blitctx *blit)
+{
+ struct nv50_context *nv50 = blit->nv50;
+ int s;
+
+ /* Drop the reference on the blit's destination surface before the
+ * saved pointer is written back over it.
+ */
+ pipe_surface_reference(&nv50->framebuffer.cbufs[0], NULL);
+
+ nv50->framebuffer.width = blit->saved.fb.width;
+ nv50->framebuffer.height = blit->saved.fb.height;
+ nv50->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs;
+ nv50->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0];
+ nv50->framebuffer.zsbuf = blit->saved.fb.zsbuf;
+
+ nv50->rast = blit->saved.rast;
+
+ nv50->vertprog = blit->saved.vp;
+ nv50->gmtyprog = blit->saved.gp;
+ nv50->fragprog = blit->saved.fp;
+
+ /* Same for the blit's source views. */
+ pipe_sampler_view_reference(&nv50->textures[2][0], NULL);
+ pipe_sampler_view_reference(&nv50->textures[2][1], NULL);
+
+ for (s = 0; s < 3; ++s) {
+ nv50->num_textures[s] = blit->saved.num_textures[s];
+ nv50->num_samplers[s] = blit->saved.num_samplers[s];
+ }
+ nv50->textures[2][0] = blit->saved.texture[0];
+ nv50->textures[2][1] = blit->saved.texture[1];
+ nv50->samplers[2][0] = blit->saved.sampler[0];
+ nv50->samplers[2][1] = blit->saved.sampler[1];
+
+ /* Counterpart of the COND_MODE_ALWAYS write in the prepare step. */
+ if (nv50->cond_query)
+ nv50->base.pipe.render_condition(&nv50->base.pipe, nv50->cond_query,
+ nv50->cond_cond, nv50->cond_mode);
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+
+ nv50->dirty = blit->saved.dirty |
+ (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK |
+ NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND |
+ NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS |
+ NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG);
+}
+
+
+/* Blit on the 3D engine by drawing one oversized textured triangle per
+ * destination layer.
+ *
+ * The scissor rectangle limits the write to info->dst.box (intersected with
+ * info->scissor when info->scissor_enable is set), while the texture
+ * coordinates attached to the three vertices encode the source offset and
+ * scale. The sequence is bracketed by nv50_blitctx_pre_blit() and
+ * nv50_blitctx_post_blit().
+ */
+static void
+nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
+{
+ struct nv50_blitctx *blit = nv50->blit;
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct pipe_resource *src = info->src.resource;
+ struct pipe_resource *dst = info->dst.resource;
+ int32_t minx, maxx, miny, maxy;
+ int32_t i;
+ float x0, x1, y0, y1, z;
+ float dz;
+ float x_range, y_range;
+
+ blit->mode = nv50_blit_select_mode(info);
+ blit->color_mask = nv50_blit_derive_color_mask(info);
+ blit->filter = nv50_blit_get_filter(info);
+
+ nv50_blit_select_fp(blit, info);
+ nv50_blitctx_pre_blit(blit);
+
+ nv50_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format);
+ nv50_blit_set_src(blit, src, info->src.level, -1, info->src.format,
+ blit->filter);
+
+ nv50_blitctx_prepare_state(blit);
+
+ nv50_state_validate(nv50, ~0, 36);
+
+ /* Source texels advanced per destination pixel (negative for a box with
+ * negative width/height).
+ */
+ x_range = (float)info->src.box.width / (float)info->dst.box.width;
+ y_range = (float)info->src.box.height / (float)info->dst.box.height;
+
+ /* Texture coordinate at destination pixel 0, chosen so that destination
+ * pixel dst.box.x/y samples src.box.x/y.
+ */
+ x0 = (float)info->src.box.x - x_range * (float)info->dst.box.x;
+ y0 = (float)info->src.box.y - y_range * (float)info->dst.box.y;
+
+ /* Texture coordinate at destination pixel 16384, where the far triangle
+ * corners are placed below.
+ */
+ x1 = x0 + 16384.0f * x_range;
+ y1 = y0 + 16384.0f * y_range;
+
+ /* ms_x/ms_y are used as log2 scale factors of the source here. */
+ x0 *= (float)(1 << nv50_miptree(src)->ms_x);
+ x1 *= (float)(1 << nv50_miptree(src)->ms_x);
+ y0 *= (float)(1 << nv50_miptree(src)->ms_y);
+ y1 *= (float)(1 << nv50_miptree(src)->ms_y);
+
+ if (src->last_level > 0) {
+ /* If there are mip maps, GPU always assumes normalized coordinates. */
+ const unsigned l = info->src.level;
+ const float fh = u_minify(src->width0 << nv50_miptree(src)->ms_x, l);
+ const float fv = u_minify(src->height0 << nv50_miptree(src)->ms_y, l);
+ x0 /= fh;
+ x1 /= fh;
+ y0 /= fv;
+ y1 /= fv;
+ }
+
+ /* XXX: multiply by 6 for cube arrays ? */
+ /* dz is the source depth step per destination layer; sources with a 3D
+ * layout start half a step in.
+ */
+ dz = (float)info->src.box.depth / (float)info->dst.box.depth;
+ z = (float)info->src.box.z;
+ if (nv50_miptree(src)->layout_3d)
+ z += 0.5f * dz;
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+ PUSH_DATA (push, 0x1);
+
+ /* Draw a large triangle in screen coordinates covering the whole
+ * render target, with scissors defining the destination region.
+ * The vertex is supplied with non-normalized texture coordinates
+ * arranged in a way to yield the desired offset and scale.
+ */
+
+ minx = info->dst.box.x;
+ maxx = info->dst.box.x + info->dst.box.width;
+ miny = info->dst.box.y;
+ maxy = info->dst.box.y + info->dst.box.height;
+ if (info->scissor_enable) {
+ minx = MAX2(minx, info->scissor.minx);
+ maxx = MIN2(maxx, info->scissor.maxx);
+ miny = MAX2(miny, info->scissor.miny);
+ maxy = MIN2(maxy, info->scissor.maxy);
+ }
+ /* Each scissor word packs the max bound in the upper and the min bound in
+ * the lower 16 bits.
+ */
+ BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2);
+ PUSH_DATA (push, (maxx << 16) | minx);
+ PUSH_DATA (push, (maxy << 16) | miny);
+
+ /* One triangle per destination layer; z advances by dz each time. */
+ for (i = 0; i < info->dst.box.depth; ++i, z += dz) {
+ /* LAYER is only written for non-zero layers.
+ * NOTE(review): presumably LAYER is 0 on entry - confirm.
+ */
+ if (info->dst.box.z + i) {
+ BEGIN_NV04(push, NV50_3D(LAYER), 1);
+ PUSH_DATA (push, info->dst.box.z + i);
+ }
+ PUSH_SPACE(push, 32);
+ BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES);
+ /* Attribute 1 is written before attribute 0 for every vertex:
+ * texture coordinate first, then the screen position.
+ */
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3);
+ PUSH_DATAf(push, x0);
+ PUSH_DATAf(push, y0);
+ PUSH_DATAf(push, z);
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3);
+ PUSH_DATAf(push, x1);
+ PUSH_DATAf(push, y0);
+ PUSH_DATAf(push, z);
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2);
+ PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_x);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3);
+ PUSH_DATAf(push, x0);
+ PUSH_DATAf(push, y1);
+ PUSH_DATAf(push, z);
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_y);
+ BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
+ PUSH_DATA (push, 0);
+ }
+ /* Put LAYER back to 0 if the last layer drawn was not layer 0. */
+ if (info->dst.box.z + info->dst.box.depth - 1) {
+ BEGIN_NV04(push, NV50_3D(LAYER), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ /* re-enable normally constant state */
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
+ PUSH_DATA (push, 1);
+
+ nv50_blitctx_post_blit(blit);
+}
+
+/* Blit on the 2D engine.
+ *
+ * Source position and the per-pixel source step are computed as 64-bit
+ * values with the integer part in the upper and the fraction in the lower
+ * 32 bits, and are emitted as FRACT/INT register pairs. One blit is issued
+ * per layer of info->dst.box; there is no scaling between layers.
+ *
+ * If nouveau_pushbuf_validate() fails the function returns without issuing
+ * any blit.
+ */
+static void
+nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_miptree *dst = nv50_miptree(info->dst.resource);
+ struct nv50_miptree *src = nv50_miptree(info->src.resource);
+ /* -1 when the source box has a negative width/height, else 0 */
+ const int32_t srcx_adj = info->src.box.width < 0 ? -1 : 0;
+ const int32_t srcy_adj = info->src.box.height < 0 ? -1 : 0;
+ const int32_t dz = info->dst.box.z;
+ const int32_t sz = info->src.box.z;
+ uint32_t dstw, dsth;
+ int32_t dstx, dsty;
+ int64_t srcx, srcy;
+ int64_t du_dx, dv_dy;
+ int i;
+ uint32_t mode;
+ uint32_t mask = nv50_blit_eng2d_get_mask(info);
+ boolean b;
+
+ mode = nv50_blit_get_filter(info) ?
+ NV50_2D_BLIT_CONTROL_FILTER_BILINEAR :
+ NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE;
+ mode |= (src->base.base.nr_samples > dst->base.base.nr_samples) ?
+ NV50_2D_BLIT_CONTROL_ORIGIN_CORNER : NV50_2D_BLIT_CONTROL_ORIGIN_CENTER;
+
+ /* source step per destination pixel, 32.32 fixed point */
+ du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width;
+ dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height;
+
+ /* b: source and destination share the same format */
+ b = info->dst.format == info->src.format;
+ nv50_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b);
+ nv50_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b);
+
+ if (info->scissor_enable) {
+ /* x, y, width, height (scaled by the destination's ms shifts), enable */
+ BEGIN_NV04(push, NV50_2D(CLIP_X), 5);
+ PUSH_DATA (push, info->scissor.minx << dst->ms_x);
+ PUSH_DATA (push, info->scissor.miny << dst->ms_y);
+ PUSH_DATA (push, (info->scissor.maxx - info->scissor.minx) << dst->ms_x);
+ PUSH_DATA (push, (info->scissor.maxy - info->scissor.miny) << dst->ms_y);
+ PUSH_DATA (push, 1); /* enable */
+ }
+
+ if (mask != 0xffffffff) {
+ /* Not all bits are written: switch to a ROP operation with the mask
+ * loaded as a pattern color.
+ */
+ BEGIN_NV04(push, NV50_2D(ROP), 1);
+ PUSH_DATA (push, 0xca); /* DPSDxax */
+ BEGIN_NV04(push, NV50_2D(PATTERN_COLOR_FORMAT), 1);
+ PUSH_DATA (push, NV50_2D_PATTERN_COLOR_FORMAT_32BPP);
+ BEGIN_NV04(push, NV50_2D(PATTERN_COLOR(0)), 4);
+ PUSH_DATA (push, 0x00000000);
+ PUSH_DATA (push, mask);
+ PUSH_DATA (push, 0xffffffff);
+ PUSH_DATA (push, 0xffffffff);
+ BEGIN_NV04(push, NV50_2D(OPERATION), 1);
+ PUSH_DATA (push, NV50_2D_OPERATION_ROP);
+ } else
+ if (info->src.format != info->dst.format) {
+ if (info->src.format == PIPE_FORMAT_R8_UNORM ||
+ info->src.format == PIPE_FORMAT_R16_UNORM ||
+ info->src.format == PIPE_FORMAT_R16_FLOAT ||
+ info->src.format == PIPE_FORMAT_R32_FLOAT) {
+ mask = 0xffff0000; /* also makes condition for OPERATION reset true */
+ /* BETA4 and OPERATION are written as two consecutive words */
+ BEGIN_NV04(push, NV50_2D(BETA4), 2);
+ PUSH_DATA (push, mask);
+ PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY_PREMULT);
+ }
+ }
+
+ /* Rescale the step by the difference between the source and destination
+ * ms shifts.
+ */
+ if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) {
+ /* ms_x is always >= ms_y */
+ du_dx <<= src->ms_x - dst->ms_x;
+ dv_dy <<= src->ms_y - dst->ms_y;
+ } else {
+ du_dx >>= dst->ms_x - src->ms_x;
+ dv_dy >>= dst->ms_y - src->ms_y;
+ }
+
+ /* source start position, 32.32 fixed point */
+ srcx = (int64_t)(info->src.box.x + srcx_adj) << (src->ms_x + 32);
+ srcy = (int64_t)(info->src.box.y + srcy_adj) << (src->ms_y + 32);
+
+ if (src->base.base.nr_samples > dst->base.base.nr_samples) {
+ /* center src coordinates for proper MS resolve filtering */
+ srcx += (int64_t)src->ms_x << 32;
+ srcy += (int64_t)src->ms_y << 32;
+ }
+
+ dstx = info->dst.box.x << dst->ms_x;
+ dsty = info->dst.box.y << dst->ms_y;
+
+ dstw = info->dst.box.width << dst->ms_x;
+ dsth = info->dst.box.height << dst->ms_y;
+
+ /* A negative destination origin is clipped to 0: the rectangle shrinks
+ * and the source start advances by the skipped pixels.
+ */
+ if (dstx < 0) {
+ dstw += dstx;
+ srcx -= du_dx * dstx;
+ dstx = 0;
+ }
+ if (dsty < 0) {
+ dsth += dsty;
+ srcy -= dv_dy * dsty;
+ dsty = 0;
+ }
+
+ BEGIN_NV04(push, NV50_2D(BLIT_CONTROL), 1);
+ PUSH_DATA (push, mode);
+ BEGIN_NV04(push, NV50_2D(BLIT_DST_X), 4);
+ PUSH_DATA (push, dstx);
+ PUSH_DATA (push, dsty);
+ PUSH_DATA (push, dstw);
+ PUSH_DATA (push, dsth);
+ /* each step is written as a fraction word followed by an integer word */
+ BEGIN_NV04(push, NV50_2D(BLIT_DU_DX_FRACT), 4);
+ PUSH_DATA (push, du_dx);
+ PUSH_DATA (push, du_dx >> 32);
+ PUSH_DATA (push, dv_dy);
+ PUSH_DATA (push, dv_dy >> 32);
+
+ BCTX_REFN(nv50->bufctx, 2D, &dst->base, WR);
+ BCTX_REFN(nv50->bufctx, 2D, &src->base, RD);
+ nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx);
+ /* NOTE(review): this early return skips the bufctx reset and the
+ * CLIP_ENABLE/OPERATION restore at the end of the function - confirm
+ * that is acceptable.
+ */
+ if (nouveau_pushbuf_validate(nv50->base.pushbuf))
+ return;
+
+ /* The first iteration writes the complete source position; its last word
+ * (BLIT_SRC_Y_INT) is the trigger. Later layers only retarget the
+ * destination/source and write the trigger register again.
+ */
+ for (i = 0; i < info->dst.box.depth; ++i) {
+ if (i > 0) {
+ /* no scaling in z-direction possible for eng2d blits */
+ if (dst->layout_3d) {
+ BEGIN_NV04(push, NV50_2D(DST_LAYER), 1);
+ PUSH_DATA (push, info->dst.box.z + i);
+ } else {
+ const unsigned z = info->dst.box.z + i;
+ BEGIN_NV04(push, NV50_2D(DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, dst->base.address + z * dst->layer_stride);
+ PUSH_DATA (push, dst->base.address + z * dst->layer_stride);
+ }
+ if (src->layout_3d) {
+ /* not possible because of depth tiling */
+ assert(0);
+ } else {
+ const unsigned z = info->src.box.z + i;
+ BEGIN_NV04(push, NV50_2D(SRC_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, src->base.address + z * src->layer_stride);
+ PUSH_DATA (push, src->base.address + z * src->layer_stride);
+ }
+ BEGIN_NV04(push, NV50_2D(BLIT_SRC_Y_INT), 1); /* trigger */
+ PUSH_DATA (push, srcy >> 32);
+ } else {
+ BEGIN_NV04(push, NV50_2D(BLIT_SRC_X_FRACT), 4);
+ PUSH_DATA (push, srcx);
+ PUSH_DATA (push, srcx >> 32);
+ PUSH_DATA (push, srcy);
+ PUSH_DATA (push, srcy >> 32);
+ }
+ }
+ nv50_bufctx_fence(nv50->bufctx, FALSE);
+
+ nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D);
+
+ /* Undo the clip and operation state changed above. */
+ if (info->scissor_enable) {
+ BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
+ if (mask != 0xffffffff) {
+ BEGIN_NV04(push, NV50_2D(OPERATION), 1);
+ PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
+ }
+}
+
+/* pipe_context::blit entry point.
+ *
+ * Returns without doing anything when info->mask selects none of the
+ * channels relevant to the destination (ZS for depth/stencil formats, RGBA
+ * otherwise). Otherwise the blit goes to the 2D engine unless one of the
+ * checks below forces the 3D engine path.
+ */
+static void
+nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   boolean eng3d = FALSE;
+
+   if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
+      if (!(info->mask & PIPE_MASK_ZS))
+         return;
+      if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT ||
+          info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+         eng3d = TRUE;
+      if (info->filter != PIPE_TEX_FILTER_NEAREST)
+         eng3d = TRUE;
+   } else {
+      if (!(info->mask & PIPE_MASK_RGBA))
+         return;
+      /* a partial color mask is only handled on the 3D path */
+      if (info->mask != PIPE_MASK_RGBA)
+         eng3d = TRUE;
+   }
+
+   if (nv50_miptree(info->src.resource)->layout_3d) {
+      eng3d = TRUE;
+   } else
+   if (info->src.box.depth != info->dst.box.depth) {
+      eng3d = TRUE;
+      /* terminate the line so the message does not run into later output */
+      debug_printf("blit: cannot filter array or cube textures in z direction\n");
+   }
+
+   if (!eng3d && info->dst.format != info->src.format) {
+      if (!nv50_2d_dst_format_faithful(info->dst.format) ||
+          !nv50_2d_src_format_faithful(info->src.format)) {
+         eng3d = TRUE;
+      } else
+      /* NOTE(review): unreachable as written - the condition above already
+       * sends every source format that is not "faithful" to the 3D path.
+       */
+      if (!nv50_2d_src_format_faithful(info->src.format)) {
+         if (!util_format_is_luminance(info->src.format)) {
+            if (util_format_is_intensity(info->src.format))
+               eng3d = TRUE;
+            else
+            if (!nv50_2d_dst_format_ops_supported(info->dst.format))
+               eng3d = TRUE;
+            else
+               eng3d = !nv50_2d_format_supported(info->src.format);
+         }
+      } else
+      if (util_format_is_luminance_alpha(info->src.format))
+         eng3d = TRUE;
+   }
+
+   if (info->src.resource->nr_samples == 8 &&
+       info->dst.resource->nr_samples <= 1)
+      eng3d = TRUE;
+
+   /* FIXME: can't make this work with eng2d anymore */
+   if (info->src.resource->nr_samples > 1 ||
+       info->dst.resource->nr_samples > 1)
+      eng3d = TRUE;
+
+   /* FIXME: find correct src coordinate adjustments */
+   /* Until then any scaled blit (size differing by more than the sign) is
+    * done on the 3D engine.
+    */
+   if ((info->src.box.width != info->dst.box.width &&
+        info->src.box.width != -info->dst.box.width) ||
+       (info->src.box.height != info->dst.box.height &&
+        info->src.box.height != -info->dst.box.height))
+      eng3d = TRUE;
+
+   if (!eng3d)
+      nv50_blit_eng2d(nv50, info);
+   else
+      nv50_blit_3d(nv50, info);
+}
+
+/* Allocate the screen-wide blitter, initialise its mutex and build its
+ * vertex program and sampler.
+ *
+ * Returns FALSE (after logging) if the allocation fails, TRUE otherwise.
+ */
+boolean
+nv50_blitter_create(struct nv50_screen *screen)
+{
+   struct nv50_blitter *blitter = CALLOC_STRUCT(nv50_blitter);
+
+   screen->blitter = blitter;
+   if (blitter == NULL) {
+      NOUVEAU_ERR("failed to allocate blitter struct\n");
+      return FALSE;
+   }
+
+   pipe_mutex_init(blitter->mutex);
+   nv50_blitter_make_vp(blitter);
+   nv50_blitter_make_sampler(blitter);
+   return TRUE;
+}
+
+/* Release the screen's blitter: every fragment program cached in
+ * blitter->fp, the mutex set up by nv50_blitter_create(), and the blitter
+ * struct itself.
+ */
+void
+nv50_blitter_destroy(struct nv50_screen *screen)
+{
+   struct nv50_blitter *blitter = screen->blitter;
+   unsigned i, m;
+
+   for (i = 0; i < NV50_BLIT_MAX_TEXTURE_TYPES; ++i) {
+      for (m = 0; m < NV50_BLIT_MODES; ++m) {
+         struct nv50_program *prog = blitter->fp[i][m];
+         if (prog) {
+            nv50_program_destroy(NULL, prog);
+            FREE((void *)prog->pipe.tokens);
+            FREE(prog);
+         }
+      }
+   }
+
+   /* counterpart of the pipe_mutex_init() in nv50_blitter_create() */
+   pipe_mutex_destroy(blitter->mutex);
+
+   FREE(blitter);
+}
+
+/* Allocate the per-context blit state and link it back to its context.
+ *
+ * Returns FALSE (after logging) if the allocation fails, TRUE otherwise.
+ */
+boolean
+nv50_blitctx_create(struct nv50_context *nv50)
+{
+   struct nv50_blitctx *ctx = CALLOC_STRUCT(nv50_blitctx);
+
+   nv50->blit = ctx;
+   if (ctx == NULL) {
+      NOUVEAU_ERR("failed to allocate blit context\n");
+      return FALSE;
+   }
+
+   ctx->nv50 = nv50;
+   /* the only rasterizer field set here; the rest stays zeroed */
+   ctx->rast.pipe.half_pixel_center = 1;
+   return TRUE;
+}
+
+/* Install this file's copy, blit and clear entry points in the context's
+ * pipe_context function table.
+ */
+void
+nv50_init_surface_functions(struct nv50_context *nv50)
+{
+   nv50->base.pipe.resource_copy_region = nv50_resource_copy_region;
+   nv50->base.pipe.blit = nv50_blit;
+   nv50->base.pipe.clear_render_target = nv50_clear_render_target;
+   nv50->base.pipe.clear_depth_stencil = nv50_clear_depth_stencil;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
new file mode 100644
index 00000000000..9e512928381
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_resource.h"
+#include "nv50/nv50_texture.xml.h"
+#include "nv50/nv50_defs.xml.h"
+
+#include "util/u_format.h"
+
+/* Union of the four component-map (MAPR/MAPG/MAPB/MAPA) fields of TIC word 0 */
+#define NV50_TIC_0_SWIZZLE__MASK \
+ (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
+ NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
+
+/* Translate a PIPE_SWIZZLE_* selector into a TIC component-map value.
+ *
+ * @tc:      TIC word 0 of the format, whose MAPR/G/B/A fields hold the
+ *           format's own component mapping
+ * @swz:     PIPE_SWIZZLE_* value requested by the view
+ * @tex_int: selects the integer rather than the float encoding of ONE
+ *
+ * Anything that is not a component or ONE selector yields
+ * NV50_TIC_MAP_ZERO.
+ */
+static INLINE uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int)
+{
+   uint32_t field_mask;
+   unsigned field_shift;
+
+   switch (swz) {
+   case PIPE_SWIZZLE_RED:
+      field_mask = NV50_TIC_0_MAPR__MASK;
+      field_shift = NV50_TIC_0_MAPR__SHIFT;
+      break;
+   case PIPE_SWIZZLE_GREEN:
+      field_mask = NV50_TIC_0_MAPG__MASK;
+      field_shift = NV50_TIC_0_MAPG__SHIFT;
+      break;
+   case PIPE_SWIZZLE_BLUE:
+      field_mask = NV50_TIC_0_MAPB__MASK;
+      field_shift = NV50_TIC_0_MAPB__SHIFT;
+      break;
+   case PIPE_SWIZZLE_ALPHA:
+      field_mask = NV50_TIC_0_MAPA__MASK;
+      field_shift = NV50_TIC_0_MAPA__SHIFT;
+      break;
+   case PIPE_SWIZZLE_ONE:
+      if (tex_int)
+         return NV50_TIC_MAP_ONE_INT;
+      return NV50_TIC_MAP_ONE_FLOAT;
+   case PIPE_SWIZZLE_ZERO:
+   default:
+      return NV50_TIC_MAP_ZERO;
+   }
+
+   /* component selectors: look up what the format maps that channel to */
+   return (tc & field_mask) >> field_shift;
+}
+
+/* Create a sampler view using the resource's own target.
+ *
+ * RECT textures and buffers are given NV50_TEXVIEW_SCALED_COORDS; every
+ * other target is created with no flags.
+ */
+struct pipe_sampler_view *
+nv50_create_sampler_view(struct pipe_context *pipe,
+                         struct pipe_resource *res,
+                         const struct pipe_sampler_view *templ)
+{
+   const boolean scaled = res->target == PIPE_TEXTURE_RECT ||
+                          res->target == PIPE_BUFFER;
+   const uint32_t flags = scaled ? NV50_TEXVIEW_SCALED_COORDS : 0;
+
+   return nv50_create_texture_view(pipe, res, templ, flags, res->target);
+}
+
+/* Create a sampler view for @texture and fill in its 8-word TIC entry.
+ *
+ * @templ:  template copied into the new view
+ * @flags:  NV50_TEXVIEW_* bits; SCALED_COORDS and FILTER_MSAA8 are read here
+ * @target: texture target to encode, passed separately from the resource
+ *
+ * Returns the new view (reference count 1, holding a reference on @texture),
+ * or NULL if the allocation fails or @target is not a known texture target.
+ * The entry is only built here, not uploaded: view->id is left at -1.
+ */
+struct pipe_sampler_view *
+nv50_create_texture_view(struct pipe_context *pipe,
+                         struct pipe_resource *texture,
+                         const struct pipe_sampler_view *templ,
+                         uint32_t flags,
+                         enum pipe_texture_target target)
+{
+   const struct util_format_description *desc;
+   uint64_t addr;
+   uint32_t *tic;
+   uint32_t swz[4];
+   uint32_t depth;
+   struct nv50_tic_entry *view;
+   struct nv50_miptree *mt = nv50_miptree(texture);
+   boolean tex_int;
+
+   view = MALLOC_STRUCT(nv50_tic_entry);
+   if (!view)
+      return NULL;
+
+   view->pipe = *templ;
+   view->pipe.reference.count = 1;
+   /* clear the copied pointer so the reference below is taken cleanly */
+   view->pipe.texture = NULL;
+   view->pipe.context = pipe;
+
+   view->id = -1;
+
+   pipe_resource_reference(&view->pipe.texture, texture);
+
+   tic = &view->tic[0];
+
+   desc = util_format_description(view->pipe.format);
+
+   /* TIC[0] */
+
+   tic[0] = nv50_format_table[view->pipe.format].tic;
+
+   tex_int = util_format_is_pure_integer(view->pipe.format);
+
+   /* replace the format's default component map by the view's swizzle */
+   swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int);
+   swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int);
+   swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int);
+   swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int);
+   tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
+      (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
+      (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
+      (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
+      (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+
+   addr = mt->base.address;
+
+   /* array resources: start at the view's first layer and expose only the
+    * selected layer range
+    */
+   if (mt->base.base.target == PIPE_TEXTURE_1D_ARRAY ||
+       mt->base.base.target == PIPE_TEXTURE_2D_ARRAY) {
+      addr += view->pipe.u.tex.first_layer * mt->layer_stride;
+      depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
+   } else {
+      depth = mt->base.base.depth0;
+   }
+
+   tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER;
+
+   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+      tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+
+   if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
+      tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+
+   /* memtype 0: the buffer object is linear (buffer or pitch-linear image) */
+   if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
+      if (target == PIPE_BUFFER) {
+         addr += view->pipe.u.buf.first_element * desc->block.bits / 8;
+         tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER;
+         tic[3] = 0;
+         tic[4] = /* width */
+            view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1;
+         tic[5] = 0;
+      } else {
+         tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT;
+         tic[3] = mt->level[0].pitch;
+         tic[4] = mt->base.base.width0;
+         tic[5] = (1 << 16) | mt->base.base.height0;
+      }
+      tic[6] =
+      tic[7] = 0;
+      tic[1] = addr;
+      tic[2] |= addr >> 32;
+      return &view->pipe;
+   }
+
+   tic[1] = addr;
+   tic[2] |= (addr >> 32) & 0xff;
+
+   /* move tile_mode bits 4-7 and 8-11 to TIC word 2 bits 22+ and 25+ */
+   tic[2] |=
+      ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
+      ((mt->level[0].tile_mode & 0xf00) << (25 - 8));
+
+   switch (target) {
+   case PIPE_TEXTURE_1D:
+      tic[2] |= NV50_TIC_2_TARGET_1D;
+      break;
+   case PIPE_TEXTURE_2D:
+      tic[2] |= NV50_TIC_2_TARGET_2D;
+      break;
+   case PIPE_TEXTURE_RECT:
+      tic[2] |= NV50_TIC_2_TARGET_RECT;
+      break;
+   case PIPE_TEXTURE_3D:
+      tic[2] |= NV50_TIC_2_TARGET_3D;
+      break;
+   case PIPE_TEXTURE_CUBE:
+      depth /= 6;
+      tic[2] |= NV50_TIC_2_TARGET_CUBE;
+      break;
+   case PIPE_TEXTURE_1D_ARRAY:
+      tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
+      break;
+   case PIPE_TEXTURE_2D_ARRAY:
+      tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
+      break;
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      depth /= 6;
+      tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
+      break;
+   case PIPE_BUFFER:
+      assert(0); /* should be linear and handled above ! */
+      tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR;
+      break;
+   default:
+      /* report the value that was actually rejected */
+      NOUVEAU_ERR("invalid texture target: %d\n", target);
+      /* Do not leak the half-built view: give back the texture reference
+       * taken above, free the entry and return a null pointer.
+       */
+      pipe_resource_reference(&view->pipe.texture, NULL);
+      FREE(view);
+      return NULL;
+   }
+
+   tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
+
+   tic[4] = (1 << 31) | (mt->base.base.width0 << mt->ms_x);
+
+   tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff;
+   tic[5] |= depth << 16;
+   tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
+
+   tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */
+
+   tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+
+   /* views without normalized coordinates advertise no mip levels */
+   if (unlikely(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS)))
+      if (mt->base.base.last_level)
+         tic[5] &= ~NV50_TIC_5_LAST_LEVEL__MASK;
+
+   return &view->pipe;
+}
+
+/* Bind the sampler views of shader stage @s.
+ *
+ * A view that has no TIC slot yet (id < 0) gets one allocated and its
+ * 8-word entry written into the screen's txc buffer through the 2D engine's
+ * SIFC registers. Slots that were bound by the previous validation but lie
+ * beyond the current view count are unbound.
+ *
+ * Returns TRUE if at least one entry was uploaded.
+ */
+static boolean
+nv50_validate_tic(struct nv50_context *nv50, int s)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nouveau_bo *txc = nv50->screen->txc;
+   unsigned i;
+   boolean need_flush = FALSE;
+
+   for (i = 0; i < nv50->num_textures[s]; ++i) {
+      struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);
+      struct nv04_resource *res;
+
+      if (!tic) {
+         /* empty slot: unbind */
+         BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
+         PUSH_DATA (push, (i << 1) | 0);
+         continue;
+      }
+      res = &nv50_miptree(tic->pipe.texture)->base;
+
+      if (tic->id < 0) {
+         tic->id = nv50_screen_tic_alloc(nv50->screen, tic);
+
+         /* upload the 32-byte entry at byte offset id * 32 of txc */
+         BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
+         PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+         PUSH_DATA (push, 1);
+         BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
+         PUSH_DATA (push, 262144);
+         PUSH_DATA (push, 65536);
+         PUSH_DATA (push, 1);
+         PUSH_DATAh(push, txc->offset);
+         PUSH_DATA (push, txc->offset);
+         BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+         BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
+         PUSH_DATA (push, 32);
+         PUSH_DATA (push, 1);
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 1);
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 1);
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, tic->id * 32);
+         PUSH_DATA (push, 0);
+         PUSH_DATA (push, 0);
+         BEGIN_NI04(push, NV50_2D(SIFC_DATA), 8);
+         PUSH_DATAp(push, &tic->tic[0], 8);
+
+         need_flush = TRUE;
+      } else
+      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+         BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
+         PUSH_DATA (push, 0x20);
+      }
+
+      nv50->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+      /* Clear only the GPU_WRITING bit and set GPU_READING; every other
+       * status bit of the resource must survive.
+       */
+      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+      BCTX_REFN(nv50->bufctx_3d, TEXTURES, res, RD);
+
+      BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
+      PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1);
+   }
+   /* unbind slots that were in use before but are not any more */
+   for (; i < nv50->state.num_textures[s]; ++i) {
+      BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
+      PUSH_DATA (push, (i << 1) | 0);
+   }
+   nv50->state.num_textures[s] = nv50->num_textures[s];
+
+   return need_flush;
+}
+
+/* Validate the texture bindings of stages 0 and 2 and emit a single
+ * TIC_FLUSH if either of them uploaded a new entry.
+ */
+void nv50_validate_textures(struct nv50_context *nv50)
+{
+   /* both stages are always validated, whatever the first one returns */
+   const boolean flush_s0 = nv50_validate_tic(nv50, 0);
+   const boolean flush_s2 = nv50_validate_tic(nv50, 2);
+
+   if (!(flush_s0 | flush_s2))
+      return;
+
+   BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TIC_FLUSH), 1);
+   PUSH_DATA (nv50->base.pushbuf, 0);
+}
+
+/* Bind the sampler states of shader stage @s.
+ *
+ * A sampler without a TSC slot (id < 0) gets one allocated and its 32-byte
+ * entry uploaded into the screen's txc buffer at offset 65536 + id * 32.
+ * Slots bound by the previous validation but beyond the current sampler
+ * count are unbound.
+ *
+ * Returns TRUE if at least one entry was uploaded.
+ */
+static boolean
+nv50_validate_tsc(struct nv50_context *nv50, int s)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   boolean uploaded = FALSE;
+   unsigned slot = 0;
+
+   while (slot < nv50->num_samplers[s]) {
+      struct nv50_tsc_entry *entry = nv50_tsc_entry(nv50->samplers[s][slot]);
+
+      if (entry) {
+         if (entry->id < 0) {
+            entry->id = nv50_screen_tsc_alloc(nv50->screen, entry);
+
+            nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc,
+                                65536 + entry->id * 32,
+                                NOUVEAU_BO_VRAM, 32, entry->tsc);
+            uploaded = TRUE;
+         }
+         nv50->screen->tsc.lock[entry->id / 32] |= 1 << (entry->id % 32);
+
+         BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
+         PUSH_DATA (push, (entry->id << 12) | (slot << 4) | 1);
+      } else {
+         /* empty slot: unbind */
+         BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
+         PUSH_DATA (push, (slot << 4) | 0);
+      }
+      ++slot;
+   }
+
+   /* unbind slots that were in use before but are not any more */
+   while (slot < nv50->state.num_samplers[s]) {
+      BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
+      PUSH_DATA (push, (slot << 4) | 0);
+      ++slot;
+   }
+   nv50->state.num_samplers[s] = nv50->num_samplers[s];
+
+   return uploaded;
+}
+
+/* Validate the sampler bindings of stages 0 and 2 and emit a single
+ * TSC_FLUSH if either of them uploaded a new entry.
+ */
+void nv50_validate_samplers(struct nv50_context *nv50)
+{
+   /* both stages are always validated, whatever the first one returns */
+   const boolean flush_s0 = nv50_validate_tsc(nv50, 0);
+   const boolean flush_s2 = nv50_validate_tsc(nv50, 2);
+
+   if (!(flush_s0 | flush_s2))
+      return;
+
+   BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TSC_FLUSH), 1);
+   PUSH_DATA (nv50->base.pushbuf, 0);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h
new file mode 100644
index 00000000000..31eab9b5d87
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h
@@ -0,0 +1,306 @@
+#ifndef NV50_TEXTURE_XML
+#define NV50_TEXTURE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/nv50_texture.xml ( 8648 bytes, from 2013-04-13 12:49:11)
+- rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
+- rnndb/nvchipsets.xml ( 3954 bytes, from 2013-03-26 01:26:43)
+- rnndb/nv50_defs.xml ( 16652 bytes, from 2013-04-04 10:57:15)
+
+Copyright (C) 2006-2013 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- imirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_TIC_MAP_ZERO 0x00000000
+#define NV50_TIC_MAP_C0 0x00000002
+#define NV50_TIC_MAP_C1 0x00000003
+#define NV50_TIC_MAP_C2 0x00000004
+#define NV50_TIC_MAP_C3 0x00000005
+#define NV50_TIC_MAP_ONE_INT 0x00000006
+#define NV50_TIC_MAP_ONE_FLOAT 0x00000007
+#define NV50_TIC_TYPE_SNORM 0x00000001
+#define NV50_TIC_TYPE_UNORM 0x00000002
+#define NV50_TIC_TYPE_SINT 0x00000003
+#define NV50_TIC_TYPE_UINT 0x00000004
+#define NV50_TIC_TYPE_SSCALED 0x00000005
+#define NV50_TIC_TYPE_USCALED 0x00000006
+#define NV50_TIC_TYPE_FLOAT 0x00000007
+#define NV50_TSC_WRAP_REPEAT 0x00000000
+#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001
+#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002
+#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003
+#define NV50_TSC_WRAP_CLAMP 0x00000004
+#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005
+#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006
+#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007
+#define NV50_TIC__SIZE 0x00000020
+#define NV50_TIC_0 0x00000000
+#define NV50_TIC_0_MAPA__MASK 0x38000000
+#define NV50_TIC_0_MAPA__SHIFT 27
+#define NV50_TIC_0_MAPB__MASK 0x07000000
+#define NV50_TIC_0_MAPB__SHIFT 24
+#define NV50_TIC_0_MAPG__MASK 0x00e00000
+#define NV50_TIC_0_MAPG__SHIFT 21
+#define NV50_TIC_0_MAPR__MASK 0x001c0000
+#define NV50_TIC_0_MAPR__SHIFT 18
+#define NV50_TIC_0_TYPE3__MASK 0x00038000
+#define NV50_TIC_0_TYPE3__SHIFT 15
+#define NV50_TIC_0_TYPE2__MASK 0x00007000
+#define NV50_TIC_0_TYPE2__SHIFT 12
+#define NV50_TIC_0_TYPE1__MASK 0x00000e00
+#define NV50_TIC_0_TYPE1__SHIFT 9
+#define NV50_TIC_0_TYPE0__MASK 0x000001c0
+#define NV50_TIC_0_TYPE0__SHIFT 6
+#define NV50_TIC_0_FMT__MASK 0x0000003f
+#define NV50_TIC_0_FMT__SHIFT 0
+#define NV50_TIC_0_FMT_32_32_32_32 0x00000001
+#define NVC0_TIC_0_FMT_32_32_32 0x00000002
+#define NV50_TIC_0_FMT_16_16_16_16 0x00000003
+#define NV50_TIC_0_FMT_32_32 0x00000004
+#define NV50_TIC_0_FMT_32_8_X24 0x00000005
+#define NV50_TIC_0_FMT_8_8_8_8 0x00000008
+#define NV50_TIC_0_FMT_10_10_10_2 0x00000009
+#define NV50_TIC_0_FMT_16_16 0x0000000c
+#define NV50_TIC_0_FMT_24_8 0x0000000d
+#define NV50_TIC_0_FMT_8_24 0x0000000e
+#define NV50_TIC_0_FMT_32 0x0000000f
+#define NV50_TIC_0_FMT_BPTC_FLOAT 0x00000010
+#define NV50_TIC_0_FMT_BPTC_UFLOAT 0x00000011
+#define NV50_TIC_0_FMT_4_4_4_4 0x00000012
+#define NV50_TIC_0_FMT_1_5_5_5 0x00000013
+#define NV50_TIC_0_FMT_5_5_5_1 0x00000014
+#define NV50_TIC_0_FMT_5_6_5 0x00000015
+#define NV50_TIC_0_FMT_5_5_6 0x00000016
+#define NV50_TIC_0_FMT_BPTC 0x00000017
+#define NV50_TIC_0_FMT_8_8 0x00000018
+#define NV50_TIC_0_FMT_16 0x0000001b
+#define NV50_TIC_0_FMT_8 0x0000001d
+#define NV50_TIC_0_FMT_4_4 0x0000001e
+#define NV50_TIC_0_FMT_BITMAP 0x0000001f
+#define NV50_TIC_0_FMT_9_9_9_E5 0x00000020
+#define NV50_TIC_0_FMT_11_11_10 0x00000021
+#define NV50_TIC_0_FMT_U8_YA8_V8_YB8 0x00000022
+#define NV50_TIC_0_FMT_YA8_U8_YB8_V8 0x00000023
+#define NV50_TIC_0_FMT_DXT1 0x00000024
+#define NV50_TIC_0_FMT_DXT3 0x00000025
+#define NV50_TIC_0_FMT_DXT5 0x00000026
+#define NV50_TIC_0_FMT_RGTC1 0x00000027
+#define NV50_TIC_0_FMT_RGTC2 0x00000028
+#define NV50_TIC_0_FMT_S8_Z24 0x00000029
+#define NV50_TIC_0_FMT_Z24_X8 0x0000002a
+#define NV50_TIC_0_FMT_Z24_S8 0x0000002b
+#define NV50_TIC_0_FMT_Z24_C8_MS4_CS4 0x0000002c
+#define NV50_TIC_0_FMT_Z24_C8_MS8_CS8 0x0000002d
+#define NV50_TIC_0_FMT_Z24_C8_MS4_CS12 0x0000002e
+#define NV50_TIC_0_FMT_Z32 0x0000002f
+#define NV50_TIC_0_FMT_Z32_S8_X24 0x00000030
+#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS4 0x00000031
+#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS8_CS8 0x00000032
+#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS4 0x00000033
+#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS8_CS8 0x00000034
+#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS4 0x00000035
+#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS8_CS8 0x00000036
+#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS12 0x00000037
+#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS12 0x00000038
+#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS12 0x00000039
+#define NV50_TIC_0_FMT_Z16 0x0000003a
+
+#define NV50_TIC_1 0x00000004
+#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff
+#define NV50_TIC_1_OFFSET_LOW__SHIFT 0
+
+#define NV50_TIC_2 0x00000008
+#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff
+#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0
+#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400
+#define NV50_TIC_2_TARGET__MASK 0x0003c000
+#define NV50_TIC_2_TARGET__SHIFT 14
+#define NV50_TIC_2_TARGET_1D 0x00000000
+#define NV50_TIC_2_TARGET_2D 0x00004000
+#define NV50_TIC_2_TARGET_3D 0x00008000
+#define NV50_TIC_2_TARGET_CUBE 0x0000c000
+#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000
+#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000
+#define NV50_TIC_2_TARGET_BUFFER 0x00018000
+#define NV50_TIC_2_TARGET_RECT 0x0001c000
+#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000
+#define NV50_TIC_2_LINEAR 0x00040000
+#define NV50_TIC_2_TILE_MODE_X__MASK 0x00380000
+#define NV50_TIC_2_TILE_MODE_X__SHIFT 19
+#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000
+#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22
+#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000
+#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25
+#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000
+#define NV50_TIC_2_2D_UNK0258__SHIFT 28
+#define NV50_TIC_2_NO_BORDER 0x40000000
+#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000
+
+#define NV50_TIC_3 0x0000000c
+#define NV50_TIC_3_PITCH__MASK 0xffffffff
+#define NV50_TIC_3_PITCH__SHIFT 0
+
+#define NV50_TIC_4 0x00000010
+#define NV50_TIC_4_WIDTH__MASK 0xffffffff
+#define NV50_TIC_4_WIDTH__SHIFT 0
+
+#define NV50_TIC_5 0x00000014
+#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000
+#define NV50_TIC_5_LAST_LEVEL__SHIFT 28
+#define NV50_TIC_5_DEPTH__MASK 0x0fff0000
+#define NV50_TIC_5_DEPTH__SHIFT 16
+#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff
+#define NV50_TIC_5_HEIGHT__SHIFT 0
+
+#define NV50_TIC_7 0x0000001c
+#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f
+#define NV50_TIC_7_BASE_LEVEL__SHIFT 0
+#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0
+#define NV50_TIC_7_MAX_LEVEL__SHIFT 4
+#define NV50_TIC_7_MS_MODE__MASK 0x0000f000
+#define NV50_TIC_7_MS_MODE__SHIFT 12
+#define NV50_TIC_7_MS_MODE_MS1 0x00000000
+#define NV50_TIC_7_MS_MODE_MS2 0x00001000
+#define NV50_TIC_7_MS_MODE_MS4 0x00002000
+#define NV50_TIC_7_MS_MODE_MS8 0x00003000
+#define NVA3_TIC_7_MS_MODE_MS8_ALT 0x00004000
+#define NVA3_TIC_7_MS_MODE_MS2_ALT 0x00005000
+#define NVC0_TIC_7_MS_MODE_UNK6 0x00006000
+#define NV50_TIC_7_MS_MODE_MS4_CS4 0x00008000
+#define NV50_TIC_7_MS_MODE_MS4_CS12 0x00009000
+#define NV50_TIC_7_MS_MODE_MS8_CS8 0x0000a000
+#define NVC0_TIC_7_MS_MODE_MS8_CS24 0x0000b000
+
+#define NV50_TSC__SIZE 0x00000020
+#define NV50_TSC_0 0x00000000
+#define NV50_TSC_0_WRAPS__MASK 0x00000007
+#define NV50_TSC_0_WRAPS__SHIFT 0
+#define NV50_TSC_0_WRAPT__MASK 0x00000038
+#define NV50_TSC_0_WRAPT__SHIFT 3
+#define NV50_TSC_0_WRAPR__MASK 0x000001c0
+#define NV50_TSC_0_WRAPR__SHIFT 6
+#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200
+#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00
+#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10
+#define NV50_TSC_0_SRGB_CONVERSION_ALLOWED 0x00002000
+#define NV50_TSC_0_BOX_S__MASK 0x0001c000
+#define NV50_TSC_0_BOX_S__SHIFT 14
+#define NV50_TSC_0_BOX_T__MASK 0x000e0000
+#define NV50_TSC_0_BOX_T__SHIFT 17
+#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000
+#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20
+
+#define NV50_TSC_1 0x00000004
+#define NV50_TSC_1_UNKN_ANISO_15 0x10000000
+#define NV50_TSC_1_UNKN_ANISO_35 0x18000000
+#define NV50_TSC_1_MAGF__MASK 0x00000003
+#define NV50_TSC_1_MAGF__SHIFT 0
+#define NV50_TSC_1_MAGF_NEAREST 0x00000001
+#define NV50_TSC_1_MAGF_LINEAR 0x00000002
+#define NV50_TSC_1_MINF__MASK 0x00000030
+#define NV50_TSC_1_MINF__SHIFT 4
+#define NV50_TSC_1_MINF_NEAREST 0x00000010
+#define NV50_TSC_1_MINF_LINEAR 0x00000020
+#define NV50_TSC_1_MIPF__MASK 0x000000c0
+#define NV50_TSC_1_MIPF__SHIFT 6
+#define NV50_TSC_1_MIPF_NONE 0x00000040
+#define NV50_TSC_1_MIPF_NEAREST 0x00000080
+#define NV50_TSC_1_MIPF_LINEAR 0x000000c0
+#define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200
+#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000
+#define NV50_TSC_1_LOD_BIAS__SHIFT 12
+#define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000
+
+#define NV50_TSC_2 0x00000008
+#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff
+#define NV50_TSC_2_MIN_LOD__SHIFT 0
+#define NV50_TSC_2_MAX_LOD__MASK 0x00fff000
+#define NV50_TSC_2_MAX_LOD__SHIFT 12
+#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__MASK 0xff000000
+#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__SHIFT 24
+
+#define NV50_TSC_3 0x0000000c
+#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__MASK 0x000ff000
+#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__SHIFT 12
+#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__MASK 0x0ff00000
+#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__SHIFT 20
+
+#define NV50_TSC_4 0x00000010
+#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff
+#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0
+
+#define NV50_TSC_5 0x00000014
+#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff
+#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0
+
+#define NV50_TSC_6 0x00000018
+#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff
+#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0
+
+#define NV50_TSC_7 0x0000001c
+#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff
+#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0
+
+
+#endif /* NV50_TEXTURE_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
new file mode 100644
index 00000000000..a9906829fec
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -0,0 +1,412 @@
+
+#include "util/u_format.h"
+
+#include "nv50/nv50_context.h"
+
+#include "nv50/nv50_defs.xml.h"
+
+/* Driver-private transfer object. It embeds the generic pipe_transfer and
+ * keeps the two M2MF rectangles used to copy between the mapped resource
+ * and the staging buffer that is handed out to the caller.
+ */
+struct nv50_transfer {
+ struct pipe_transfer base;
+ struct nv50_m2mf_rect rect[2]; /* [0]: the miptree resource, [1]: the GART staging buffer */
+ uint32_t nblocksx; /* width of the mapped box in copy units (format blocks, or samples for plain formats) */
+ uint32_t nblocksy; /* height of the mapped box in the same units */
+};
+
+/* Describe mip level l of the miptree resource res as an M2MF rectangle.
+ *
+ * rect:    rectangle to fill in
+ * res:     resource, interpreted as an nv50_miptree
+ * l:       mip level
+ * x, y:    origin inside the level, in pixels
+ * z:       slice (3D layout) or layer (array layout)
+ *
+ * Plain formats have their extents and origin scaled by the miptree's
+ * ms_x/ms_y shifts; every other format is converted to block counts.
+ */
+void
+nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect,
+                     struct pipe_resource *restrict res, unsigned l,
+                     unsigned x, unsigned y, unsigned z)
+{
+   struct nv50_miptree *tree = nv50_miptree(res);
+   const unsigned level_w = u_minify(res->width0, l);
+   const unsigned level_h = u_minify(res->height0, l);
+
+   /* Backing storage and per-level layout. */
+   rect->bo = tree->base.bo;
+   rect->domain = tree->base.domain;
+   rect->base = tree->level[l].offset;
+   rect->pitch = tree->level[l].pitch;
+   rect->tile_mode = tree->level[l].tile_mode;
+   rect->cpp = util_format_get_blocksize(res->format);
+
+   /* Extents and origin in copy units. */
+   if (!util_format_is_plain(res->format)) {
+      rect->width = util_format_get_nblocksx(res->format, level_w);
+      rect->height = util_format_get_nblocksy(res->format, level_h);
+      rect->x = util_format_get_nblocksx(res->format, x);
+      rect->y = util_format_get_nblocksy(res->format, y);
+   } else {
+      rect->width = level_w << tree->ms_x;
+      rect->height = level_h << tree->ms_y;
+      rect->x = x << tree->ms_x;
+      rect->y = y << tree->ms_y;
+   }
+
+   /* Without a 3D layout the layer is selected by offsetting the base and
+    * the rectangle is treated as a single slice.
+    */
+   if (!tree->layout_3d) {
+      rect->base += z * tree->layer_stride;
+      rect->z = 0;
+      rect->depth = 1;
+   } else {
+      rect->z = z;
+      rect->depth = u_minify(res->depth0, l);
+   }
+}
+
+/* Copy a region of nblocksx x nblocksy copy units from src to dst using
+ * the M2MF methods.
+ *
+ * Each side is programmed through the 6-word LINEAR_IN/LINEAR_OUT setup when
+ * nouveau_bo_memtype() of its buffer is non-zero, and as a pitch-linear
+ * surface (LINEAR_* = 1 plus PITCH_*) otherwise. For the pitch-linear case
+ * the x/y origin is folded into the byte offset; for the other case it is
+ * passed via TILING_POSITION_*.
+ *
+ * The copy is split into chunks of at most 2047 lines.
+ * Both rectangles must use the same cpp (asserted).
+ *
+ * NOTE(review): no PUSH_SPACE call is visible in this function; presumably
+ * the BEGIN_* helpers or the caller guarantee pushbuf space - confirm.
+ */
+void
+nv50_m2mf_transfer_rect(struct nv50_context *nv50,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nouveau_bufctx *bctx = nv50->bufctx;
+ const int cpp = dst->cpp;
+ uint32_t src_ofst = src->base;
+ uint32_t dst_ofst = dst->base;
+ uint32_t height = nblocksy;
+ uint32_t sy = src->y;
+ uint32_t dy = dst->y;
+
+ assert(dst->cpp == src->cpp);
+
+ /* Reference both buffers (src for reading, dst for writing) and validate. */
+ nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ /* Source surface setup. */
+ if (nouveau_bo_memtype(src->bo)) {
+ BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 6);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, src->tile_mode);
+ PUSH_DATA (push, src->width * cpp);
+ PUSH_DATA (push, src->height);
+ PUSH_DATA (push, src->depth);
+ PUSH_DATA (push, src->z);
+ } else {
+ src_ofst += src->y * src->pitch + src->x * cpp;
+
+ BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_PITCH_IN), 1);
+ PUSH_DATA (push, src->pitch);
+ }
+
+ /* Destination surface setup, mirroring the source. */
+ if (nouveau_bo_memtype(dst->bo)) {
+ BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 6);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, dst->tile_mode);
+ PUSH_DATA (push, dst->width * cpp);
+ PUSH_DATA (push, dst->height);
+ PUSH_DATA (push, dst->depth);
+ PUSH_DATA (push, dst->z);
+ } else {
+ dst_ofst += dst->y * dst->pitch + dst->x * cpp;
+
+ BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_PITCH_OUT), 1);
+ PUSH_DATA (push, dst->pitch);
+ }
+
+ /* Issue the copy in chunks of at most 2047 lines. */
+ while (height) {
+ int line_count = height > 2047 ? 2047 : height;
+
+ BEGIN_NV04(push, NV50_M2MF(OFFSET_IN_HIGH), 2);
+ PUSH_DATAh(push, src->bo->offset + src_ofst);
+ PUSH_DATAh(push, dst->bo->offset + dst_ofst);
+
+ BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_OFFSET_IN), 2);
+ PUSH_DATA (push, src->bo->offset + src_ofst);
+ PUSH_DATA (push, dst->bo->offset + dst_ofst);
+
+ /* Advance per chunk: via the y position (sy/dy) on the TILING_POSITION
+ * path, via the byte offset on the pitch-linear path.
+ */
+ if (nouveau_bo_memtype(src->bo)) {
+ BEGIN_NV04(push, NV50_M2MF(TILING_POSITION_IN), 1);
+ PUSH_DATA (push, (sy << 16) | (src->x * cpp));
+ } else {
+ src_ofst += line_count * src->pitch;
+ }
+ if (nouveau_bo_memtype(dst->bo)) {
+ BEGIN_NV04(push, NV50_M2MF(TILING_POSITION_OUT), 1);
+ PUSH_DATA (push, (dy << 16) | (dst->x * cpp));
+ } else {
+ dst_ofst += line_count * dst->pitch;
+ }
+
+ /* Line length in bytes, line count, then two further words whose
+ * meaning is defined by the method headers (not visible here).
+ */
+ BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_LINE_LENGTH_IN), 4);
+ PUSH_DATA (push, nblocksx * cpp);
+ PUSH_DATA (push, line_count);
+ PUSH_DATA (push, (1 << 8) | (1 << 0));
+ PUSH_DATA (push, 0);
+
+ height -= line_count;
+ sy += line_count;
+ dy += line_count;
+ }
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
+/* Upload size bytes from data into dst at byte offset `offset` by streaming
+ * them inline through the 2D engine's SIFC methods.
+ *
+ * The destination is programmed with NV50_SURFACE_FORMAT_R8_UNORM. The low
+ * 8 bits of the offset are passed as the x coordinate and the remaining
+ * bits are added to the buffer address.
+ *
+ * The payload is pushed in 32-bit words: (size + 3) / 4 words are read from
+ * data, so up to 3 bytes past data + size may be read when size is not a
+ * multiple of 4.
+ *
+ * NOTE(review): if PUSH_SPACE() fails the loop stops and the remaining data
+ * is not uploaded; no error is reported to the caller.
+ */
+void
+nv50_sifc_linear_u8(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data)
+{
+ struct nv50_context *nv50 = nv50_context(&nv->pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ uint32_t *src = (uint32_t *)data;
+ unsigned count = (size + 3) / 4; /* number of 32-bit words to push */
+ unsigned xcoord = offset & 0xff;
+
+ nouveau_bufctx_refn(nv50->bufctx, 0, dst, domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nv50->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ /* The sub-256-byte part of the offset was saved in xcoord above. */
+ offset &= ~0xff;
+
+ BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
+ PUSH_DATA (push, 262144);
+ PUSH_DATA (push, 65536);
+ PUSH_DATA (push, 1);
+ PUSH_DATAh(push, dst->offset + offset);
+ PUSH_DATA (push, dst->offset + offset);
+ BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
+ PUSH_DATA (push, size);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, xcoord);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ /* Stream the payload, limited per packet by the available pushbuf space
+ * (minus one word for the header) and by the maximum packet length.
+ */
+ while (count) {
+ unsigned nr;
+
+ if (!PUSH_SPACE(push, 16))
+ break;
+ nr = PUSH_AVAIL(push);
+ assert(nr >= 16);
+ nr = MIN2(count, nr - 1);
+ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+
+ BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
+ PUSH_DATAp(push, src, nr);
+
+ src += nr;
+ count -= nr;
+ }
+
+ nouveau_bufctx_reset(nv50->bufctx, 0);
+}
+
+/* Copy size bytes between two buffers with the M2MF methods, treating both
+ * sides as linear memory.
+ *
+ * dst/dstoff/dstdom: destination buffer, byte offset and memory domain
+ * src/srcoff/srcdom: source buffer, byte offset and memory domain
+ *
+ * The transfer is issued as single-line copies of at most 1 << 17 bytes.
+ */
+void
+nv50_m2mf_copy_linear(struct nouveau_context *nv,
+                      struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+                      struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+                      unsigned size)
+{
+   struct nouveau_bufctx *bufctx = nv50_context(&nv->pipe)->bufctx;
+   struct nouveau_pushbuf *push = nv->pushbuf;
+   unsigned remaining;
+
+   nouveau_bufctx_refn(bufctx, 0, src, srcdom | NOUVEAU_BO_RD);
+   nouveau_bufctx_refn(bufctx, 0, dst, dstdom | NOUVEAU_BO_WR);
+   nouveau_pushbuf_bufctx(push, bufctx);
+   nouveau_pushbuf_validate(push);
+
+   /* Both sides are linear. */
+   BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 1);
+   PUSH_DATA (push, 1);
+
+   for (remaining = size; remaining != 0; ) {
+      const unsigned chunk = MIN2(remaining, 1 << 17);
+
+      BEGIN_NV04(push, NV50_M2MF(OFFSET_IN_HIGH), 2);
+      PUSH_DATAh(push, src->offset + srcoff);
+      PUSH_DATAh(push, dst->offset + dstoff);
+      BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_OFFSET_IN), 2);
+      PUSH_DATA (push, src->offset + srcoff);
+      PUSH_DATA (push, dst->offset + dstoff);
+      /* One line of `chunk` bytes. */
+      BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_LINE_LENGTH_IN), 4);
+      PUSH_DATA (push, chunk);
+      PUSH_DATA (push, 1);
+      PUSH_DATA (push, (1 << 8) | (1 << 0));
+      PUSH_DATA (push, 0);
+
+      remaining -= chunk;
+      srcoff += chunk;
+      dstoff += chunk;
+   }
+
+   nouveau_bufctx_reset(bufctx, 0);
+}
+
+/* Map a box of a miptree level for CPU access through a staging buffer.
+ *
+ * A GART buffer large enough for box->depth slices of the box is allocated.
+ * For PIPE_TRANSFER_READ the current contents are copied into it with M2MF,
+ * one slice/layer at a time. The staging buffer is then CPU-mapped and its
+ * address returned. Write-back happens in nv50_miptree_transfer_unmap().
+ *
+ * pctx:      context
+ * res:       resource to map (an nv50_miptree)
+ * level:     mip level
+ * usage:     PIPE_TRANSFER_* flags; PIPE_TRANSFER_MAP_DIRECTLY is refused
+ * box:       region to map
+ * ptransfer: receives the transfer object on success
+ *
+ * Returns the CPU pointer to the staging data, or NULL on failure. On
+ * failure *ptransfer is left untouched and nothing stays allocated or
+ * referenced.
+ */
+void *
+nv50_miptree_transfer_map(struct pipe_context *pctx,
+                          struct pipe_resource *res,
+                          unsigned level,
+                          unsigned usage,
+                          const struct pipe_box *box,
+                          struct pipe_transfer **ptransfer)
+{
+   struct nv50_screen *screen = nv50_screen(pctx->screen);
+   struct nv50_context *nv50 = nv50_context(pctx);
+   struct nouveau_device *dev = nv50->screen->base.device;
+   const struct nv50_miptree *mt = nv50_miptree(res);
+   struct nv50_transfer *tx;
+   uint32_t size;
+   int ret;
+   unsigned flags = 0;
+
+   if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+      return NULL;
+
+   tx = CALLOC_STRUCT(nv50_transfer);
+   if (!tx)
+      return NULL;
+
+   pipe_resource_reference(&tx->base.resource, res);
+
+   tx->base.level = level;
+   tx->base.usage = usage;
+   tx->base.box = *box;
+
+   if (util_format_is_plain(res->format)) {
+      tx->nblocksx = box->width << mt->ms_x;
+      /* Height scales with the vertical sample shift, matching what
+       * nv50_m2mf_rect_setup() does for rect->height and rect->y.
+       */
+      tx->nblocksy = box->height << mt->ms_y;
+   } else {
+      tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
+      tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
+   }
+
+   tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
+   tx->base.layer_stride = tx->nblocksy * tx->base.stride;
+
+   nv50_m2mf_rect_setup(&tx->rect[0], res, level, box->x, box->y, box->z);
+
+   size = tx->base.layer_stride;
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
+                        size * tx->base.box.depth, NULL, &tx->rect[1].bo);
+   if (ret)
+      goto fail_tx;
+
+   /* The staging rectangle is a tightly packed linear image of one slice. */
+   tx->rect[1].cpp = tx->rect[0].cpp;
+   tx->rect[1].width = tx->nblocksx;
+   tx->rect[1].height = tx->nblocksy;
+   tx->rect[1].depth = 1;
+   tx->rect[1].pitch = tx->base.stride;
+   tx->rect[1].domain = NOUVEAU_BO_GART;
+
+   if (usage & PIPE_TRANSFER_READ) {
+      /* Remember the starting position: the loop advances rect[0]/rect[1]
+       * per slice and unmap expects them to point at the first slice again.
+       */
+      unsigned base = tx->rect[0].base;
+      unsigned z = tx->rect[0].z;
+      unsigned i;
+      for (i = 0; i < box->depth; ++i) {
+         nv50_m2mf_transfer_rect(nv50, &tx->rect[1], &tx->rect[0],
+                                 tx->nblocksx, tx->nblocksy);
+         if (mt->layout_3d)
+            tx->rect[0].z++;
+         else
+            tx->rect[0].base += mt->layer_stride;
+         tx->rect[1].base += size;
+      }
+      tx->rect[0].z = z;
+      tx->rect[0].base = base;
+      tx->rect[1].base = 0;
+   }
+
+   /* Already CPU-mapped: nothing more to do. */
+   if (tx->rect[1].bo->map) {
+      *ptransfer = &tx->base;
+      return tx->rect[1].bo->map;
+   }
+
+   if (usage & PIPE_TRANSFER_READ)
+      flags = NOUVEAU_BO_RD;
+   if (usage & PIPE_TRANSFER_WRITE)
+      flags |= NOUVEAU_BO_WR;
+
+   ret = nouveau_bo_map(tx->rect[1].bo, flags, screen->base.client);
+   if (ret)
+      goto fail_bo;
+
+   *ptransfer = &tx->base;
+   return tx->rect[1].bo->map;
+
+fail_bo:
+   nouveau_bo_ref(NULL, &tx->rect[1].bo);
+fail_tx:
+   /* Drop the reference taken above so the resource is not leaked. */
+   pipe_resource_reference(&tx->base.resource, NULL);
+   FREE(tx);
+   return NULL;
+}
+
+/* Finish a transfer created by nv50_miptree_transfer_map().
+ *
+ * For PIPE_TRANSFER_WRITE the staging data is copied back into the resource
+ * one slice/layer at a time. The staging buffer, the resource reference and
+ * the transfer object itself are then released.
+ */
+void
+nv50_miptree_transfer_unmap(struct pipe_context *pctx,
+                            struct pipe_transfer *transfer)
+{
+   struct nv50_transfer *tx = (struct nv50_transfer *)transfer;
+   struct nv50_context *nv50 = nv50_context(pctx);
+   struct nv50_miptree *mt = nv50_miptree(tx->base.resource);
+
+   if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+      unsigned slices_left;
+
+      for (slices_left = tx->base.box.depth; slices_left; --slices_left) {
+         nv50_m2mf_transfer_rect(nv50, &tx->rect[0], &tx->rect[1],
+                                 tx->nblocksx, tx->nblocksy);
+
+         /* Step the resource side to the next slice or layer ... */
+         if (!mt->layout_3d)
+            tx->rect[0].base += mt->layer_stride;
+         else
+            tx->rect[0].z++;
+
+         /* ... and the staging side by one packed slice. */
+         tx->rect[1].base += tx->nblocksy * tx->base.stride;
+      }
+   }
+
+   nouveau_bo_ref(NULL, &tx->rect[1].bo);
+   pipe_resource_reference(&transfer->resource, NULL);
+
+   FREE(tx);
+}
+
+/* Write `words` 32-bit words from data into a constant buffer through the
+ * 3D engine's CB_DATA methods.
+ *
+ * bo/domain: buffer holding the constant buffer and its memory domain
+ * base:      byte offset of the constant buffer inside bo
+ * size:      size of the constant buffer; rounded up to a multiple of 0x100
+ * offset:    byte offset of the first word to write; must be a multiple of
+ *            4 (asserted)
+ * words:     number of 32-bit words to write
+ * data:      source words
+ *
+ * For every chunk the buffer is (re)defined on slot NV50_CB_TMP before the
+ * data is pushed.
+ *
+ * NOTE(review): there is no PUSH_SPACE call here and `nr - 7` wraps around
+ * if PUSH_AVAIL() returns less than 7; presumably callers guarantee enough
+ * space - confirm.
+ */
+void
+nv50_cb_push(struct nouveau_context *nv,
+ struct nouveau_bo *bo, unsigned domain,
+ unsigned base, unsigned size,
+ unsigned offset, unsigned words, const uint32_t *data)
+{
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ struct nouveau_bufctx *bctx = nv50_context(&nv->pipe)->bufctx;
+
+ assert(!(offset & 3));
+ size = align(size, 0x100);
+
+ nouveau_bufctx_refn(bctx, 0, bo, NOUVEAU_BO_WR | domain);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ while (words) {
+ unsigned nr;
+
+ /* 7 words of each chunk are method headers and setup data. */
+ nr = PUSH_AVAIL(push);
+ nr = MIN2(nr - 7, words);
+ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+
+ BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, bo->offset + base);
+ PUSH_DATA (push, bo->offset + base);
+ PUSH_DATA (push, (NV50_CB_TMP << 16) | (size & 0xffff));
+ BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
+ /* offset is a multiple of 4, so (offset << 6) equals (offset / 4) << 8 */
+ PUSH_DATA (push, (offset << 6) | NV50_CB_TMP);
+ BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
+ PUSH_DATAp(push, data, nr);
+
+ words -= nr;
+ data += nr;
+ offset += nr * 4;
+ }
+
+ nouveau_bufctx_reset(bctx, 0);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.h b/src/gallium/drivers/nouveau/nv50/nv50_transfer.h
new file mode 100644
index 00000000000..c58cb0008df
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.h
@@ -0,0 +1,27 @@
+
+#ifndef __NV50_TRANSFER_H__
+#define __NV50_TRANSFER_H__
+
+#include "pipe/p_state.h"
+
+/* Description of one side (source or destination) of an M2MF rectangle
+ * copy, as consumed by nv50_m2mf_transfer_rect().
+ */
+struct nv50_m2mf_rect {
+ struct nouveau_bo *bo; /* buffer object holding the surface */
+ uint32_t base; /* byte offset of the surface inside bo */
+ unsigned domain; /* NOUVEAU_BO_* memory domain used when referencing bo */
+ uint32_t pitch; /* line pitch in bytes */
+ uint32_t width; /* surface width in copy units (multiplied by cpp when programmed) */
+ uint32_t x; /* origin x in copy units */
+ uint32_t height; /* surface height in lines */
+ uint32_t y; /* origin y in lines */
+ uint16_t depth; /* number of slices */
+ uint16_t z; /* starting slice */
+ uint16_t tile_mode; /* per-level tile mode of the miptree */
+ uint16_t cpp; /* bytes per copy unit (format block size) */
+};
+
+/* Fill in rect so that it describes mip level l of the miptree resource
+ * res, with its origin at (x, y) and slice/layer z.
+ */
+void
+nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect,
+ struct pipe_resource *restrict res, unsigned l,
+ unsigned x, unsigned y, unsigned z);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
new file mode 100644
index 00000000000..c6162b5f415
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -0,0 +1,820 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_resource.h"
+
+#include "nv50/nv50_3d.xml.h"
+
+/* Destroy a vertex state object created by nv50_vertex_state_create():
+ * release its translate object, if any, then free the object itself.
+ */
+void
+nv50_vertex_state_delete(struct pipe_context *pipe,
+                         void *hwcso)
+{
+   struct nv50_vertex_stateobj *vtx = hwcso;
+
+   if (vtx->translate != NULL) {
+      vtx->translate->release(vtx->translate);
+   }
+
+   FREE(vtx);
+}
+
+/* Build a vertex state object from an array of pipe vertex elements.
+ *
+ * For each element the hardware attribute word is looked up in
+ * nv50_format_table. When the source format has no such word, a 32-bit
+ * float format with the same number of components is substituted and the
+ * state is flagged as needing conversion. A translate key describing all
+ * elements is assembled along the way and used to create the translate
+ * object.
+ *
+ * Returns the new state object, or NULL on allocation failure or when an
+ * unsupported format has a component count outside 1..4.
+ */
+void *
+nv50_vertex_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements)
+{
+   struct nv50_vertex_stateobj *so;
+   struct translate_key transkey;
+   unsigned i;
+
+   so = MALLOC(sizeof(*so) +
+               num_elements * sizeof(struct nv50_vertex_element));
+   if (!so)
+      return NULL;
+
+   so->num_elements = num_elements;
+   so->instance_elts = 0;
+   so->instance_bufs = 0;
+   so->need_conversion = FALSE;
+
+   memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
+
+   for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
+      so->min_instance_div[i] = 0xffffffff;
+
+   transkey.nr_elements = 0;
+   transkey.output_stride = 0;
+
+   for (i = 0; i < num_elements; ++i) {
+      const struct pipe_vertex_element *ve = &elements[i];
+      const unsigned vbi = ve->vertex_buffer_index;
+      enum pipe_format fmt = ve->src_format;
+      unsigned access_end;
+      unsigned slot;
+
+      so->element[i].pipe = elements[i];
+      so->element[i].state = nv50_format_table[fmt].vtx;
+
+      if (!so->element[i].state) {
+         /* No native vertex format: fall back to float of equal arity. */
+         const unsigned nc = util_format_get_nr_components(fmt);
+
+         if (nc == 1) {
+            fmt = PIPE_FORMAT_R32_FLOAT;
+         } else if (nc == 2) {
+            fmt = PIPE_FORMAT_R32G32_FLOAT;
+         } else if (nc == 3) {
+            fmt = PIPE_FORMAT_R32G32B32_FLOAT;
+         } else if (nc == 4) {
+            fmt = PIPE_FORMAT_R32G32B32A32_FLOAT;
+         } else {
+            assert(0);
+            FREE(so);
+            return NULL;
+         }
+         so->element[i].state = nv50_format_table[fmt].vtx;
+         so->need_conversion = TRUE;
+      }
+      so->element[i].state |= i;
+
+      /* Track how far into a vertex of this buffer any element reaches. */
+      access_end = ve->src_offset + util_format_get_blocksize(fmt);
+      if (so->vb_access_size[vbi] < access_end)
+         so->vb_access_size[vbi] = access_end;
+
+      /* Append the element to the translate key; outputs are packed with
+       * each element's size rounded up to a multiple of 4 bytes.
+       */
+      slot = transkey.nr_elements++;
+
+      transkey.element[slot].type = TRANSLATE_ELEMENT_NORMAL;
+      transkey.element[slot].input_format = ve->src_format;
+      transkey.element[slot].input_buffer = vbi;
+      transkey.element[slot].input_offset = ve->src_offset;
+      transkey.element[slot].instance_divisor = ve->instance_divisor;
+
+      transkey.element[slot].output_format = fmt;
+      transkey.element[slot].output_offset = transkey.output_stride;
+      transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;
+
+      if (unlikely(ve->instance_divisor)) {
+         so->instance_elts |= 1 << i;
+         so->instance_bufs |= 1 << vbi;
+         if (ve->instance_divisor < so->min_instance_div[vbi])
+            so->min_instance_div[vbi] = ve->instance_divisor;
+      }
+   }
+
+   so->translate = translate_create(&transkey);
+   so->vertex_size = transkey.output_stride / 4;
+   so->packet_vertex_limit = NV04_PFIFO_MAX_PACKET_LEN /
+      MAX2(so->vertex_size, 1);
+
+   return so;
+}
+
+/* Attribute word written for vertex attribute slots that are not in use.
+ * The expansion is parenthesized so that the macro can be used safely
+ * inside larger expressions.
+ */
+#define NV50_3D_VERTEX_ATTRIB_INACTIVE \
+   (NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT | \
+    NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 | \
+    NV50_3D_VERTEX_ARRAY_ATTRIB_CONST)
+
+/* Emit the value of a constant vertex attribute taken from a user buffer.
+ *
+ * The first vertex of element ve in vb->user_buffer is unpacked to four
+ * 32-bit channels (sint, uint or float according to the format's first
+ * channel) and pushed through the VTX_ATTR_nF method matching the format's
+ * component count. For single-component attributes that are the vertex
+ * program's edge flag input, EDGEFLAG is updated as well.
+ *
+ * vb->user_buffer must be non-NULL (asserted).
+ */
+static void
+nv50_emit_vtxattr(struct nv50_context *nv50, struct pipe_vertex_buffer *vb,
+ struct pipe_vertex_element *ve, unsigned attr)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ const void *data = (const uint8_t *)vb->user_buffer + ve->src_offset;
+ float v[4];
+ const unsigned nc = util_format_get_nr_components(ve->src_format);
+ const struct util_format_description *desc =
+ util_format_description(ve->src_format);
+
+ assert(vb->user_buffer);
+
+ /* Pure integer formats are unpacked as integers into the same storage;
+ * v[] then carries their raw bits.
+ */
+ if (desc->channel[0].pure_integer) {
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+ desc->unpack_rgba_sint((int32_t *)v, 0, data, 0, 1, 1);
+ } else {
+ desc->unpack_rgba_uint((uint32_t *)v, 0, data, 0, 1, 1);
+ }
+ } else {
+ desc->unpack_rgba_float(v, 0, data, 0, 1, 1);
+ }
+
+ switch (nc) {
+ case 4:
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_4F_X(attr)), 4);
+ PUSH_DATAf(push, v[0]);
+ PUSH_DATAf(push, v[1]);
+ PUSH_DATAf(push, v[2]);
+ PUSH_DATAf(push, v[3]);
+ break;
+ case 3:
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(attr)), 3);
+ PUSH_DATAf(push, v[0]);
+ PUSH_DATAf(push, v[1]);
+ PUSH_DATAf(push, v[2]);
+ break;
+ case 2:
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(attr)), 2);
+ PUSH_DATAf(push, v[0]);
+ PUSH_DATAf(push, v[1]);
+ break;
+ case 1:
+ /* The edge flag attribute also drives the EDGEFLAG state. */
+ if (attr == nv50->vertprog->vp.edgeflag) {
+ BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
+ PUSH_DATA (push, v[0] ? 1 : 0);
+ }
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_1F(attr)), 1);
+ PUSH_DATAf(push, v[0]);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+/* Compute the byte range of user vertex buffer vbi that the current draw
+ * may access.
+ *
+ * base: receives the offset of the first byte
+ * size: receives the number of bytes
+ *
+ * Buffers with per-instance elements are covered completely; otherwise the
+ * range follows from the draw's index bounds, the buffer stride and the
+ * per-vertex access size recorded in the vertex state.
+ */
+static INLINE void
+nv50_user_vbuf_range(struct nv50_context *nv50, int vbi,
+                     uint32_t *base, uint32_t *size)
+{
+   if (likely(!(nv50->vertex->instance_bufs & (1 << vbi)))) {
+      /* NOTE: if there are user buffers, we *must* have index bounds */
+      assert(nv50->vb_elt_limit != ~0);
+      *base = nv50->vb_elt_first * nv50->vtxbuf[vbi].stride;
+      *size = nv50->vb_elt_limit * nv50->vtxbuf[vbi].stride +
+         nv50->vertex->vb_access_size[vbi];
+      return;
+   }
+
+   /* TODO: use min and max instance divisor to get a proper range */
+   *base = 0;
+   *size = nv50->vtxbuf[vbi].buffer->width0;
+}
+
+/* Copy the accessed range of every user vertex buffer with a non-zero
+ * stride into scratch memory.
+ *
+ * addrs:  per buffer, receives the value returned by nouveau_scratch_data()
+ * limits: per buffer, receives base + size - 1 of the uploaded range
+ *
+ * Entries of buffers that are skipped are left untouched.
+ */
+static void
+nv50_upload_user_buffers(struct nv50_context *nv50,
+                         uint64_t addrs[], uint32_t limits[])
+{
+   unsigned b;
+
+   for (b = 0; b < nv50->num_vtxbufs; ++b) {
+      const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b];
+
+      if ((nv50->vbo_user & (1 << b)) && vb->stride) {
+         struct nouveau_bo *bo;
+         uint32_t base, size;
+
+         nv50_user_vbuf_range(nv50, b, &base, &size);
+
+         limits[b] = base + size - 1;
+         addrs[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer,
+                                         base, size, &bo);
+         /* Keep the scratch bo referenced for the duration of the draw. */
+         if (addrs[b])
+            BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART |
+                         NOUVEAU_BO_RD, bo);
+      }
+   }
+
+   nv50->base.vbo_dirty = TRUE;
+}
+
+/* Re-upload user vertex buffers for the current draw and reprogram the
+ * start and limit addresses of every vertex element that reads from one.
+ *
+ * Elements whose buffer has a zero stride are emitted as constant
+ * attributes instead. Each user buffer is uploaded at most once per call;
+ * `written` tracks which ones have been handled.
+ */
+static void
+nv50_update_user_vbufs(struct nv50_context *nv50)
+{
+ uint64_t address[PIPE_MAX_ATTRIBS];
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ unsigned i;
+ uint32_t written = 0; /* bitmask of buffers already uploaded in this call */
+
+ for (i = 0; i < nv50->vertex->num_elements; ++i) {
+ struct pipe_vertex_element *ve = &nv50->vertex->element[i].pipe;
+ const unsigned b = ve->vertex_buffer_index;
+ struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b];
+ uint32_t base, size;
+
+ /* Only elements sourced from user buffers are of interest. */
+ if (!(nv50->vbo_user & (1 << b)))
+ continue;
+
+ if (!vb->stride) {
+ nv50_emit_vtxattr(nv50, vb, ve, i);
+ continue;
+ }
+ nv50_user_vbuf_range(nv50, b, &base, &size);
+
+ if (!(written & (1 << b))) {
+ struct nouveau_bo *bo;
+ const uint32_t bo_flags = NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+ written |= 1 << b;
+ address[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer,
+ base, size, &bo);
+ if (address[b])
+ BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo);
+ }
+
+ /* NOTE(review): the limit adds base to address[b] while the start does
+ * not; this presumably relies on how nouveau_scratch_data() biases its
+ * return value - confirm against that helper.
+ */
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+ PUSH_DATAh(push, address[b] + base + size - 1);
+ PUSH_DATA (push, address[b] + base + size - 1);
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_START_HIGH(i)), 2);
+ PUSH_DATAh(push, address[b] + ve->src_offset);
+ PUSH_DATA (push, address[b] + ve->src_offset);
+ }
+ nv50->base.vbo_dirty = TRUE;
+}
+
+/* Drop the temporary vertex buffer references and release the scratch
+ * memory used for user vertex buffers. Does nothing when no user vertex
+ * buffers are bound.
+ */
+static INLINE void
+nv50_release_user_vbufs(struct nv50_context *nv50)
+{
+   if (!nv50->vbo_user)
+      return;
+
+   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX_TMP);
+   nouveau_scratch_done(&nv50->base);
+}
+
+/* Validate vertex array state: choose between pushing vertices through the
+ * FIFO and fetching them from vertex arrays, then emit the attribute
+ * formats, per-instance enables, buffer addresses and limits.
+ *
+ * vbo_fifo is set to ~0 when the vertex state needs format conversion, or
+ * when non-constant user buffers are bound and vbo_push_hint is set. In
+ * that case only the attribute formats are emitted and array fetching is
+ * disabled for all slots.
+ */
+void
+nv50_vertex_arrays_validate(struct nv50_context *nv50)
+{
+ uint64_t addrs[PIPE_MAX_ATTRIBS];
+ uint32_t limits[PIPE_MAX_ATTRIBS];
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_vertex_stateobj *vertex = nv50->vertex;
+ struct pipe_vertex_buffer *vb;
+ struct nv50_vertex_element *ve;
+ uint32_t mask;
+ uint32_t refd = 0; /* bitmask of buffers already referenced in bufctx_3d */
+ unsigned i;
+ /* Cover the slots of both the new and the previously emitted state. */
+ const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
+
+ if (unlikely(vertex->need_conversion))
+ nv50->vbo_fifo = ~0;
+ else
+ if (nv50->vbo_user & ~nv50->vbo_constant)
+ nv50->vbo_fifo = nv50->vbo_push_hint ? ~0 : 0;
+ else
+ nv50->vbo_fifo = 0;
+
+ if (!nv50->vbo_fifo) {
+ /* if vertex buffer was written by GPU - flush VBO cache */
+ for (i = 0; i < nv50->num_vtxbufs; ++i) {
+ struct nv04_resource *buf = nv04_resource(nv50->vtxbuf[i].buffer);
+ if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ nv50->base.vbo_dirty = TRUE;
+ break;
+ }
+ }
+ }
+
+ /* update vertex format state */
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_ATTRIB(0)), n);
+ if (nv50->vbo_fifo) {
+ /* Push path: emit formats, mark unused slots inactive and turn off
+ * array fetching for every slot.
+ */
+ nv50->state.num_vtxelts = vertex->num_elements;
+ for (i = 0; i < vertex->num_elements; ++i)
+ PUSH_DATA (push, vertex->element[i].state);
+ for (; i < n; ++i)
+ PUSH_DATA (push, NV50_3D_VERTEX_ATTRIB_INACTIVE);
+ for (i = 0; i < n; ++i) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ PUSH_DATA (push, 0);
+ }
+ return;
+ }
+ for (i = 0; i < vertex->num_elements; ++i) {
+ const unsigned b = vertex->element[i].pipe.vertex_buffer_index;
+ ve = &vertex->element[i];
+ vb = &nv50->vtxbuf[b];
+
+ /* Zero-stride user buffers are flagged as constant attributes. */
+ if (likely(vb->stride) || !(nv50->vbo_user & (1 << b)))
+ PUSH_DATA(push, ve->state);
+ else
+ PUSH_DATA(push, ve->state | NV50_3D_VERTEX_ARRAY_ATTRIB_CONST);
+ }
+ for (; i < n; ++i)
+ PUSH_DATA(push, NV50_3D_VERTEX_ATTRIB_INACTIVE);
+
+ /* update per-instance enables */
+ mask = vertex->instance_elts ^ nv50->state.instance_elts;
+ while (mask) {
+ const int i = ffs(mask) - 1;
+ mask &= ~(1 << i);
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);
+ PUSH_DATA (push, (vertex->instance_elts >> i) & 1);
+ }
+ nv50->state.instance_elts = vertex->instance_elts;
+
+ if (nv50->vbo_user & ~nv50->vbo_constant)
+ nv50_upload_user_buffers(nv50, addrs, limits);
+
+ /* update buffers and set constant attributes */
+ for (i = 0; i < vertex->num_elements; ++i) {
+ uint64_t address, limit;
+ const unsigned b = vertex->element[i].pipe.vertex_buffer_index;
+ ve = &vertex->element[i];
+ vb = &nv50->vtxbuf[b];
+
+ if (unlikely(nv50->vbo_constant & (1 << b))) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ PUSH_DATA (push, 0);
+ nv50_emit_vtxattr(nv50, vb, &ve->pipe, i);
+ continue;
+ } else
+ if (nv50->vbo_user & (1 << b)) {
+ /* User buffer: use the scratch copy made above. */
+ address = addrs[b] + ve->pipe.src_offset;
+ limit = addrs[b] + limits[b];
+ } else {
+ /* Regular buffer: reference it once and address it directly. */
+ struct nv04_resource *buf = nv04_resource(vb->buffer);
+ if (!(refd & (1 << b))) {
+ refd |= 1 << b;
+ BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD);
+ }
+ address = buf->address + vb->buffer_offset + ve->pipe.src_offset;
+ limit = buf->address + buf->base.width0 - 1;
+ }
+
+ if (unlikely(ve->pipe.instance_divisor)) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 4);
+ PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
+ PUSH_DATA (push, ve->pipe.instance_divisor);
+ } else {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 3);
+ PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
+ }
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+ PUSH_DATAh(push, limit);
+ PUSH_DATA (push, limit);
+ }
+ /* Disable fetching for slots used by the previous state only. */
+ for (; i < nv50->state.num_vtxelts; ++i) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ PUSH_DATA (push, 0);
+ }
+ nv50->state.num_vtxelts = vertex->num_elements;
+}
+
+/* Expands to one switch case mapping PIPE_PRIM_<n> to the VERTEX_BEGIN_GL
+ * primitive value of the same name.
+ */
+#define NV50_PRIM_GL_CASE(n) \
+   case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+/* Translate a PIPE_PRIM_* primitive type into the value written to
+ * VERTEX_BEGIN_GL. Unrecognized values map to POINTS.
+ */
+static INLINE unsigned
+nv50_prim_gl(unsigned prim)
+{
+   switch (prim) {
+   NV50_PRIM_GL_CASE(POINTS);
+   NV50_PRIM_GL_CASE(LINES);
+   NV50_PRIM_GL_CASE(LINE_LOOP);
+   NV50_PRIM_GL_CASE(LINE_STRIP);
+   NV50_PRIM_GL_CASE(TRIANGLES);
+   NV50_PRIM_GL_CASE(TRIANGLE_STRIP);
+   NV50_PRIM_GL_CASE(TRIANGLE_FAN);
+   NV50_PRIM_GL_CASE(QUADS);
+   NV50_PRIM_GL_CASE(QUAD_STRIP);
+   NV50_PRIM_GL_CASE(POLYGON);
+   NV50_PRIM_GL_CASE(LINES_ADJACENCY);
+   NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+   NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+   NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+   default:
+      break;
+   }
+
+   return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+}
+
+/* For pre-nva0 transform feedback.
+ *
+ * Table indexed by PIPE_PRIM_*; entries not listed are zero.
+ * NOTE(review): the values look like the number of vertices per primitive
+ * as written by transform feedback (e.g. quads and polygons count as 3) -
+ * confirm against the user of this table.
+ */
+static const uint8_t nv50_pipe_prim_to_prim_size[PIPE_PRIM_MAX + 1] =
+{
+ [PIPE_PRIM_POINTS] = 1,
+ [PIPE_PRIM_LINES] = 2,
+ [PIPE_PRIM_LINE_LOOP] = 2,
+ [PIPE_PRIM_LINE_STRIP] = 2,
+ [PIPE_PRIM_TRIANGLES] = 3,
+ [PIPE_PRIM_TRIANGLE_STRIP] = 3,
+ [PIPE_PRIM_TRIANGLE_FAN] = 3,
+ [PIPE_PRIM_QUADS] = 3,
+ [PIPE_PRIM_QUAD_STRIP] = 3,
+ [PIPE_PRIM_POLYGON] = 3,
+ [PIPE_PRIM_LINES_ADJACENCY] = 2,
+ [PIPE_PRIM_LINE_STRIP_ADJACENCY] = 2,
+ [PIPE_PRIM_TRIANGLES_ADJACENCY] = 3,
+ [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = 3
+};
+
+/* Emit a non-indexed draw of count vertices starting at start, repeated
+ * instance_count times.
+ *
+ * A non-zero cached index bias is reset to 0 first. Every instance after
+ * the first is begun with the INSTANCE_NEXT flag set.
+ */
+static void
+nv50_draw_arrays(struct nv50_context *nv50,
+                 unsigned mode, unsigned start, unsigned count,
+                 unsigned instance_count)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   unsigned begin = nv50_prim_gl(mode);
+   unsigned inst;
+
+   if (nv50->state.index_bias) {
+      BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
+      PUSH_DATA (push, 0);
+      nv50->state.index_bias = 0;
+   }
+
+   for (inst = 0; inst < instance_count; ++inst) {
+      BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
+      PUSH_DATA (push, begin);
+      BEGIN_NV04(push, NV50_3D(VERTEX_BUFFER_FIRST), 2);
+      PUSH_DATA (push, start);
+      PUSH_DATA (push, count);
+      BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
+      PUSH_DATA (push, 0);
+
+      begin |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+   }
+}
+
+/* Push count 8-bit indices from a user index array inline, starting at
+ * index `start`.
+ *
+ * The first (count & 3) indices are sent one per word through
+ * VB_ELEMENT_U32 so that the remainder is a multiple of four; the rest is
+ * packed four indices per word (lowest index in the low byte) and sent
+ * through VB_ELEMENT_U8.
+ */
+static void
+nv50_draw_elements_inline_u08(struct nouveau_pushbuf *push, const uint8_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 3) {
+ unsigned i;
+ BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U32), count & 3);
+ for (i = 0; i < (count & 3); ++i)
+ PUSH_DATA(push, *map++);
+ count &= ~3;
+ }
+ while (count) {
+ /* nr is the number of packed words in this packet. */
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;
+
+ BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U8), nr);
+ for (i = 0; i < nr; ++i) {
+ PUSH_DATA(push,
+ (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);
+ map += 4;
+ }
+ count -= nr * 4;
+ }
+}
+
+/* Push count 16-bit indices from a user index array inline, starting at
+ * index `start`.
+ *
+ * With an odd count the first index is sent on its own through
+ * VB_ELEMENT_U32; the remaining indices are packed two per word (first
+ * index in the low half) and sent through VB_ELEMENT_U16.
+ */
+static void
+nv50_draw_elements_inline_u16(struct nouveau_pushbuf *push, const uint16_t *map,
+                              unsigned start, unsigned count)
+{
+   unsigned words, w;
+
+   map += start;
+
+   if (count & 1) {
+      BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U32), 1);
+      PUSH_DATA (push, *map++);
+   }
+   /* Only whole pairs remain. */
+   count &= ~1;
+
+   for (; count; count -= words * 2) {
+      words = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+      BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U16), words);
+      for (w = 0; w < words; ++w, map += 2)
+         PUSH_DATA(push, (map[1] << 16) | map[0]);
+   }
+}
+
+/* Push count 32-bit indices from a user index array inline, starting at
+ * index `start`, in packets of at most NV04_PFIFO_MAX_PACKET_LEN words.
+ */
+static void
+nv50_draw_elements_inline_u32(struct nouveau_pushbuf *push, const uint32_t *map,
+                              unsigned start, unsigned count)
+{
+   const uint32_t *idx = map + start;
+   unsigned left = count;
+
+   while (left != 0) {
+      const unsigned batch = MIN2(left, NV04_PFIFO_MAX_PACKET_LEN);
+
+      BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U32), batch);
+      PUSH_DATAp(push, idx, batch);
+
+      idx += batch;
+      left -= batch;
+   }
+}
+
+/* Push count indices from a 32-bit user index array, packed as 16-bit
+ * pairs, starting at index `start`.
+ *
+ * With an odd count the first index is sent on its own through
+ * VB_ELEMENT_U32; the remaining indices are combined two per word (first
+ * index in the low half) and sent through VB_ELEMENT_U16. The indices are
+ * not masked, so the caller must only use this when they fit in 16 bits.
+ */
+static void
+nv50_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
+                                    const uint32_t *map,
+                                    unsigned start, unsigned count)
+{
+   unsigned words, w;
+
+   map += start;
+
+   if (count & 1) {
+      BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U32), 1);
+      PUSH_DATA (push, *map++);
+      --count;
+   }
+
+   for (; count; count -= words * 2) {
+      words = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+      BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U16), words);
+      for (w = 0; w < words; ++w, map += 2)
+         PUSH_DATA(push, (map[1] << 16) | map[0]);
+   }
+}
+
+/* Emit an indexed draw, repeated instance_count times.
+ *
+ * shorten:    for 32-bit user indices, pack them as 16-bit pairs
+ * mode:       PIPE_PRIM_* primitive type
+ * start:      first index to use, in units of indices
+ * count:      number of indices
+ * index_bias: value for VB_ELEMENT_BASE; only emitted when it differs from
+ *             the cached state
+ *
+ * With an index buffer resource bound, the index data is referenced from
+ * the buffer via nouveau_pushbuf_data(). Otherwise the indices are read
+ * from idxbuf.user_buffer and pushed inline.
+ */
+static void
+nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count, int32_t index_bias)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ unsigned prim;
+ const unsigned index_size = nv50->idxbuf.index_size;
+
+ prim = nv50_prim_gl(mode);
+
+ if (index_bias != nv50->state.index_bias) {
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
+ PUSH_DATA (push, index_bias);
+ nv50->state.index_bias = index_bias;
+ }
+
+ if (nv50->idxbuf.buffer) {
+ struct nv04_resource *buf = nv04_resource(nv50->idxbuf.buffer);
+ unsigned pb_start;
+ unsigned pb_bytes;
+ /* Index data offset rounded down to a 4-byte boundary ... */
+ const unsigned base = (buf->offset + nv50->idxbuf.offset) & ~3;
+
+ /* ... with the remainder converted to indices and added to start. */
+ start += ((buf->offset + nv50->idxbuf.offset) & 3) >> (index_size >> 1);
+
+ assert(nouveau_resource_mapped_by_gpu(nv50->idxbuf.buffer));
+
+ while (instance_count--) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, prim);
+
+ nouveau_pushbuf_space(push, 8, 0, 1);
+
+ switch (index_size) {
+ case 4:
+ BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U32), count);
+ nouveau_pushbuf_data(push, buf->bo, base + start * 4, count * 4);
+ break;
+ case 2:
+ /* Byte range covering the indices, expanded to whole 32-bit words. */
+ pb_start = (start & ~1) * 2;
+ pb_bytes = ((start + count + 1) & ~1) * 2 - pb_start;
+
+ /* The low bit of start goes into bit 31 of the setup word. */
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U16_SETUP), 1);
+ PUSH_DATA (push, (start << 31) | count);
+ BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U16), pb_bytes / 4);
+ nouveau_pushbuf_data(push, buf->bo, base + pb_start, pb_bytes);
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U16_SETUP), 1);
+ PUSH_DATA (push, 0);
+ break;
+ default:
+ assert(index_size == 1);
+ /* Byte range covering the indices, expanded to whole 32-bit words. */
+ pb_start = start & ~3;
+ pb_bytes = ((start + count + 3) & ~3) - pb_start;
+
+ /* The low two bits of start go into bits 30-31 of the setup word. */
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U8_SETUP), 1);
+ PUSH_DATA (push, (start << 30) | count);
+ BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U8), pb_bytes / 4);
+ nouveau_pushbuf_data(push, buf->bo, base + pb_start, pb_bytes);
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U8_SETUP), 1);
+ PUSH_DATA (push, 0);
+ break;
+ }
+ BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
+ PUSH_DATA (push, 0);
+
+ prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ } else {
+ /* User index array: push the indices inline. */
+ const void *data = nv50->idxbuf.user_buffer;
+
+ while (instance_count--) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, prim);
+ switch (index_size) {
+ case 1:
+ nv50_draw_elements_inline_u08(push, data, start, count);
+ break;
+ case 2:
+ nv50_draw_elements_inline_u16(push, data, start, count);
+ break;
+ case 4:
+ if (shorten)
+ nv50_draw_elements_inline_u32_short(push, data, start, count);
+ else
+ nv50_draw_elements_inline_u32(push, data, start, count);
+ break;
+ default:
+ /* NOTE(review): returns after VERTEX_BEGIN_GL was emitted, without
+ * the matching VERTEX_END_GL.
+ */
+ assert(0);
+ return;
+ }
+ BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
+ PUSH_DATA (push, 0);
+
+ prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ }
+}
+
+/* Draw using the amount of data recorded for a stream output target
+ * (info->count_from_stream_output).
+ *
+ * On 3D classes older than NVA0_3D_CLASS an error is logged and nothing is
+ * drawn. If the target buffer is flagged as being written by the GPU, the
+ * flag is cleared and SERIALIZE plus VERTEX_ARRAY_FLUSH are emitted first.
+ * The draw is issued once per instance; info->instance_count must be at
+ * least 1 (asserted).
+ */
+static void
+nva0_draw_stream_output(struct nv50_context *nv50,
+ const struct pipe_draw_info *info)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_so_target *so = nv50_so_target(info->count_from_stream_output);
+ struct nv04_resource *res = nv04_resource(so->pipe.buffer);
+ unsigned num_instances = info->instance_count;
+ unsigned mode = nv50_prim_gl(info->mode);
+
+ if (unlikely(nv50->screen->base.class_3d < NVA0_3D_CLASS)) {
+ /* A proper implementation without waiting doesn't seem possible,
+ * so don't bother.
+ */
+ NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
+ return;
+ }
+
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ PUSH_SPACE(push, 4);
+ BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ assert(num_instances);
+ do {
+ PUSH_SPACE(push, 8);
+ BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, mode);
+ BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
+ PUSH_DATA (push, 0);
+ /* The data word for DRAW_TFB_BYTES is supplied from the target's query
+ * object (offset 0x4) rather than pushed directly.
+ */
+ BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
+ nv50_query_pushbuf_submit(push, so->pq, 0x4);
+ BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
+ PUSH_DATA (push, 0);
+
+ mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ } while (--num_instances);
+}
+
+/* Pushbuf kick callback that nv50_draw_vbo installs while it emits a draw:
+ * updates the screen's fences and fences the current context's 3D bufctx.
+ * The screen is taken from the pushbuf's user_priv pointer.
+ */
+static void
+nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan)
+{
+ struct nv50_screen *screen = chan->user_priv;
+
+ nouveau_fence_update(&screen->base, TRUE);
+
+ nv50_bufctx_fence(screen->cur_ctx->bufctx_3d, TRUE);
+}
+
+/* Draw entry point taking a pipe_context and a pipe_draw_info.
+ *
+ * Records the vertex and instance ranges of the draw in the context,
+ * validates state, then dispatches to one of:
+ * - nv50_push_vbo when nv50->vbo_fifo is set,
+ * - nv50_draw_elements for indexed draws,
+ * - nva0_draw_stream_output when info->count_from_stream_output is set,
+ * - nv50_draw_arrays otherwise.
+ * push->kick_notify points at nv50_draw_vbo_kick_notify while the draw is
+ * emitted and is reset to nv50_default_kick_notify before returning.
+ */
+void
+nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
+ nv50->vb_elt_first = info->min_index + info->index_bias;
+ nv50->vb_elt_limit = info->max_index - info->min_index;
+ nv50->instance_off = info->start_instance;
+ nv50->instance_max = info->instance_count - 1;
+
+ /* For picking only a few vertices from a large user buffer, push is better,
+ * if index count is larger and we expect repeated vertices, suggest upload.
+ */
+ nv50->vbo_push_hint = /* the 64 is heuristic */
+ !(info->indexed && ((nv50->vb_elt_limit + 64) < info->count));
+
+ /* With user vertex buffers bound and no array/vertex state already dirty:
+ * force array revalidation if the push hint disagrees with the current
+ * vbo_fifo mode, otherwise refresh the user buffers when not pushing.
+ */
+ if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_ARRAYS | NV50_NEW_VERTEX))) {
+ if (!!nv50->vbo_fifo != nv50->vbo_push_hint)
+ nv50->dirty |= NV50_NEW_ARRAYS;
+ else
+ if (!nv50->vbo_fifo)
+ nv50_update_user_vbufs(nv50);
+ }
+
+ /* Stream output targets bound and no gmtyprog: take the primitive size
+ * from the draw mode.
+ */
+ if (unlikely(nv50->num_so_targets && !nv50->gmtyprog))
+ nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode];
+
+ nv50_state_validate(nv50, ~0, 8); /* 8 as minimum, we use flush_notify */
+
+ push->kick_notify = nv50_draw_vbo_kick_notify;
+
+ /* Push path: nv50_push_vbo handles the whole draw. Note that this early
+ * return does not call nv50_release_user_vbufs.
+ */
+ if (nv50->vbo_fifo) {
+ nv50_push_vbo(nv50, info);
+ push->kick_notify = nv50_default_kick_notify;
+ nouveau_pushbuf_bufctx(push, NULL);
+ return;
+ }
+
+ /* Only re-emit VB_INSTANCE_BASE when it differs from the cached value. */
+ if (nv50->state.instance_base != info->start_instance) {
+ nv50->state.instance_base = info->start_instance;
+ /* NOTE: this does not affect the shader input, should it ? */
+ BEGIN_NV04(push, NV50_3D(VB_INSTANCE_BASE), 1);
+ PUSH_DATA (push, info->start_instance);
+ }
+
+ if (nv50->base.vbo_dirty) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
+ PUSH_DATA (push, 0);
+ nv50->base.vbo_dirty = FALSE;
+ }
+
+ if (info->indexed) {
+ /* 32-bit indices may be emitted as 16-bit ones when both max_index and
+ * (if enabled) the restart index fit in 16 bits.
+ */
+ boolean shorten = info->max_index <= 65535;
+
+ if (info->primitive_restart != nv50->state.prim_restart) {
+ if (info->primitive_restart) {
+ BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 2);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ } else {
+ BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
+ nv50->state.prim_restart = info->primitive_restart;
+ } else
+ if (info->primitive_restart) {
+ /* Enable state unchanged: only the restart index is re-sent. */
+ BEGIN_NV04(push, NV50_3D(PRIM_RESTART_INDEX), 1);
+ PUSH_DATA (push, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ }
+
+ nv50_draw_elements(nv50, shorten,
+ info->mode, info->start, info->count,
+ info->instance_count, info->index_bias);
+ } else
+ if (unlikely(info->count_from_stream_output)) {
+ nva0_draw_stream_output(nv50, info);
+ } else {
+ nv50_draw_arrays(nv50,
+ info->mode, info->start, info->count,
+ info->instance_count);
+ }
+ push->kick_notify = nv50_default_kick_notify;
+
+ nv50_release_user_vbufs(nv50);
+
+ nouveau_pushbuf_bufctx(push, NULL);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
new file mode 100644
index 00000000000..e8578c8be6f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -0,0 +1,125 @@
+
+#ifndef __NV50_WINSYS_H__
+#define __NV50_WINSYS_H__
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include "pipe/p_defines.h"
+
+#include "nouveau_winsys.h"
+#include "nouveau_buffer.h"
+
+
+#ifndef NV04_PFIFO_MAX_PACKET_LEN
+#define NV04_PFIFO_MAX_PACKET_LEN 2047
+#endif
+
+
+/* Reference a bare bo in bin `bin` of a bufctx. The resulting bufref has no
+ * resource attached (its priv pointer is cleared).
+ */
+static INLINE void
+nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin,
+                            unsigned flags, struct nouveau_bo *bo)
+{
+   struct nouveau_bufref *bref = nouveau_bufctx_refn(bufctx, bin, bo, flags);
+
+   bref->priv = NULL;
+}
+
+/* Reference the bo backing `res` in bin `bin` of a bufctx.
+ * The bo is referenced with the access flags OR'ed with the resource's
+ * domain; the bufref remembers the resource (priv) and the bare access
+ * flags (priv_data).
+ */
+static INLINE void
+nv50_add_bufctx_resident(struct nouveau_bufctx *bufctx, int bin,
+                         struct nv04_resource *res, unsigned flags)
+{
+   struct nouveau_bufref *bref;
+
+   bref = nouveau_bufctx_refn(bufctx, bin, res->bo, flags | res->domain);
+   bref->priv_data = flags;
+   bref->priv = res;
+}
+
+/* Shorthands for the two helpers above: `bin` is pasted onto NV50_BIND_ and,
+ * for BCTX_REFN, `acc` is pasted onto NOUVEAU_BO_.
+ * NOTE(review): BCTX_REFN_bo expands with a trailing ';' while BCTX_REFN does
+ * not, so BCTX_REFN_bo is unsafe as the body of an unbraced if/else.
+ */
+#define BCTX_REFN_bo(ctx, bin, fl, bo) \
+ nv50_add_bufctx_resident_bo(ctx, NV50_BIND_##bin, fl, bo);
+
+#define BCTX_REFN(bctx, bin, res, acc) \
+ nv50_add_bufctx_resident(bctx, NV50_BIND_##bin, res, NOUVEAU_BO_##acc)
+
+/* Add a single bo reference with the given flags to a pushbuf. */
+static INLINE void
+PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
+{
+   struct nouveau_pushbuf_refn refs[1] = { { bo, flags } };
+
+   nouveau_pushbuf_refn(push, refs, 1);
+}
+
+
+/* Each SUBC_*(m) expands to TWO comma-separated arguments, "subchannel,
+ * method", matching the (subc, mthd) parameter pair of the BEGIN_* helpers.
+ * The NV50_*/NVA0_* wrappers paste the method name onto the class prefix.
+ * Subchannels used here: 3D = 3, 2D = 4, M2MF = 5, COMPUTE = 6.
+ */
+#define SUBC_3D(m) 3, (m)
+#define NV50_3D(n) SUBC_3D(NV50_3D_##n)
+#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n)
+
+#define SUBC_2D(m) 4, (m)
+#define NV50_2D(n) SUBC_2D(NV50_2D_##n)
+
+#define SUBC_M2MF(m) 5, (m)
+#define NV50_M2MF(n) SUBC_M2MF(NV50_M2MF_##n)
+
+#define SUBC_COMPUTE(m) 6, (m)
+#define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n)
+
+
+/* Build a packet header word: size in bits 18 and up, subchannel in bits 13
+ * and up, method in the low bits; no mode bits are set.
+ */
+static INLINE uint32_t
+NV50_FIFO_PKHDR(int subc, int mthd, unsigned size)
+{
+   const uint32_t size_bits = size << 18;
+   const uint32_t subc_bits = subc << 13;
+
+   return size_bits | subc_bits | mthd;
+}
+
+/* Same layout as NV50_FIFO_PKHDR, with bit 30 (0x40000000) set in addition.
+ * Used by BEGIN_NI04.
+ */
+static INLINE uint32_t
+NV50_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
+{
+   uint32_t hdr = 0x40000000;
+
+   hdr |= size << 18;
+   hdr |= subc << 13;
+   hdr |= mthd;
+   return hdr;
+}
+
+/* Header word used by BEGIN_NL50: fixed 0x00030000 mode bits plus subchannel
+ * and method. It carries no size field; BEGIN_NL50 pushes the size as a
+ * separate word.
+ */
+static INLINE uint32_t
+NV50_FIFO_PKHDR_L(int subc, int mthd)
+{
+   uint32_t hdr = 0x00030000;
+
+   hdr |= subc << 13;
+   hdr |= mthd;
+   return hdr;
+}
+
+
+/* Return the nv50 memtype stored in the bo's configuration. */
+static INLINE uint32_t
+nouveau_bo_memtype(const struct nouveau_bo *bo)
+{
+ return bo->config.nv50.memtype;
+}
+
+
+/* Append the upper 32 bits of a 64-bit value to the pushbuf. */
+static INLINE void
+PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data)
+{
+   const uint32_t hi = (uint32_t)(data >> 32);
+
+   *push->cur = hi;
+   push->cur++;
+}
+
+/* Start a packet of `size` data words for method `mthd` on subchannel `subc`.
+ * Unless NV50_PUSH_EXPLICIT_SPACE_CHECKING is defined, room for the header
+ * plus the data words is reserved first.
+ */
+static INLINE void
+BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
+{
+   const uint32_t hdr = NV50_FIFO_PKHDR(subc, mthd, size);
+
+#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
+   PUSH_SPACE(push, size + 1);
+#endif
+   PUSH_DATA (push, hdr);
+}
+
+/* Like BEGIN_NV04, but the header is built with NV50_FIFO_PKHDR_NI. */
+static INLINE void
+BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
+{
+   const uint32_t hdr = NV50_FIFO_PKHDR_NI(subc, mthd, size);
+
+#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
+   PUSH_SPACE(push, size + 1);
+#endif
+   PUSH_DATA (push, hdr);
+}
+
+/* long, non-incremental, nv50-only
+ *
+ * Emits two words: the NV50_FIFO_PKHDR_L header followed by the size. Only
+ * those two words are reserved here; space for the payload is up to the
+ * caller.
+ */
+static INLINE void
+BEGIN_NL50(struct nouveau_pushbuf *push, int subc, int mthd, uint32_t size)
+{
+   const uint32_t hdr = NV50_FIFO_PKHDR_L(subc, mthd);
+
+#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
+   PUSH_SPACE(push, 2);
+#endif
+   PUSH_DATA (push, hdr);
+   PUSH_DATA (push, size);
+}
+
+#endif /* __NV50_WINSYS_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video.c b/src/gallium/drivers/nouveau/nv50/nv84_video.c
new file mode 100644
index 00000000000..3fee6d95f66
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video.c
@@ -0,0 +1,797 @@
+/*
+ * Copyright 2013 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include "util/u_format.h"
+#include "util/u_sampler.h"
+#include "vl/vl_zscan.h"
+
+#include "nv50/nv84_video.h"
+
+/* Read exactly `len` bytes of the firmware image at `path` into `dest`.
+ *
+ * Returns 0 on success and 1 when the file cannot be opened or yields fewer
+ * than `len` bytes; a diagnostic is printed to stderr in both failure cases.
+ */
+static int
+nv84_copy_firmware(const char *path, void *dest, ssize_t len)
+{
+   int fd = open(path, O_RDONLY | O_CLOEXEC);
+   ssize_t done = 0;
+
+   if (fd < 0) {
+      fprintf(stderr, "opening firmware file %s failed: %m\n", path);
+      return 1;
+   }
+
+   /* read() may return fewer bytes than requested without that being an
+    * error, so keep reading until the image is complete or read() reports
+    * end-of-file / failure.
+    */
+   while (done < len) {
+      ssize_t r = read(fd, (char *)dest + done, len - done);
+      if (r <= 0)
+         break;
+      done += r;
+   }
+
+   if (done != len) {
+      /* Report before close() so %m still reflects read()'s errno. */
+      fprintf(stderr, "reading firmware file %s failed: %m\n", path);
+      close(fd);
+      return 1;
+   }
+
+   close(fd);
+   return 0;
+}
+
+/* Return the size of the file at `path` as reported by stat(), or stat()'s
+ * non-zero return value when it fails. The size is narrowed to int.
+ */
+static int
+filesize(const char *path)
+{
+   struct stat st;
+   const int err = stat(path, &st);
+
+   return err ? err : (int)st.st_size;
+}
+
+/* Load one or two firmware files into a newly created VRAM bo.
+ *
+ * fw1 is copied to offset 0. fw2 is optional (may be NULL) and is copied to
+ * offset align(size of fw1, 0x100); that offset is stored in
+ * dec->vp_fw2_offset whether or not fw2 is given.
+ *
+ * Returns the bo, with its CPU mapping already removed, or NULL when a file
+ * cannot be sized or read or the bo cannot be created or mapped.
+ */
+static struct nouveau_bo *
+nv84_load_firmwares(struct nouveau_device *dev, struct nv84_decoder *dec,
+ const char *fw1, const char *fw2)
+{
+ int ret, size1, size2 = 0;
+ struct nouveau_bo *fw;
+
+ size1 = filesize(fw1);
+ if (fw2)
+ size2 = filesize(fw2);
+ /* filesize() hands back stat()'s failure value, which is negative. */
+ if (size1 < 0 || size2 < 0)
+ return NULL;
+
+ dec->vp_fw2_offset = align(size1, 0x100);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, dec->vp_fw2_offset + size2, NULL, &fw);
+ if (ret)
+ return NULL;
+ ret = nouveau_bo_map(fw, NOUVEAU_BO_WR, dec->client);
+ if (ret)
+ goto error;
+
+ ret = nv84_copy_firmware(fw1, fw->map, size1);
+ if (fw2 && !ret)
+ ret = nv84_copy_firmware(fw2, fw->map + dec->vp_fw2_offset, size2);
+ /* The mapping is dropped on both the success and the failure path. */
+ munmap(fw->map, fw->size);
+ fw->map = NULL;
+ if (!ret)
+ return fw;
+error:
+ nouveau_bo_ref(NULL, &fw);
+ return NULL;
+}
+
+/* Load the single-file BSP H.264 firmware; returns the bo or NULL. */
+static struct nouveau_bo *
+nv84_load_bsp_firmware(struct nouveau_device *dev, struct nv84_decoder *dec)
+{
+ return nv84_load_firmwares(
+ dev, dec, "/lib/firmware/nouveau/nv84_bsp-h264", NULL);
+}
+
+/* Load the two-part VP H.264 firmware into one bo; the second part starts at
+ * dec->vp_fw2_offset. Returns the bo or NULL.
+ */
+static struct nouveau_bo *
+nv84_load_vp_firmware(struct nouveau_device *dev, struct nv84_decoder *dec)
+{
+ return nv84_load_firmwares(
+ dev, dec,
+ "/lib/firmware/nouveau/nv84_vp-h264-1",
+ "/lib/firmware/nouveau/nv84_vp-h264-2");
+}
+
+/* Load the single-file VP MPEG1/2 firmware; returns the bo or NULL. */
+static struct nouveau_bo *
+nv84_load_vp_firmware_mpeg(struct nouveau_device *dev, struct nv84_decoder *dec)
+{
+ return nv84_load_firmwares(
+ dev, dec, "/lib/firmware/nouveau/nv84_vp-mpeg12", NULL);
+}
+
+/* decode_bitstream callback for H.264: hands the bitstream buffers to the
+ * BSP stage and then runs the VP stage on the same target.
+ * The target buffer is asserted to be NV12.
+ */
+static void
+nv84_decoder_decode_bitstream_h264(struct pipe_video_codec *decoder,
+                                   struct pipe_video_buffer *video_target,
+                                   struct pipe_picture_desc *picture,
+                                   unsigned num_buffers,
+                                   const void *const *data,
+                                   const unsigned *num_bytes)
+{
+   struct nv84_video_buffer *buf = (struct nv84_video_buffer *)video_target;
+   struct pipe_h264_picture_desc *h264 = (struct pipe_h264_picture_desc *)picture;
+   struct nv84_decoder *nv84 = (struct nv84_decoder *)decoder;
+
+   assert(buf->base.buffer_format == PIPE_FORMAT_NV12);
+
+   nv84_decoder_bsp(nv84, h264, num_buffers, data, num_bytes, buf);
+   nv84_decoder_vp_h264(nv84, h264, buf);
+}
+
+/* flush callback: intentionally does nothing. */
+static void
+nv84_decoder_flush(struct pipe_video_codec *decoder)
+{
+}
+
+/* begin_frame callback for H.264: no per-frame setup is done here. */
+static void
+nv84_decoder_begin_frame_h264(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture)
+{
+}
+
+/* end_frame callback for H.264: empty, the work is done in
+ * nv84_decoder_decode_bitstream_h264.
+ */
+static void
+nv84_decoder_end_frame_h264(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture)
+{
+}
+
+/* decode_bitstream callback for MPEG1/2: forwards the buffers to the
+ * decoder's vl_mpg12_bs bitstream parser. The target is asserted to be NV12.
+ */
+static void
+nv84_decoder_decode_bitstream_mpeg12(struct pipe_video_codec *decoder,
+                                     struct pipe_video_buffer *video_target,
+                                     struct pipe_picture_desc *picture,
+                                     unsigned num_buffers,
+                                     const void *const *data,
+                                     const unsigned *num_bytes)
+{
+   struct nv84_decoder *nv84 = (struct nv84_decoder *)decoder;
+   struct pipe_mpeg12_picture_desc *mpeg12 =
+      (struct pipe_mpeg12_picture_desc *)picture;
+
+   assert(video_target->buffer_format == PIPE_FORMAT_NV12);
+
+   vl_mpg12_bs_decode(nv84->mpeg12_bs, video_target, mpeg12,
+                      num_buffers, data, num_bytes);
+}
+
+/* begin_frame callback for MPEG1/2.
+ *
+ * Calls nouveau_bo_wait on mpeg12_bo, then points mpeg12_mb_info and
+ * mpeg12_data into its mapping: macroblock info at +0x100, data after the
+ * 0x100-aligned macroblock info area (0x20 bytes per macroblock). When the
+ * picture carries an intra matrix, both quantizer matrices are stored
+ * reordered through the selected zscan table.
+ */
+static void
+nv84_decoder_begin_frame_mpeg12(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture)
+{
+ struct nv84_decoder *dec = (struct nv84_decoder *)decoder;
+ struct pipe_mpeg12_picture_desc *desc = (struct pipe_mpeg12_picture_desc *)picture;
+ int i;
+
+ /* NOTE(review): the return value of nouveau_bo_wait is not checked. */
+ nouveau_bo_wait(dec->mpeg12_bo, NOUVEAU_BO_RDWR, dec->client);
+ dec->mpeg12_mb_info = dec->mpeg12_bo->map + 0x100;
+ dec->mpeg12_data = dec->mpeg12_bo->map + 0x100 +
+ align(0x20 * mb(dec->base.width) * mb(dec->base.height), 0x100);
+ /* NOTE(review): non_intra_matrix is read whenever intra_matrix is set,
+ * without a NULL check of its own.
+ */
+ if (desc->intra_matrix) {
+ dec->zscan = desc->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
+ for (i = 0; i < 64; i++) {
+ dec->mpeg12_intra_matrix[i] = desc->intra_matrix[dec->zscan[i]];
+ dec->mpeg12_non_intra_matrix[i] = desc->non_intra_matrix[dec->zscan[i]];
+ }
+ /* Entry 0 is replaced by a value derived from intra_dc_precision. */
+ dec->mpeg12_intra_matrix[0] = 1 << (7 - desc->intra_dc_precision);
+ }
+}
+
+/* end_frame callback for MPEG1/2: runs the VP stage for the finished frame
+ * on the given target.
+ */
+static void
+nv84_decoder_end_frame_mpeg12(struct pipe_video_codec *decoder,
+                              struct pipe_video_buffer *target,
+                              struct pipe_picture_desc *picture)
+{
+   struct nv84_decoder *nv84 = (struct nv84_decoder *)decoder;
+   struct pipe_mpeg12_picture_desc *mpeg12 =
+      (struct pipe_mpeg12_picture_desc *)picture;
+   struct nv84_video_buffer *buf = (struct nv84_video_buffer *)target;
+
+   nv84_decoder_vp_mpeg12(nv84, mpeg12, buf);
+}
+
+/* decode_macroblock callback for MPEG1/2: feeds each of the
+ * `num_macroblocks` entries of `macroblocks` (treated as an array of
+ * pipe_mpeg12_macroblock) to nv84_decoder_vp_mpeg12_mb, in order.
+ */
+static void
+nv84_decoder_decode_macroblock(struct pipe_video_codec *decoder,
+                               struct pipe_video_buffer *target,
+                               struct pipe_picture_desc *picture,
+                               const struct pipe_macroblock *macroblocks,
+                               unsigned num_macroblocks)
+{
+   struct nv84_decoder *dec = (struct nv84_decoder *)decoder;
+   struct pipe_mpeg12_picture_desc *desc =
+      (struct pipe_mpeg12_picture_desc *)picture;
+   const struct pipe_mpeg12_macroblock *mbs =
+      (const struct pipe_mpeg12_macroblock *)macroblocks;
+   /* Unsigned index: matches the type of num_macroblocks, so there is no
+    * signed/unsigned comparison and no signed overflow for large counts.
+    */
+   unsigned i;
+
+   for (i = 0; i < num_macroblocks; i++)
+      nv84_decoder_vp_mpeg12_mb(dec, desc, &mbs[i]);
+}
+
+/* Destroy a decoder and release everything nv84_create_decoder may have
+ * set up. It is also the failure path of nv84_create_decoder, so it is
+ * called on partially initialised decoders: every member is released
+ * through calls that are handed the (possibly still NULL) pointer.
+ */
+static void
+nv84_decoder_destroy(struct pipe_video_codec *decoder)
+{
+   struct nv84_decoder *dec = (struct nv84_decoder *)decoder;
+
+   nouveau_bo_ref(NULL, &dec->bsp_fw);
+   nouveau_bo_ref(NULL, &dec->bsp_data);
+   nouveau_bo_ref(NULL, &dec->vp_fw);
+   nouveau_bo_ref(NULL, &dec->vp_data);
+   nouveau_bo_ref(NULL, &dec->mbring);
+   nouveau_bo_ref(NULL, &dec->vpring);
+   nouveau_bo_ref(NULL, &dec->bitstream);
+   nouveau_bo_ref(NULL, &dec->vp_params);
+   /* Allocated for MPEG1/2 decoders in nv84_create_decoder; it was
+    * previously never released.
+    */
+   nouveau_bo_ref(NULL, &dec->mpeg12_bo);
+   nouveau_bo_ref(NULL, &dec->fence);
+
+   nouveau_object_del(&dec->bsp);
+   nouveau_object_del(&dec->vp);
+
+   nouveau_bufctx_del(&dec->bsp_bufctx);
+   nouveau_pushbuf_del(&dec->bsp_pushbuf);
+   nouveau_object_del(&dec->bsp_channel);
+
+   nouveau_bufctx_del(&dec->vp_bufctx);
+   nouveau_pushbuf_del(&dec->vp_pushbuf);
+   nouveau_object_del(&dec->vp_channel);
+
+   nouveau_client_del(&dec->client);
+
+   if (dec->mpeg12_bs)
+      FREE(dec->mpeg12_bs);
+   FREE(dec);
+}
+
+/* Create a video decoder for an H.264 or MPEG1/2 profile.
+ *
+ * Falls back to vl_create_decoder when the XVMC_VL environment variable is
+ * set. Otherwise rejects unsupported profile/entrypoint combinations, then
+ * creates the client, channels, pushbufs and bufctxs, loads the firmware,
+ * allocates the working buffers and finally programs the BSP (H.264 only)
+ * and VP engines.
+ *
+ * Returns the new codec, or NULL on failure. After the decoder struct has
+ * been allocated, every failure goes through nv84_decoder_destroy.
+ */
+struct pipe_video_codec *
+nv84_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ)
+{
+ struct nv50_context *nv50 = (struct nv50_context *)context;
+ struct nouveau_screen *screen = &nv50->screen->base;
+ struct nv84_decoder *dec;
+ struct nouveau_pushbuf *bsp_push, *vp_push;
+ struct nv50_surface surf;
+ struct nv50_miptree mip;
+ union pipe_color_union color;
+ struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 };
+ int ret, i;
+ int is_h264 = u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC;
+ int is_mpeg12 = u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_MPEG12;
+
+ if (getenv("XVMC_VL"))
+ return vl_create_decoder(context, templ);
+
+ /* H.264 is accepted at the bitstream entrypoint only; MPEG1/2 at any
+ * entrypoint up to and including IDCT.
+ */
+ if ((is_h264 && templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) ||
+ (is_mpeg12 && templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_IDCT)) {
+ debug_printf("%x\n", templ->entrypoint);
+ return NULL;
+ }
+
+ if (!is_h264 && !is_mpeg12) {
+ debug_printf("invalid profile: %x\n", templ->profile);
+ return NULL;
+ }
+
+ dec = CALLOC_STRUCT(nv84_decoder);
+ if (!dec)
+ return NULL;
+
+ /* Start from the template and install the codec-specific callbacks. */
+ dec->base = *templ;
+ dec->base.context = context;
+ dec->base.destroy = nv84_decoder_destroy;
+ dec->base.flush = nv84_decoder_flush;
+ if (is_h264) {
+ dec->base.decode_bitstream = nv84_decoder_decode_bitstream_h264;
+ dec->base.begin_frame = nv84_decoder_begin_frame_h264;
+ dec->base.end_frame = nv84_decoder_end_frame_h264;
+
+ /* Sizes derived from the frame's macroblock count; they size the
+ * vpring/mbring/bitstream buffers allocated further down.
+ */
+ dec->frame_mbs = mb(dec->base.width) * mb_half(dec->base.height) * 2;
+ dec->frame_size = dec->frame_mbs << 8;
+ dec->vpring_deblock = align(0x30 * dec->frame_mbs, 0x100);
+ dec->vpring_residual = 0x2000 + MAX2(0x32000, 0x600 * dec->frame_mbs);
+ dec->vpring_ctrl = MAX2(0x10000, align(0x1080 + 0x144 * dec->frame_mbs, 0x100));
+ } else if (is_mpeg12) {
+ dec->base.decode_macroblock = nv84_decoder_decode_macroblock;
+ dec->base.begin_frame = nv84_decoder_begin_frame_mpeg12;
+ dec->base.end_frame = nv84_decoder_end_frame_mpeg12;
+
+ /* Bitstream entrypoint: parse with the shared vl MPEG1/2 parser. */
+ if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+ dec->mpeg12_bs = CALLOC_STRUCT(vl_mpg12_bs);
+ if (!dec->mpeg12_bs)
+ goto fail;
+ vl_mpg12_bs_init(dec->mpeg12_bs, &dec->base);
+ dec->base.decode_bitstream = nv84_decoder_decode_bitstream_mpeg12;
+ }
+ } else {
+ /* Not reachable: both flags being clear was rejected above. */
+ goto fail;
+ }
+
+ ret = nouveau_client_new(screen->device, &dec->client);
+ if (ret)
+ goto fail;
+
+ /* The BSP channel, pushbuf and bufctx exist for H.264 only. */
+ if (is_h264) {
+ ret = nouveau_object_new(&screen->device->object, 0,
+ NOUVEAU_FIFO_CHANNEL_CLASS,
+ &nv04_data, sizeof(nv04_data), &dec->bsp_channel);
+ if (ret)
+ goto fail;
+
+ ret = nouveau_pushbuf_new(dec->client, dec->bsp_channel, 4,
+ 32 * 1024, true, &dec->bsp_pushbuf);
+ if (ret)
+ goto fail;
+
+ ret = nouveau_bufctx_new(dec->client, 1, &dec->bsp_bufctx);
+ if (ret)
+ goto fail;
+ }
+
+ /* The VP channel, pushbuf and bufctx are created for both codecs. */
+ ret = nouveau_object_new(&screen->device->object, 0,
+ NOUVEAU_FIFO_CHANNEL_CLASS,
+ &nv04_data, sizeof(nv04_data), &dec->vp_channel);
+ if (ret)
+ goto fail;
+ ret = nouveau_pushbuf_new(dec->client, dec->vp_channel, 4,
+ 32 * 1024, true, &dec->vp_pushbuf);
+ if (ret)
+ goto fail;
+
+ ret = nouveau_bufctx_new(dec->client, 1, &dec->vp_bufctx);
+ if (ret)
+ goto fail;
+
+ /* bsp_push stays NULL for MPEG1/2; it is only used under is_h264. */
+ bsp_push = dec->bsp_pushbuf;
+ vp_push = dec->vp_pushbuf;
+
+ if (is_h264) {
+ dec->bsp_fw = nv84_load_bsp_firmware(screen->device, dec);
+ dec->vp_fw = nv84_load_vp_firmware(screen->device, dec);
+ if (!dec->bsp_fw || !dec->vp_fw)
+ goto fail;
+ }
+ if (is_mpeg12) {
+ dec->vp_fw = nv84_load_vp_firmware_mpeg(screen->device, dec);
+ if (!dec->vp_fw)
+ goto fail;
+ }
+
+ /* Working buffers. */
+ if (is_h264) {
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP,
+ 0, 0x40000, NULL, &dec->bsp_data);
+ if (ret)
+ goto fail;
+ }
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP,
+ 0, 0x40000, NULL, &dec->vp_data);
+ if (ret)
+ goto fail;
+ if (is_h264) {
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP,
+ 0,
+ 2 * (dec->vpring_deblock +
+ dec->vpring_residual +
+ dec->vpring_ctrl +
+ 0x1000),
+ NULL, &dec->vpring);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP,
+ 0,
+ (templ->max_references + 1) * dec->frame_mbs * 0x40 +
+ dec->frame_size + 0x2000,
+ NULL, &dec->mbring);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART,
+ 0, 2 * (0x700 + MAX2(0x40000, 0x800 + 0x180 * dec->frame_mbs)),
+ NULL, &dec->bitstream);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_map(dec->bitstream, NOUVEAU_BO_WR, dec->client);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART,
+ 0, 0x2000, NULL, &dec->vp_params);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_map(dec->vp_params, NOUVEAU_BO_WR, dec->client);
+ if (ret)
+ goto fail;
+ }
+ if (is_mpeg12) {
+ /* Same layout that nv84_decoder_begin_frame_mpeg12 indexes into:
+ * 0x100 header, aligned macroblock info, then macroblock data.
+ */
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART,
+ 0,
+ align(0x20 * mb(templ->width) * mb(templ->height), 0x100) +
+ (6 * 64 * 8) * mb(templ->width) * mb(templ->height) + 0x100,
+ NULL, &dec->mpeg12_bo);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_map(dec->mpeg12_bo, NOUVEAU_BO_WR, dec->client);
+ if (ret)
+ goto fail;
+ }
+
+ /* Fence bo; its first word starts out as 0. */
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+ 0, 0x1000, NULL, &dec->fence);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_map(dec->fence, NOUVEAU_BO_WR, dec->client);
+ if (ret)
+ goto fail;
+ *(uint32_t *)dec->fence->map = 0;
+
+ /* Attach firmware and data bos to the per-engine bufctxs. */
+ if (is_h264) {
+ nouveau_pushbuf_bufctx(bsp_push, dec->bsp_bufctx);
+ nouveau_bufctx_refn(dec->bsp_bufctx, 0,
+ dec->bsp_fw, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(dec->bsp_bufctx, 0,
+ dec->bsp_data, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ }
+
+ nouveau_pushbuf_bufctx(vp_push, dec->vp_bufctx);
+ nouveau_bufctx_refn(dec->vp_bufctx, 0, dec->vp_fw,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(dec->vp_bufctx, 0, dec->vp_data,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+
+ /* Create the engine objects (classes 0x74b0 and 0x7476) on their channels. */
+ if (is_h264 && !ret)
+ ret = nouveau_object_new(dec->bsp_channel, 0xbeef74b0, 0x74b0,
+ NULL, 0, &dec->bsp);
+
+ if (!ret)
+ ret = nouveau_object_new(dec->vp_channel, 0xbeef7476, 0x7476,
+ NULL, 0, &dec->vp);
+
+ if (ret)
+ goto fail;
+
+
+ if (is_h264) {
+ /* Zero out some parts of mbring/vpring. there's gotta be some cleaner way
+ * of doing this... perhaps makes sense to just copy the relevant logic
+ * here. */
+ /* NOTE(review): surf and mip are stack structs of which only the fields
+ * assigned below are initialised before clear_render_target is called.
+ */
+ color.f[0] = color.f[1] = color.f[2] = color.f[3] = 0;
+ surf.offset = dec->frame_size;
+ surf.width = 64;
+ surf.height = (templ->max_references + 1) * dec->frame_mbs / 4;
+ surf.depth = 1;
+ surf.base.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ surf.base.u.tex.level = 0;
+ surf.base.texture = &mip.base.base;
+ mip.level[0].tile_mode = 0;
+ mip.level[0].pitch = surf.width * 4;
+ mip.base.domain = NOUVEAU_BO_VRAM;
+ mip.base.bo = dec->mbring;
+ /* NOTE(review): the clear height is the literal 4760, not surf.height. */
+ context->clear_render_target(context, &surf.base, &color, 0, 0, 64, 4760);
+ surf.offset = dec->vpring->size / 2 - 0x1000;
+ surf.width = 1024;
+ surf.height = 1;
+ mip.level[0].pitch = surf.width * 4;
+ mip.base.bo = dec->vpring;
+ context->clear_render_target(context, &surf.base, &color, 0, 0, 1024, 1);
+ surf.offset = dec->vpring->size - 0x1000;
+ context->clear_render_target(context, &surf.base, &color, 0, 0, 1024, 1);
+
+ PUSH_SPACE(screen->pushbuf, 5);
+ PUSH_REFN(screen->pushbuf, dec->fence, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ /* The clear_render_target is done via 3D engine, so use it to write to a
+ * semaphore to indicate that it's done.
+ */
+ BEGIN_NV04(screen->pushbuf, NV50_3D(QUERY_ADDRESS_HIGH), 4);
+ PUSH_DATAh(screen->pushbuf, dec->fence->offset);
+ PUSH_DATA (screen->pushbuf, dec->fence->offset);
+ PUSH_DATA (screen->pushbuf, 1);
+ PUSH_DATA (screen->pushbuf, 0xf010);
+ PUSH_KICK (screen->pushbuf);
+
+ /* Program the BSP engine: bind the object, then set methods 0x180..,
+ * 0x1b8, the firmware address/size (0x600) and the data buffer (0x628).
+ */
+ PUSH_SPACE(bsp_push, 2 + 12 + 2 + 4 + 3);
+
+ BEGIN_NV04(bsp_push, SUBC_BSP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (bsp_push, dec->bsp->handle);
+
+ BEGIN_NV04(bsp_push, SUBC_BSP(0x180), 11);
+ for (i = 0; i < 11; i++)
+ PUSH_DATA(bsp_push, nv04_data.vram);
+ BEGIN_NV04(bsp_push, SUBC_BSP(0x1b8), 1);
+ PUSH_DATA (bsp_push, nv04_data.vram);
+
+ BEGIN_NV04(bsp_push, SUBC_BSP(0x600), 3);
+ PUSH_DATAh(bsp_push, dec->bsp_fw->offset);
+ PUSH_DATA (bsp_push, dec->bsp_fw->offset);
+ PUSH_DATA (bsp_push, dec->bsp_fw->size);
+
+ BEGIN_NV04(bsp_push, SUBC_BSP(0x628), 2);
+ PUSH_DATA (bsp_push, dec->bsp_data->offset >> 8);
+ PUSH_DATA (bsp_push, dec->bsp_data->size);
+ PUSH_KICK (bsp_push);
+ }
+
+ /* Program the VP engine with the same method sequence as the BSP above. */
+ PUSH_SPACE(vp_push, 2 + 12 + 2 + 4 + 3);
+
+ BEGIN_NV04(vp_push, SUBC_VP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (vp_push, dec->vp->handle);
+
+ BEGIN_NV04(vp_push, SUBC_VP(0x180), 11);
+ for (i = 0; i < 11; i++)
+ PUSH_DATA(vp_push, nv04_data.vram);
+
+ BEGIN_NV04(vp_push, SUBC_VP(0x1b8), 1);
+ PUSH_DATA (vp_push, nv04_data.vram);
+
+ BEGIN_NV04(vp_push, SUBC_VP(0x600), 3);
+ PUSH_DATAh(vp_push, dec->vp_fw->offset);
+ PUSH_DATA (vp_push, dec->vp_fw->offset);
+ PUSH_DATA (vp_push, dec->vp_fw->size);
+
+ BEGIN_NV04(vp_push, SUBC_VP(0x628), 2);
+ PUSH_DATA (vp_push, dec->vp_data->offset >> 8);
+ PUSH_DATA (vp_push, dec->vp_data->size);
+ PUSH_KICK (vp_push);
+
+ return &dec->base;
+fail:
+ nv84_decoder_destroy(&dec->base);
+ return NULL;
+}
+
+/* get_sampler_view_planes callback: returns the buffer's per-plane sampler
+ * view array.
+ */
+static struct pipe_sampler_view **
+nv84_video_buffer_sampler_view_planes(struct pipe_video_buffer *buffer)
+{
+   return ((struct nv84_video_buffer *)buffer)->sampler_view_planes;
+}
+
+/* get_sampler_view_components callback: returns the buffer's per-component
+ * sampler view array.
+ */
+static struct pipe_sampler_view **
+nv84_video_buffer_sampler_view_components(struct pipe_video_buffer *buffer)
+{
+   return ((struct nv84_video_buffer *)buffer)->sampler_view_components;
+}
+
+/* get_surfaces callback: returns the buffer's surface array. */
+static struct pipe_surface **
+nv84_video_buffer_surfaces(struct pipe_video_buffer *buffer)
+{
+   return ((struct nv84_video_buffer *)buffer)->surfaces;
+}
+
+/* Destroy a video buffer: drops the references on every resource, sampler
+ * view and surface slot, releases the two backing bos and frees the struct.
+ * Also used as the error path of nv84_video_buffer_create, so slots may
+ * still be NULL when this runs.
+ */
+static void
+nv84_video_buffer_destroy(struct pipe_video_buffer *buffer)
+{
+ struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer;
+ unsigned i;
+
+ assert(buf);
+
+ for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
+ pipe_resource_reference(&buf->resources[i], NULL);
+ pipe_sampler_view_reference(&buf->sampler_view_planes[i], NULL);
+ pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL);
+ /* Two surface slots per index (surfaces[] has VL_NUM_COMPONENTS * 2). */
+ pipe_surface_reference(&buf->surfaces[i * 2], NULL);
+ pipe_surface_reference(&buf->surfaces[i * 2 + 1], NULL);
+ }
+
+ nouveau_bo_ref(NULL, &buf->interlaced);
+ nouveau_bo_ref(NULL, &buf->full);
+
+ FREE(buffer);
+}
+
+/* Create an interlaced NV12 video buffer whose Y and UV planes share one bo.
+ *
+ * Falls back to vl_video_buffer_create when the XVMC_VL environment variable
+ * is set or the requested format is not NV12. Otherwise only interlaced
+ * 4:2:0 buffers are accepted.
+ *
+ * Two 2-layer array textures are created without storage (R8 for Y, R8G8
+ * for UV) and then pointed at a single "interlaced" bo; a second bo of the
+ * same size ("full") is allocated alongside. Sampler views are created per
+ * plane and per component, surfaces per plane and layer.
+ *
+ * Returns the new buffer, or NULL on failure. Failures after allocation of
+ * the buffer struct are cleaned up through nv84_video_buffer_destroy.
+ */
+struct pipe_video_buffer *
+nv84_video_buffer_create(struct pipe_context *pipe,
+                         const struct pipe_video_buffer *template)
+{
+   struct nv84_video_buffer *buffer;
+   struct pipe_resource templ;
+   unsigned i, j, component;
+   struct pipe_sampler_view sv_templ;
+   struct pipe_surface surf_templ;
+   struct nv50_miptree *mt0, *mt1;
+   struct nouveau_screen *screen = &((struct nv50_context *)pipe)->screen->base;
+   union nouveau_bo_config cfg;
+   unsigned bo_size;
+
+   if (getenv("XVMC_VL") || template->buffer_format != PIPE_FORMAT_NV12)
+      return vl_video_buffer_create(pipe, template);
+
+   if (!template->interlaced) {
+      debug_printf("Require interlaced video buffers\n");
+      return NULL;
+   }
+   if (template->chroma_format != PIPE_VIDEO_CHROMA_FORMAT_420) {
+      debug_printf("Must use 4:2:0 format\n");
+      return NULL;
+   }
+
+   /*
+    * Note that there are always going to be exactly two planes, one for Y,
+    * and one for UV. These are also the resources. VP expects these to be
+    * adjacent, so they need to belong to the same BO.
+    */
+
+   buffer = CALLOC_STRUCT(nv84_video_buffer);
+   if (!buffer) return NULL;
+
+   buffer->mvidx = -1;
+
+   buffer->base.buffer_format = template->buffer_format;
+   buffer->base.context = pipe;
+   buffer->base.destroy = nv84_video_buffer_destroy;
+   buffer->base.chroma_format = template->chroma_format;
+   buffer->base.width = template->width;
+   buffer->base.height = template->height;
+   buffer->base.get_sampler_view_planes = nv84_video_buffer_sampler_view_planes;
+   buffer->base.get_sampler_view_components = nv84_video_buffer_sampler_view_components;
+   buffer->base.get_surfaces = nv84_video_buffer_surfaces;
+   buffer->base.interlaced = true;
+
+   /* Y plane: one R8 texel per pixel, two layers of half the (4-aligned)
+    * height each.
+    */
+   memset(&templ, 0, sizeof(templ));
+   templ.target = PIPE_TEXTURE_2D_ARRAY;
+   templ.depth0 = 1;
+   templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   templ.format = PIPE_FORMAT_R8_UNORM;
+   templ.width0 = align(template->width, 2);
+   templ.height0 = align(template->height, 4) / 2;
+   templ.flags = NV50_RESOURCE_FLAG_VIDEO | NV50_RESOURCE_FLAG_NOALLOC;
+   templ.array_size = 2;
+
+   cfg.nv50.tile_mode = 0x20;
+   cfg.nv50.memtype = 0x70;
+
+   buffer->resources[0] = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources[0])
+      goto error;
+
+   /* UV plane: R8G8 at half the Y plane's width and height. */
+   templ.format = PIPE_FORMAT_R8G8_UNORM;
+   templ.width0 /= 2;
+   templ.height0 /= 2;
+   buffer->resources[1] = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources[1])
+      goto error;
+
+   mt0 = nv50_miptree(buffer->resources[0]);
+   mt1 = nv50_miptree(buffer->resources[1]);
+
+   bo_size = mt0->total_size + mt1->total_size;
+   if (nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, 0,
+                      bo_size, &cfg, &buffer->interlaced))
+      goto error;
+   /* XXX Change reference frame management so that this is only allocated in
+    * the decoder when necessary. */
+   if (nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, 0,
+                      bo_size, &cfg, &buffer->full))
+      goto error;
+
+   /* Each miptree takes its own reference on the shared bo. Going through
+    * nouveau_bo_ref with the miptree's own pointer keeps the refcount equal
+    * to the number of holders (buffer->interlaced, mt0 and mt1). Taking
+    * both references through one scratch pointer made the second call
+    * release what the first had acquired.
+    */
+   nouveau_bo_ref(buffer->interlaced, &mt0->base.bo);
+   mt0->base.domain = NOUVEAU_BO_VRAM;
+   mt0->base.offset = 0;
+   mt0->base.address = buffer->interlaced->offset + mt0->base.offset;
+
+   nouveau_bo_ref(buffer->interlaced, &mt1->base.bo);
+   mt1->base.domain = NOUVEAU_BO_VRAM;
+   mt1->base.offset = mt0->layer_stride * 2;
+   mt1->base.address = buffer->interlaced->offset + mt1->base.offset;
+
+   memset(&sv_templ, 0, sizeof(sv_templ));
+   for (component = 0, i = 0; i < 2; ++i ) {
+      struct pipe_resource *res = buffer->resources[i];
+      unsigned nr_components = util_format_get_nr_components(res->format);
+
+      u_sampler_view_default_template(&sv_templ, res, res->format);
+      buffer->sampler_view_planes[i] =
+         pipe->create_sampler_view(pipe, res, &sv_templ);
+      if (!buffer->sampler_view_planes[i])
+         goto error;
+
+      /* One view per format component, replicating that component into
+       * r/g/b with alpha forced to one.
+       */
+      for (j = 0; j < nr_components; ++j, ++component) {
+         sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b =
+            PIPE_SWIZZLE_RED + j;
+         sv_templ.swizzle_a = PIPE_SWIZZLE_ONE;
+
+         buffer->sampler_view_components[component] =
+            pipe->create_sampler_view(pipe, res, &sv_templ);
+         if (!buffer->sampler_view_components[component])
+            goto error;
+      }
+   }
+
+   /* Two surfaces per plane: layer 0 and layer 1. */
+   memset(&surf_templ, 0, sizeof(surf_templ));
+   for (j = 0; j < 2; ++j) {
+      surf_templ.format = buffer->resources[j]->format;
+      surf_templ.u.tex.first_layer = surf_templ.u.tex.last_layer = 0;
+      buffer->surfaces[j * 2] =
+         pipe->create_surface(pipe, buffer->resources[j], &surf_templ);
+      if (!buffer->surfaces[j * 2])
+         goto error;
+
+      surf_templ.u.tex.first_layer = surf_templ.u.tex.last_layer = 1;
+      buffer->surfaces[j * 2 + 1] =
+         pipe->create_surface(pipe, buffer->resources[j], &surf_templ);
+      if (!buffer->surfaces[j * 2 + 1])
+         goto error;
+   }
+
+   return &buffer->base;
+
+error:
+   nv84_video_buffer_destroy(&buffer->base);
+   return NULL;
+}
+
+/* Report a video capability for the given profile.
+ *
+ * Supported profiles are those that reduce to MPEG4 AVC or MPEG1/2. The
+ * maximum width and height are 2048, the preferred format is NV12, and
+ * interlaced buffers are supported and preferred while progressive ones are
+ * not. Unknown params and unknown profiles (for MAX_LEVEL) are logged with
+ * debug_printf and yield 0. `pscreen` and `entrypoint` are not consulted.
+ */
+int
+nv84_screen_get_video_param(struct pipe_screen *pscreen,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint,
+ enum pipe_video_cap param)
+{
+ switch (param) {
+ case PIPE_VIDEO_CAP_SUPPORTED:
+ return u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC ||
+ u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG12;
+ case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_VIDEO_CAP_MAX_WIDTH:
+ case PIPE_VIDEO_CAP_MAX_HEIGHT:
+ return 2048;
+ case PIPE_VIDEO_CAP_PREFERED_FORMAT:
+ return PIPE_FORMAT_NV12;
+ case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
+ case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+ return true;
+ case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
+ return false;
+ case PIPE_VIDEO_CAP_MAX_LEVEL:
+ /* Keyed on the exact profile, not the reduced codec family. */
+ switch (profile) {
+ case PIPE_VIDEO_PROFILE_MPEG1:
+ return 0;
+ case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
+ case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
+ return 3;
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+ return 41;
+ default:
+ debug_printf("unknown video profile: %d\n", profile);
+ return 0;
+ }
+ default:
+ debug_printf("unknown video param: %d\n", param);
+ return 0;
+ }
+}
+
+/* Tell whether `format` can be used for video buffers.
+ * For any known profile only NV12 is accepted; with
+ * PIPE_VIDEO_PROFILE_UNKNOWN the decision is left to
+ * vl_video_buffer_is_format_supported.
+ */
+boolean
+nv84_screen_video_supported(struct pipe_screen *screen,
+                            enum pipe_format format,
+                            enum pipe_video_profile profile,
+                            enum pipe_video_entrypoint entrypoint)
+{
+   if (profile == PIPE_VIDEO_PROFILE_UNKNOWN)
+      return vl_video_buffer_is_format_supported(screen, format,
+                                                 profile, entrypoint);
+
+   return format == PIPE_FORMAT_NV12;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video.h b/src/gallium/drivers/nouveau/nv50/nv84_video.h
new file mode 100644
index 00000000000..2edba389dbf
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2013 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NV84_VIDEO_H_
+#define NV84_VIDEO_H_
+
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+#include "vl/vl_types.h"
+
+#include "vl/vl_mpeg12_bitstream.h"
+
+#include "util/u_video.h"
+
+#include "nv50/nv50_context.h"
+
+/*
+ * Argument pairs handed to BEGIN_NV04(): both macros expand to "2, (m)",
+ * i.e. the constant 2 (presumably the subchannel -- confirm against
+ * BEGIN_NV04) followed by the method offset m.
+ * These are expected to be on their own pushbufs.
+ */
+#define SUBC_BSP(m) 2, (m)
+#define SUBC_VP(m) 2, (m)
+
+/*
+ * One picture-description pointer viewed as the generic base type or as any
+ * of the codec-specific description types, so a caller can store the generic
+ * pointer in .base and read it back through the codec member it needs.
+ */
+union pipe_desc {
+ struct pipe_picture_desc *base;
+ struct pipe_mpeg12_picture_desc *mpeg12;
+ struct pipe_mpeg4_picture_desc *mpeg4;
+ struct pipe_vc1_picture_desc *vc1;
+ struct pipe_h264_picture_desc *h264;
+};
+
+/*
+ * Driver-private video buffer. "base" must stay first: the decoder code
+ * casts struct pipe_video_buffer pointers to this type.
+ */
+struct nv84_video_buffer {
+ struct pipe_video_buffer base;
+ /* Per-component resources, sampler views and surfaces. */
+ struct pipe_resource *resources[VL_NUM_COMPONENTS];
+ struct pipe_sampler_view *sampler_view_planes[VL_NUM_COMPONENTS];
+ struct pipe_sampler_view *sampler_view_components[VL_NUM_COMPONENTS];
+ /* Two surfaces per component. */
+ struct pipe_surface *surfaces[VL_NUM_COMPONENTS * 2];
+
+ /* Buffer objects handed to the hardware as decode target / references. */
+ struct nouveau_bo *interlaced, *full;
+ /* Index assigned to a reference picture by the BSP stage; a negative
+ * value means no index has been assigned yet. */
+ int mvidx;
+ /* H.264 frame number bookkeeping maintained by nv84_decoder_bsp(). */
+ unsigned frame_num, frame_num_max;
+};
+
+/*
+ * Decoder state shared by the BSP (bitstream) and VP (video processor)
+ * stages. "base" must stay first so the object can be used where a
+ * struct pipe_video_codec is expected.
+ */
+struct nv84_decoder {
+ struct pipe_video_codec base;
+ struct nouveau_client *client;
+ /* Each engine has its own channel, object, push buffer and bufctx. */
+ struct nouveau_object *bsp_channel, *vp_channel, *bsp, *vp;
+ struct nouveau_pushbuf *bsp_pushbuf, *vp_pushbuf;
+ struct nouveau_bufctx *bsp_bufctx, *vp_bufctx;
+
+ struct nouveau_bo *bsp_fw, *bsp_data;
+ struct nouveau_bo *vp_fw, *vp_data;
+ struct nouveau_bo *mbring, *vpring;
+
+ /*
+ * states:
+ * 0: init
+ * 1: vpring/mbring cleared, bsp is ready
+ * 2: bsp is done, vp is ready
+ * and then vp sets it back to 1
+ */
+ struct nouveau_bo *fence;
+
+ /* H.264: parameters + bitstream data consumed by the BSP stage. */
+ struct nouveau_bo *bitstream;
+ /* H.264: parameter blocks consumed by the VP stage. */
+ struct nouveau_bo *vp_params;
+
+ size_t vp_fw2_offset;
+
+ unsigned frame_mbs, frame_size;
+ /* VPRING layout:
+ RESIDUAL
+ CTRL
+ DEBLOCK
+ 0x1000
+ */
+ unsigned vpring_deblock, vpring_residual, vpring_ctrl;
+
+
+ struct vl_mpg12_bs *mpeg12_bs;
+
+ /* MPEG-1/2: buffer holding the header, macroblock info and coefficient
+ * data; mpeg12_mb_info and mpeg12_data are write cursors that the
+ * macroblock code advances as it emits data. */
+ struct nouveau_bo *mpeg12_bo;
+ void *mpeg12_mb_info;
+ uint16_t *mpeg12_data;
+ /* Scan-order table indexed by coefficient position (bitstream mode). */
+ const int *zscan;
+ uint8_t mpeg12_intra_matrix[64];
+ uint8_t mpeg12_non_intra_matrix[64];
+};
+
+/* Number of 16-unit macroblocks needed to cover "coord" units (rounds up). */
+static INLINE uint32_t mb(uint32_t coord)
+{
+   const uint32_t mb_size = 16;
+
+   return (coord + (mb_size - 1)) / mb_size;
+}
+
+/* Number of 32-unit groups (macroblock pairs) needed to cover "coord" units,
+ * rounding up. */
+static INLINE uint32_t mb_half(uint32_t coord)
+{
+   const uint32_t pair_size = 32;
+
+   return (coord + (pair_size - 1)) / pair_size;
+}
+
+/*
+ * H.264 bitstream (BSP) stage: uploads the picture parameters and the
+ * num_buffers data chunks (data[i], num_bytes[i]) and submits the BSP
+ * commands. dest is the picture being decoded. Returns 0.
+ */
+int
+nv84_decoder_bsp(struct nv84_decoder *dec,
+ struct pipe_h264_picture_desc *desc,
+ unsigned num_buffers,
+ const void *const *data,
+ const unsigned *num_bytes,
+ struct nv84_video_buffer *dest);
+
+/*
+ * H.264 video processor (VP) stage: waits for the BSP stage via the fence
+ * and submits the VP commands that write the picture into dest.
+ */
+void
+nv84_decoder_vp_h264(struct nv84_decoder *dec,
+ struct pipe_h264_picture_desc *desc,
+ struct nv84_video_buffer *dest);
+
+/*
+ * MPEG-1/2: appends the info record and coefficient data for one macroblock
+ * to the decoder's MPEG buffers (advances dec->mpeg12_mb_info and
+ * dec->mpeg12_data).
+ */
+void
+nv84_decoder_vp_mpeg12_mb(struct nv84_decoder *dec,
+ struct pipe_mpeg12_picture_desc *desc,
+ const struct pipe_mpeg12_macroblock *mb);
+
+/*
+ * MPEG-1/2: writes the picture header and submits the VP commands that
+ * decode the previously queued macroblocks into dest.
+ */
+void
+nv84_decoder_vp_mpeg12(struct nv84_decoder *dec,
+ struct pipe_mpeg12_picture_desc *desc,
+ struct nv84_video_buffer *dest);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c b/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c
new file mode 100644
index 00000000000..86047b5f463
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright 2013 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv84_video.h"
+
+/*
+ * H.264 parameter block that nv84_decoder_bsp() copies to the start of the
+ * bitstream buffer. The layout is fixed: the trailing comments give each
+ * member's byte offset inside its sub-structure, and the total size is
+ * checked to be 0x530 with a STATIC_ASSERT in nv84_decoder_bsp().
+ */
+struct iparm {
+ /* Sequence-level parameters. */
+ struct iseqparm {
+ uint32_t chroma_format_idc; // 00
+ uint32_t pad[(0x128 - 0x4) / 4];
+ uint32_t log2_max_frame_num_minus4; // 128
+ uint32_t pic_order_cnt_type; // 12c
+ uint32_t log2_max_pic_order_cnt_lsb_minus4; // 130
+ uint32_t delta_pic_order_always_zero_flag; // 134
+ uint32_t num_ref_frames; // 138
+ uint32_t pic_width_in_mbs_minus1; // 13c
+ uint32_t pic_height_in_map_units_minus1; // 140
+ uint32_t frame_mbs_only_flag; // 144
+ uint32_t mb_adaptive_frame_field_flag; // 148
+ uint32_t direct_8x8_inference_flag; // 14c
+ } iseqparm; // 000
+ /* Picture-level parameters followed by the reference list. */
+ struct ipicparm {
+ uint32_t entropy_coding_mode_flag; // 00
+ uint32_t pic_order_present_flag; // 04
+ uint32_t num_slice_groups_minus1; // 08
+ uint32_t slice_group_map_type; // 0c
+ uint32_t pad1[0x60 / 4];
+ uint32_t u70; // 70
+ uint32_t u74; // 74
+ uint32_t u78; // 78
+ uint32_t num_ref_idx_l0_active_minus1; // 7c
+ uint32_t num_ref_idx_l1_active_minus1; // 80
+ uint32_t weighted_pred_flag; // 84
+ uint32_t weighted_bipred_idc; // 88
+ uint32_t pic_init_qp_minus26; // 8c
+ uint32_t chroma_qp_index_offset; // 90
+ uint32_t deblocking_filter_control_present_flag; // 94
+ uint32_t constrained_intra_pred_flag; // 98
+ uint32_t redundant_pic_cnt_present_flag; // 9c
+ uint32_t transform_8x8_mode_flag; // a0
+ uint32_t pad2[(0x1c8 - 0xa0 - 4) / 4];
+ uint32_t second_chroma_qp_index_offset; // 1c8
+ uint32_t u1cc; // 1cc
+ uint32_t curr_pic_order_cnt; // 1d0
+ uint32_t field_order_cnt[2]; // 1d4
+ uint32_t curr_mvidx; // 1dc
+ /* One entry per reference frame; each entry occupies 0x20 bytes. */
+ struct iref {
+ uint32_t u00; // 00
+ uint32_t field_is_ref; // 04 // bit0: top, bit1: bottom
+ uint8_t is_long_term; // 08
+ uint8_t non_existing; // 09
+ uint32_t frame_idx; // 0c
+ uint32_t field_order_cnt[2]; // 10
+ uint32_t mvidx; // 18
+ uint8_t field_pic_flag; // 1c
+ // 20
+ } refs[0x10]; // 1e0
+ } ipicparm; // 150
+};
+
+/*
+ * Run the H.264 bitstream (BSP) stage for one picture.
+ *
+ * Fills a struct iparm from desc and the reference frames, copies it and the
+ * caller's bitstream chunks into dec->bitstream, then submits the BSP
+ * commands on dec->bsp_pushbuf. The commands wait for the fence to be 1 and
+ * set it to 2 when the BSP is done.
+ *
+ * dec          decoder state
+ * desc         H.264 picture description
+ * num_buffers  number of entries in data[] / num_bytes[]
+ * data         bitstream chunks
+ * num_bytes    length in bytes of each chunk
+ * dest         picture being decoded; its frame_num fields (and mvidx, for
+ *              reference pictures) are updated here
+ *
+ * Always returns 0.
+ */
+int
+nv84_decoder_bsp(struct nv84_decoder *dec,
+ struct pipe_h264_picture_desc *desc,
+ unsigned num_buffers,
+ const void *const *data,
+ const unsigned *num_bytes,
+ struct nv84_video_buffer *dest)
+{
+ struct iparm params;
+ uint32_t more_params[0x44 / 4] = {0};
+ unsigned total_bytes = 0;
+ int i;
+ /* Trailer appended after the caller's bitstream data. */
+ static const uint32_t end[] = {0x0b010000, 0, 0x0b010000, 0};
+ /* indexes[n] != 0 means mvidx n is taken by one of the reference frames. */
+ char indexes[17] = {0};
+ struct nouveau_pushbuf *push = dec->bsp_pushbuf;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { dec->bitstream, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART },
+ { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ };
+
+ /* Wait on the fence buffer before overwriting the shared buffers. */
+ nouveau_bo_wait(dec->fence, NOUVEAU_BO_RDWR, dec->client);
+
+ STATIC_ASSERT(sizeof(struct iparm) == 0x530);
+
+ memset(&params, 0, sizeof(params));
+
+ dest->frame_num = dest->frame_num_max = desc->frame_num;
+
+ /* Describe the reference frames; the list ends at the first NULL entry. */
+ for (i = 0; i < 16; i++) {
+ struct iref *ref = &params.ipicparm.refs[i];
+ struct nv84_video_buffer *frame = (struct nv84_video_buffer *)desc->ref[i];
+ if (!frame) break;
+ /* The frame index is relative to the last IDR frame. So once the frame
+ * num goes back to 0, previous reference frames need to have a negative
+ * index.
+ */
+ if (desc->frame_num >= frame->frame_num_max) {
+ frame->frame_num_max = desc->frame_num;
+ } else {
+ /* frame_num is unsigned, so the "negative" index is stored wrapped. */
+ frame->frame_num -= frame->frame_num_max + 1;
+ frame->frame_num_max = desc->frame_num;
+ }
+ ref->non_existing = 0;
+ ref->field_is_ref = (desc->top_is_reference[i] ? 1 : 0) |
+ (desc->bottom_is_reference[i] ? 2 : 0);
+ ref->is_long_term = desc->is_long_term[i];
+ ref->field_order_cnt[0] = desc->field_order_cnt_list[i][0];
+ ref->field_order_cnt[1] = desc->field_order_cnt_list[i][1];
+ ref->frame_idx = frame->frame_num;
+ ref->u00 = ref->mvidx = frame->mvidx;
+ ref->field_pic_flag = desc->field_pic_flag;
+ /* NOTE(review): mvidx is a signed int and negative means "unassigned"
+ * below; this assumes every reference frame already has an index in
+ * [0, 16] -- confirm, otherwise this indexes out of bounds. */
+ indexes[frame->mvidx] = 1;
+ }
+
+ /* Needs to be adjusted if we ever support non-4:2:0 videos */
+ params.iseqparm.chroma_format_idc = 1;
+
+ params.iseqparm.pic_width_in_mbs_minus1 = mb(dec->base.width) - 1;
+ if (desc->field_pic_flag || desc->mb_adaptive_frame_field_flag)
+ params.iseqparm.pic_height_in_map_units_minus1 = mb_half(dec->base.height) - 1;
+ else
+ params.iseqparm.pic_height_in_map_units_minus1 = mb(dec->base.height) - 1;
+
+ if (desc->bottom_field_flag)
+ params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[1];
+ else
+ params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[0];
+ params.ipicparm.field_order_cnt[0] = desc->field_order_cnt[0];
+ params.ipicparm.field_order_cnt[1] = desc->field_order_cnt[1];
+ if (desc->is_reference) {
+ /* A reference picture without an index gets the lowest index that no
+ * current reference frame is using. */
+ if (dest->mvidx < 0) {
+ for (i = 0; i < desc->num_ref_frames + 1; i++) {
+ if (!indexes[i]) {
+ dest->mvidx = i;
+ break;
+ }
+ }
+ assert(i != desc->num_ref_frames + 1);
+ }
+
+ params.ipicparm.u1cc = params.ipicparm.curr_mvidx = dest->mvidx;
+ }
+
+ /* The remaining fields are copied straight from the description. */
+ params.iseqparm.num_ref_frames = desc->num_ref_frames;
+ params.iseqparm.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag;
+ params.ipicparm.constrained_intra_pred_flag = desc->constrained_intra_pred_flag;
+ params.ipicparm.weighted_pred_flag = desc->weighted_pred_flag;
+ params.ipicparm.weighted_bipred_idc = desc->weighted_bipred_idc;
+ params.iseqparm.frame_mbs_only_flag = desc->frame_mbs_only_flag;
+ params.ipicparm.transform_8x8_mode_flag = desc->transform_8x8_mode_flag;
+ params.ipicparm.chroma_qp_index_offset = desc->chroma_qp_index_offset;
+ params.ipicparm.second_chroma_qp_index_offset = desc->second_chroma_qp_index_offset;
+ params.ipicparm.pic_init_qp_minus26 = desc->pic_init_qp_minus26;
+ params.ipicparm.num_ref_idx_l0_active_minus1 = desc->num_ref_idx_l0_active_minus1;
+ params.ipicparm.num_ref_idx_l1_active_minus1 = desc->num_ref_idx_l1_active_minus1;
+ params.iseqparm.log2_max_frame_num_minus4 = desc->log2_max_frame_num_minus4;
+ params.iseqparm.pic_order_cnt_type = desc->pic_order_cnt_type;
+ params.iseqparm.log2_max_pic_order_cnt_lsb_minus4 = desc->log2_max_pic_order_cnt_lsb_minus4;
+ params.iseqparm.delta_pic_order_always_zero_flag = desc->delta_pic_order_always_zero_flag;
+ params.iseqparm.direct_8x8_inference_flag = desc->direct_8x8_inference_flag;
+ params.ipicparm.entropy_coding_mode_flag = desc->entropy_coding_mode_flag;
+ params.ipicparm.pic_order_present_flag = desc->pic_order_present_flag;
+ params.ipicparm.deblocking_filter_control_present_flag = desc->deblocking_filter_control_present_flag;
+ params.ipicparm.redundant_pic_cnt_present_flag = desc->redundant_pic_cnt_present_flag;
+
+ /* Buffer layout: params at 0x000, more_params at 0x600, data at 0x700. */
+ memcpy(dec->bitstream->map, &params, sizeof(params));
+ for (i = 0; i < num_buffers; i++) {
+ /* NOTE(review): this size check is an assert only, so it disappears in
+ * builds without assertions, and it does not reserve room for the
+ * trailer appended below -- confirm callers bound the input size. */
+ assert(total_bytes + num_bytes[i] < dec->bitstream->size / 2 - 0x700);
+ memcpy(dec->bitstream->map + 0x700 + total_bytes, data[i], num_bytes[i]);
+ total_bytes += num_bytes[i];
+ }
+ memcpy(dec->bitstream->map + 0x700 + total_bytes, end, sizeof(end));
+ total_bytes += sizeof(end);
+ /* Word 1 of more_params carries the data length including the trailer. */
+ more_params[1] = total_bytes;
+ memcpy(dec->bitstream->map + 0x600, more_params, sizeof(more_params));
+
+ /* One term per command below: header word plus its data words. */
+ PUSH_SPACE(push, 5 + 21 + 3 + 2 + 4 + 2);
+ nouveau_pushbuf_refn(push, bo_refs, sizeof(bo_refs)/sizeof(bo_refs[0]));
+
+ /* Wait for the fence = 1 */
+ BEGIN_NV04(push, SUBC_BSP(0x10), 4);
+ PUSH_DATAh(push, dec->fence->offset);
+ PUSH_DATA (push, dec->fence->offset);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 1);
+
+ /* TODO: Use both halves of bitstream/vpring for alternating frames */
+
+ /* Kick off the BSP */
+ BEGIN_NV04(push, SUBC_BSP(0x400), 20);
+ PUSH_DATA (push, dec->bitstream->offset >> 8);
+ PUSH_DATA (push, (dec->bitstream->offset >> 8) + 7);
+ PUSH_DATA (push, dec->bitstream->size / 2 - 0x700);
+ PUSH_DATA (push, (dec->bitstream->offset >> 8) + 6);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, dec->mbring->offset >> 8);
+ PUSH_DATA (push, dec->frame_size);
+ PUSH_DATA (push, (dec->mbring->offset + dec->frame_size) >> 8);
+ PUSH_DATA (push, dec->vpring->offset >> 8);
+ PUSH_DATA (push, dec->vpring->size / 2);
+ PUSH_DATA (push, dec->vpring_residual);
+ PUSH_DATA (push, dec->vpring_ctrl);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, dec->vpring_residual);
+ PUSH_DATA (push, dec->vpring_residual + dec->vpring_ctrl);
+ PUSH_DATA (push, dec->vpring_deblock);
+ PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl +
+ dec->vpring_residual + dec->vpring_deblock) >> 8);
+ PUSH_DATA (push, 0x654321);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0x100008);
+
+ BEGIN_NV04(push, SUBC_BSP(0x620), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, SUBC_BSP(0x300), 1);
+ PUSH_DATA (push, 0);
+
+ /* Write fence = 2, intr */
+ BEGIN_NV04(push, SUBC_BSP(0x610), 3);
+ PUSH_DATAh(push, dec->fence->offset);
+ PUSH_DATA (push, dec->fence->offset);
+ PUSH_DATA (push, 2);
+
+ BEGIN_NV04(push, SUBC_BSP(0x304), 1);
+ PUSH_DATA (push, 0x101);
+ PUSH_KICK (push);
+ return 0;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c b/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c
new file mode 100644
index 00000000000..619aa4e7a40
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c
@@ -0,0 +1,552 @@
+/*
+ * Copyright 2013 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv84_video.h"
+
+#include "util/u_sse.h"
+
+/*
+ * First H.264 VP parameter block; nv84_decoder_vp_h264() copies it to the
+ * start of the vp_params buffer. The layout is fixed: trailing comments give
+ * byte offsets and the size is checked to be 0x218 with a STATIC_ASSERT.
+ */
+struct h264_iparm1 {
+ uint8_t scaling_lists_4x4[6][16]; // 00
+ uint8_t scaling_lists_8x8[2][64]; // 60
+ uint32_t width; // e0
+ uint32_t height; // e4
+ /* One buffer address pair per reference slot. */
+ uint64_t ref1_addrs[16]; // e8
+ uint64_t ref2_addrs[16]; // 168
+ uint32_t unk1e8;
+ uint32_t unk1ec;
+ uint32_t w1; // 1f0
+ uint32_t w2; // 1f4
+ uint32_t w3; // 1f8
+ uint32_t h1; // 1fc
+ uint32_t h2; // 200
+ uint32_t h3; // 204
+ uint32_t mb_adaptive_frame_field_flag; // 208
+ uint32_t field_pic_flag; // 20c
+ uint32_t format; // 210
+ uint32_t unk214; // 214
+};
+
+/*
+ * Second H.264 VP parameter block; nv84_decoder_vp_h264() copies it to
+ * offset 0x400 of the vp_params buffer. The layout is fixed: trailing
+ * comments give byte offsets and the size is checked to be 0x38 with a
+ * STATIC_ASSERT.
+ */
+struct h264_iparm2 {
+ uint32_t width; // 00
+ uint32_t height; // 04
+ uint32_t mbs; // 08
+ uint32_t w1; // 0c
+ uint32_t w2; // 10
+ uint32_t w3; // 14
+ uint32_t h1; // 18
+ uint32_t h2; // 1c
+ uint32_t h3; // 20
+ uint32_t unk24;
+ uint32_t mb_adaptive_frame_field_flag; // 28
+ uint32_t top; // 2c
+ uint32_t bottom; // 30
+ uint32_t is_reference; // 34
+};
+
+/**
+ * Run the H.264 video processor (VP) stage for one picture.
+ *
+ * Builds the two VP parameter blocks, uploads them to dec->vp_params and
+ * submits the VP commands on dec->vp_pushbuf. The commands wait for the
+ * fence to reach 2 (BSP done) and set it back to 1 when finished.
+ *
+ * \param dec   decoder state
+ * \param desc  H.264 picture description (scaling lists, field flags,
+ *              reference frames)
+ * \param dest  picture being decoded
+ */
+void
+nv84_decoder_vp_h264(struct nv84_decoder *dec,
+                     struct pipe_h264_picture_desc *desc,
+                     struct nv84_video_buffer *dest)
+{
+   struct h264_iparm1 param1;
+   struct h264_iparm2 param2;
+   int i, width = align(dest->base.width, 16),
+      height = align(dest->base.height, 16);
+
+   struct nouveau_pushbuf *push = dec->vp_pushbuf;
+   struct nouveau_pushbuf_refn bo_refs[] = {
+      { dest->interlaced, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+      { dest->full, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+      { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+      { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+      { dec->vp_params, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART },
+      { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+   };
+   int num_refs = sizeof(bo_refs)/sizeof(*bo_refs);
+   bool is_ref = desc->is_reference;
+
+   STATIC_ASSERT(sizeof(struct h264_iparm1) == 0x218);
+   STATIC_ASSERT(sizeof(struct h264_iparm2) == 0x38);
+
+   memset(&param1, 0, sizeof(param1));
+   memset(&param2, 0, sizeof(param2));
+
+   memcpy(&param1.scaling_lists_4x4, desc->scaling_lists_4x4,
+          sizeof(param1.scaling_lists_4x4));
+   memcpy(&param1.scaling_lists_8x8, desc->scaling_lists_8x8,
+          sizeof(param1.scaling_lists_8x8));
+
+   param1.width = width;
+   param1.w1 = param1.w2 = param1.w3 = align(width, 64);
+   param1.height = param1.h2 = height;
+   param1.h1 = param1.h3 = align(height, 32);
+   param1.format = 0x3231564e; /* 'NV12' */
+   param1.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag;
+   param1.field_pic_flag = desc->field_pic_flag;
+
+   param2.width = width;
+   param2.w1 = param2.w2 = param2.w3 = param1.w1;
+   /* A field picture covers half of the (32-aligned) frame height. */
+   if (desc->field_pic_flag)
+      param2.height = align(height, 32) / 2;
+   else
+      param2.height = height;
+   param2.h1 = param2.h2 = align(height, 32);
+   param2.h3 = height;
+   /* Macroblock count: width and height are multiples of 16. */
+   param2.mbs = width * height >> 8;
+   if (desc->field_pic_flag) {
+      param2.top = desc->bottom_field_flag ? 2 : 1;
+      param2.bottom = desc->bottom_field_flag;
+   }
+   param2.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag;
+   param2.is_reference = desc->is_reference;
+
+   /* One term per command below: header word plus its data words. */
+   PUSH_SPACE(push, 5 + 16 + 3 + 2 + 6 + (is_ref ? 2 : 0) + 3 + 2 + 4 + 2);
+
+   struct nouveau_bo *ref2_default = dest->full;
+
+   /* Fill all 16 reference slots. Empty slots fall back to the destination's
+    * interlaced buffer and to the "full" buffer of reference 0 (or of the
+    * destination when reference 0 is absent). */
+   for (i = 0; i < 16; i++) {
+      struct nv84_video_buffer *buf = (struct nv84_video_buffer *)desc->ref[i];
+      struct nouveau_bo *bo1, *bo2;
+      if (buf) {
+         bo1 = buf->interlaced;
+         bo2 = buf->full;
+         if (i == 0)
+            ref2_default = buf->full;
+      } else {
+         bo1 = dest->interlaced;
+         bo2 = ref2_default;
+      }
+      param1.ref1_addrs[i] = bo1->offset;
+      param1.ref2_addrs[i] = bo2->offset;
+      /* Named differently from the function-level bo_refs array so that the
+       * later nouveau_pushbuf_refn(push, bo_refs, num_refs) call cannot be
+       * confused with (or accidentally bound to) this per-slot list. */
+      struct nouveau_pushbuf_refn frame_refs[] = {
+         { bo1, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+         { bo2, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+      };
+      nouveau_pushbuf_refn(push, frame_refs,
+                           sizeof(frame_refs)/sizeof(frame_refs[0]));
+   }
+
+   /* param1 lives at offset 0 of vp_params, param2 at offset 0x400. */
+   memcpy(dec->vp_params->map, &param1, sizeof(param1));
+   memcpy(dec->vp_params->map + 0x400, &param2, sizeof(param2));
+
+   nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+   /* Wait for BSP to have completed */
+   BEGIN_NV04(push, SUBC_VP(0x10), 4);
+   PUSH_DATAh(push, dec->fence->offset);
+   PUSH_DATA (push, dec->fence->offset);
+   PUSH_DATA (push, 2);
+   PUSH_DATA (push, 1); /* wait for sem == 2 */
+
+   /* VP step 1 */
+   BEGIN_NV04(push, SUBC_VP(0x400), 15);
+   PUSH_DATA (push, 1);
+   PUSH_DATA (push, param2.mbs);
+   PUSH_DATA (push, 0x3987654); /* each nibble probably a dma index */
+   PUSH_DATA (push, 0x55001); /* constant */
+   PUSH_DATA (push, dec->vp_params->offset >> 8);
+   PUSH_DATA (push, (dec->vpring->offset + dec->vpring_residual) >> 8);
+   PUSH_DATA (push, dec->vpring_ctrl);
+   PUSH_DATA (push, dec->vpring->offset >> 8);
+   PUSH_DATA (push, dec->bitstream->size / 2 - 0x700);
+   PUSH_DATA (push, (dec->mbring->offset + dec->mbring->size - 0x2000) >> 8);
+   PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl +
+                     dec->vpring_residual + dec->vpring_deblock) >> 8);
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 0x100008);
+   PUSH_DATA (push, dest->interlaced->offset >> 8);
+   PUSH_DATA (push, 0);
+
+   BEGIN_NV04(push, SUBC_VP(0x620), 2);
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 0);
+
+   BEGIN_NV04(push, SUBC_VP(0x300), 1);
+   PUSH_DATA (push, 0);
+
+   /* VP step 2 */
+   BEGIN_NV04(push, SUBC_VP(0x400), 5);
+   PUSH_DATA (push, 0x54530201);
+   PUSH_DATA (push, (dec->vp_params->offset >> 8) + 0x4);
+   PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl +
+                     dec->vpring_residual) >> 8);
+   PUSH_DATA (push, dest->interlaced->offset >> 8);
+   PUSH_DATA (push, dest->interlaced->offset >> 8);
+
+   /* Reference pictures additionally get their "full" buffer programmed. */
+   if (is_ref) {
+      BEGIN_NV04(push, SUBC_VP(0x414), 1);
+      PUSH_DATA (push, dest->full->offset >> 8);
+   }
+
+   BEGIN_NV04(push, SUBC_VP(0x620), 2);
+   PUSH_DATAh(push, dec->vp_fw2_offset);
+   PUSH_DATA (push, dec->vp_fw2_offset);
+
+   BEGIN_NV04(push, SUBC_VP(0x300), 1);
+   PUSH_DATA (push, 0);
+
+   /* Set the semaphore back to 1 */
+   BEGIN_NV04(push, SUBC_VP(0x610), 3);
+   PUSH_DATAh(push, dec->fence->offset);
+   PUSH_DATA (push, dec->fence->offset);
+   PUSH_DATA (push, 1);
+
+   /* Write to the semaphore location, intr */
+   BEGIN_NV04(push, SUBC_VP(0x304), 1);
+   PUSH_DATA (push, 0x101);
+
+   /* Flag both destination planes as being written by the GPU. */
+   for (i = 0; i < 2; i++) {
+      struct nv50_miptree *mt = nv50_miptree(dest->resources[i]);
+      mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+   }
+
+   PUSH_KICK (push);
+}
+
+/**
+ * Scale one DCT coefficient by its quantiser matrix entry.
+ *
+ * Computes val * quant / 16 and, for MPEG-1, forces a non-zero result to be
+ * odd by moving even values one step towards zero. The result is then
+ * saturated to [-2048, 2047].
+ *
+ * The arithmetic is done in int so that saturation sees the real product
+ * instead of a value already wrapped to 16 bits. Negative values are made
+ * odd on their magnitude: the previous "(ret + 1) | 1" form turned -1 into
+ * +1 and -3 into -1.
+ *
+ * \param val    quantised coefficient
+ * \param quant  quantiser matrix entry for this coefficient
+ * \param mpeg1  non-zero to apply the MPEG-1 oddification step
+ * \return the scaled, saturated coefficient
+ */
+static INLINE int16_t inverse_quantize(int16_t val, uint8_t quant, int mpeg1) {
+   int ret = val * quant / 16;
+   if (mpeg1 && ret) {
+      if (ret > 0)
+         ret = (ret - 1) | 1;
+      else
+         ret = -((-ret - 1) | 1);
+   }
+   if (ret < -2048)
+      ret = -2048;
+   else if (ret > 2047)
+      ret = 2047;
+   return (int16_t)ret;
+}
+
+/*
+ * Per-macroblock record written to the MPEG-1/2 macroblock info area by
+ * nv84_decoder_vp_mpeg12_mb(). The size is checked to be 32 bytes with a
+ * STATIC_ASSERT there.
+ */
+struct mpeg12_mb_info {
+ /* Macroblock position: y * (width in macroblocks) + x. */
+ uint32_t index;
+ /* Motion type bits, bit 0 = intra, 0x20 = dct_type set. */
+ uint8_t unk4;
+ /* (motion_vertical_field_select << 4) | low nibble of macroblock_modes. */
+ uint8_t unk5;
+ uint16_t coded_block_pattern;
+ /* Number of (index, value) pairs emitted for each of the 6 blocks. */
+ uint8_t block_counts[6];
+ uint16_t PMV[8];
+ /* In the extra record emitted for skipped macroblocks:
+ * num_skipped_macroblocks - 1. */
+ uint16_t skipped;
+};
+
+/*
+ * Queue one MPEG-1/2 macroblock for the VP.
+ *
+ * For each block selected by coded_block_pattern, the non-zero coefficients
+ * are appended to dec->mpeg12_data as (index, value) pairs of 16-bit words.
+ * A struct mpeg12_mb_info record describing the macroblock is then appended
+ * at dec->mpeg12_mb_info, followed by a second record when the macroblock
+ * carries skipped macroblocks. Both cursors are advanced.
+ *
+ * With the bitstream entrypoint the coefficients are also inverse-quantised
+ * and their index is remapped through dec->zscan; otherwise they are passed
+ * through unchanged.
+ *
+ * dec     decoder state
+ * desc    picture description (not used by this function)
+ * macrob  macroblock to queue
+ */
+void
+nv84_decoder_vp_mpeg12_mb(struct nv84_decoder *dec,
+ struct pipe_mpeg12_picture_desc *desc,
+ const struct pipe_mpeg12_macroblock *macrob)
+{
+ STATIC_ASSERT(sizeof(struct mpeg12_mb_info) == 32);
+
+ struct mpeg12_mb_info info = {0};
+ int i, sum = 0, mask, block_index, count;
+ const int16_t *blocks;
+ int intra = macrob->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA;
+ int motion = macrob->macroblock_type &
+ (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD);
+ const uint8_t *quant_matrix = intra ? dec->mpeg12_intra_matrix :
+ dec->mpeg12_non_intra_matrix;
+ int mpeg1 = dec->base.profile == PIPE_VIDEO_PROFILE_MPEG1;
+
+ info.index = macrob->y * mb(dec->base.width) + macrob->x;
+ info.unk4 = motion;
+ if (intra)
+ info.unk4 |= 1;
+ if (macrob->macroblock_modes.bits.dct_type)
+ info.unk4 |= 0x20;
+ info.unk5 = (macrob->motion_vertical_field_select << 4) |
+ (macrob->macroblock_modes.value & 0xf);
+ info.coded_block_pattern = macrob->coded_block_pattern;
+ if (motion) {
+ memcpy(info.PMV, macrob->PMV, sizeof(info.PMV));
+ }
+ blocks = macrob->blocks;
+ /* Walk the 6 block bits from 0x20 down to 0x01; "blocks" only advances
+ * for blocks that are present in the pattern. */
+ for (mask = 0x20, block_index = 0; mask > 0; mask >>= 1, block_index++) {
+ if ((macrob->coded_block_pattern & mask) == 0)
+ continue;
+
+ count = 0;
+
+ /*
+ * The observation here is that there are a lot of 0's, and things go
+ * a lot faster if one skips over them.
+ */
+
+#if defined(PIPE_ARCH_SSE) && defined(PIPE_ARCH_X86_64)
+/* Note that the SSE implementation is much more tuned to X86_64. As it's not
+ * benchmarked on X86_32, disable it there. I suspect that the code needs to
+ * be reorganized in terms of 32-bit wide data in order to be more
+ * efficient. NV84+ were released well into the 64-bit CPU era, so it should
+ * be a minority case.
+ */
+
+/* This returns a 16-bit bit-mask, each 2 bits are both 1 or both 0, depending
+ * on whether the corresponding (16-bit) word in blocks is zero or non-zero. */
+/* NOTE(review): _mm_load_si128 is the aligned load, so this assumes "blocks"
+ * is 16-byte aligned -- confirm against the code that allocates it. */
+#define wordmask(blocks, zero) \
+ (uint64_t)(_mm_movemask_epi8( \
+ _mm_cmpeq_epi16( \
+ zero, _mm_load_si128((__m128i *)(blocks)))))
+
+ __m128i zero = _mm_setzero_si128();
+
+ /* TODO: Look into doing the inverse quantization in terms of SSE
+ * operations unconditionally, when necessary. */
+ /* bmask0 covers coefficients 0..31, bmask1 covers 32..63. */
+ uint64_t bmask0 = wordmask(blocks, zero);
+ bmask0 |= wordmask(blocks + 8, zero) << 16;
+ bmask0 |= wordmask(blocks + 16, zero) << 32;
+ bmask0 |= wordmask(blocks + 24, zero) << 48;
+ uint64_t bmask1 = wordmask(blocks + 32, zero);
+ bmask1 |= wordmask(blocks + 40, zero) << 16;
+ bmask1 |= wordmask(blocks + 48, zero) << 32;
+ bmask1 |= wordmask(blocks + 56, zero) << 48;
+
+ /* The wordmask macro returns the inverse of what we want, since it
+ * returns a 1 for equal-to-zero. Invert. */
+ bmask0 = ~bmask0;
+ bmask1 = ~bmask1;
+
+ /* Note that the bitmask is actually sequences of 2 bits for each block
+ * index. This is because there is no movemask_epi16. That means that
+ * (a) ffs will never return 64, since the prev bit will always be set
+ * in that case, and (b) we need to do an extra bit shift. Or'ing the
+ * bitmasks together is faster than having a loop that computes them one
+ * at a time and processes them, on a Core i7-920. Trying to put bmask
+ * into an array and then looping also slows things down.
+ */
+
+ /* shift needs to be the same width as i, and unsigned so that / 2
+ * becomes a rshift operation */
+ uint32_t shift;
+ i = 0;
+
+ if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+ int16_t tmp;
+ /* Each iteration jumps to the next non-zero coefficient, emits its
+ * (index, value) pair and drops its two mask bits. */
+ while ((shift = __builtin_ffsll(bmask0))) {
+ i += (shift - 1) / 2;
+ bmask0 >>= shift - 1;
+ *dec->mpeg12_data++ = dec->zscan[i] * 2;
+ tmp = inverse_quantize(blocks[i], quant_matrix[i], mpeg1);
+ *dec->mpeg12_data++ = tmp;
+ sum += tmp;
+ count++;
+ i++;
+ bmask0 >>= 2;
+ }
+ i = 32;
+ while ((shift = __builtin_ffsll(bmask1))) {
+ i += (shift - 1) / 2;
+ bmask1 >>= shift - 1;
+ *dec->mpeg12_data++ = dec->zscan[i] * 2;
+ tmp = inverse_quantize(blocks[i], quant_matrix[i], mpeg1);
+ *dec->mpeg12_data++ = tmp;
+ sum += tmp;
+ count++;
+ i++;
+ bmask1 >>= 2;
+ }
+ } else {
+ /* Same scan, but indices and values are passed through unchanged. */
+ while ((shift = __builtin_ffsll(bmask0))) {
+ i += (shift - 1) / 2;
+ bmask0 >>= shift - 1;
+ *dec->mpeg12_data++ = i * 2;
+ *dec->mpeg12_data++ = blocks[i];
+ count++;
+ i++;
+ bmask0 >>= 2;
+ }
+ i = 32;
+ while ((shift = __builtin_ffsll(bmask1))) {
+ i += (shift - 1) / 2;
+ bmask1 >>= shift - 1;
+ *dec->mpeg12_data++ = i * 2;
+ *dec->mpeg12_data++ = blocks[i];
+ count++;
+ i++;
+ bmask1 >>= 2;
+ }
+ }
+#undef wordmask
+#else
+
+ /*
+ * This loop looks ridiculously written... and it is. I tried a lot of
+ * different ways of achieving this scan, and this was the fastest, at
+ * least on a Core i7-920. Note that it's not necessary to skip the 0's,
+ * the firmware will deal with those just fine. But it's faster to skip
+ * them. Note to people trying benchmarks: make sure to use realistic
+ * mpeg data, which can often be a single data point first followed by
+ * 63 0's, or <data> 7x <0> <data> 7x <0> etc.
+ */
+ i = 0;
+ if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+ while (true) {
+ int16_t tmp;
+ while (likely(i < 64 && !(tmp = blocks[i]))) i++;
+ if (i >= 64) break;
+ *dec->mpeg12_data++ = dec->zscan[i] * 2;
+ tmp = inverse_quantize(tmp, quant_matrix[i], mpeg1);
+ *dec->mpeg12_data++ = tmp;
+ sum += tmp;
+ count++;
+ i++;
+ }
+ } else {
+ while (true) {
+ int16_t tmp;
+ while (likely(i < 64 && !(tmp = blocks[i]))) i++;
+ if (i >= 64) break;
+ *dec->mpeg12_data++ = i * 2;
+ *dec->mpeg12_data++ = tmp;
+ count++;
+ i++;
+ }
+ }
+
+#endif
+
+ /* For non-MPEG-1 bitstream decoding, when the running coefficient sum is
+ * even, flip the low bit of the coefficient at index 63: toggle it if it
+ * was the last pair emitted, otherwise append (63, 1). Presumably this is
+ * MPEG-2 mismatch control -- confirm against the spec.
+ * NOTE(review): "sum" is not reset per block, so it accumulates over the
+ * whole macroblock -- confirm that is intended. */
+ if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+ if (!mpeg1 && (sum & 1) == 0) {
+ if (count && *(dec->mpeg12_data - 2) == 63 * 2) {
+ uint16_t *val = dec->mpeg12_data - 1;
+ if (*val & 1) *val -= 1;
+ else *val += 1;
+ } else {
+ *dec->mpeg12_data++ = 63 * 2;
+ *dec->mpeg12_data++ = 1;
+ count++;
+ }
+ }
+ }
+
+ /* Set bit 0 of the index word of the block's last pair (indices are
+ * stored doubled, so bit 0 is otherwise clear). An empty block gets a
+ * single (1, 0) pair so that every coded block has at least one entry. */
+ if (count) {
+ *(dec->mpeg12_data - 2) |= 1;
+ } else {
+ *dec->mpeg12_data++ = 1;
+ *dec->mpeg12_data++ = 0;
+ count = 1;
+ }
+ info.block_counts[block_index] = count;
+ blocks += 64;
+ }
+
+ /* Append the record; mpeg12_mb_info is a void *, so the += below relies
+ * on byte-sized void pointer arithmetic (a compiler extension). */
+ memcpy(dec->mpeg12_mb_info, &info, sizeof(info));
+ dec->mpeg12_mb_info += sizeof(info);
+
+ /* Skipped macroblocks are described by one extra record that starts at
+ * the next index and carries no coded blocks. */
+ if (macrob->num_skipped_macroblocks) {
+ info.index++;
+ info.coded_block_pattern = 0;
+ info.skipped = macrob->num_skipped_macroblocks - 1;
+ memset(info.block_counts, 0, sizeof(info.block_counts));
+ memcpy(dec->mpeg12_mb_info, &info, sizeof(info));
+ dec->mpeg12_mb_info += sizeof(info);
+ }
+}
+
+/*
+ * Picture header that nv84_decoder_vp_mpeg12() copies to the start of the
+ * MPEG-1/2 buffer. The layout is fixed: trailing comments give byte offsets
+ * and the size is checked to be 0x100 with a STATIC_ASSERT.
+ */
+struct mpeg12_header {
+ uint32_t luma_top_size; // 00
+ uint32_t luma_bottom_size; // 04
+ uint32_t chroma_top_size; // 08
+ uint32_t mbs; // 0c
+ uint32_t mb_info_size; // 10
+ uint32_t mb_width_minus1; // 14
+ uint32_t mb_height_minus1; // 18
+ uint32_t width; // 1c
+ uint32_t height; // 20
+ uint8_t progressive; // 24
+ uint8_t mocomp_only; // 25
+ uint8_t frames; // 26
+ uint8_t picture_structure; // 27
+ uint32_t unk28; // 28 -- 0x50100
+ uint32_t unk2c; // 2c
+ /* Pads the header out to 0x100 bytes. */
+ uint32_t pad[4 * 13];
+};
+
+/*
+ * Submit one MPEG-1/2 picture to the VP.
+ *
+ * Writes the picture header to the start of dec->mpeg12_bo and submits the
+ * VP commands on dec->vp_pushbuf. The macroblock data is expected to have
+ * been queued already by nv84_decoder_vp_mpeg12_mb().
+ *
+ * dec   decoder state
+ * desc  picture description; ref[0] and ref[1] are the optional references
+ * dest  picture being decoded
+ */
+void
+nv84_decoder_vp_mpeg12(struct nv84_decoder *dec,
+ struct pipe_mpeg12_picture_desc *desc,
+ struct nv84_video_buffer *dest)
+{
+ struct nouveau_pushbuf *push = dec->vp_pushbuf;
+ struct nv84_video_buffer *ref1 = (struct nv84_video_buffer *)desc->ref[0];
+ struct nv84_video_buffer *ref2 = (struct nv84_video_buffer *)desc->ref[1];
+ /* Entries 1 and 2 are filled in once the references are resolved. */
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { dest->interlaced, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { NULL, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { NULL, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { dec->mpeg12_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART },
+ };
+ int i, num_refs = sizeof(bo_refs) / sizeof(*bo_refs);
+ struct mpeg12_header header = {0};
+ struct nv50_miptree *y = nv50_miptree(dest->resources[0]);
+ struct nv50_miptree *uv = nv50_miptree(dest->resources[1]);
+
+ STATIC_ASSERT(sizeof(struct mpeg12_header) == 0x100);
+
+ /* A missing reference is replaced by the destination itself. */
+ if (ref1 == NULL)
+ ref1 = dest;
+ if (ref2 == NULL)
+ ref2 = dest;
+ bo_refs[1].bo = ref1->interlaced;
+ bo_refs[2].bo = ref2->interlaced;
+
+ header.luma_top_size = y->layer_stride;
+ header.luma_bottom_size = y->layer_stride;
+ header.chroma_top_size = uv->layer_stride;
+ header.mbs = mb(dec->base.width) * mb(dec->base.height);
+ /* Distance of the macroblock info cursor from offset 0x100 of the buffer;
+ * presumably the cursor starts at map + 0x100 -- confirm where it is
+ * initialised. */
+ header.mb_info_size = dec->mpeg12_mb_info - dec->mpeg12_bo->map - 0x100;
+ header.mb_width_minus1 = mb(dec->base.width) - 1;
+ header.mb_height_minus1 = mb(dec->base.height) - 1;
+ header.width = align(dec->base.width, 16);
+ header.height = align(dec->base.height, 16);
+ header.progressive = desc->frame_pred_frame_dct;
+ /* The destination plus each reference that is actually present. */
+ header.frames = 1 + (desc->ref[0] != NULL) + (desc->ref[1] != NULL);
+ header.picture_structure = desc->picture_structure;
+ header.unk28 = 0x50100;
+
+ memcpy(dec->mpeg12_bo->map, &header, sizeof(header));
+
+ /* One term per command below: header word plus its data words. */
+ PUSH_SPACE(push, 10 + 3 + 2);
+
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+ BEGIN_NV04(push, SUBC_VP(0x400), 9);
+ PUSH_DATA (push, 0x543210); /* each nibble possibly a dma index */
+ PUSH_DATA (push, 0x555001); /* constant */
+ /* Header at offset 0, macroblock info at 0x100, then the coefficient data
+ * after room for one 0x20-byte record per macroblock (0x100-aligned). */
+ PUSH_DATA (push, dec->mpeg12_bo->offset >> 8);
+ PUSH_DATA (push, (dec->mpeg12_bo->offset + 0x100) >> 8);
+ PUSH_DATA (push, (dec->mpeg12_bo->offset + 0x100 +
+ align(0x20 * mb(dec->base.width) *
+ mb(dec->base.height), 0x100)) >> 8);
+ PUSH_DATA (push, dest->interlaced->offset >> 8);
+ PUSH_DATA (push, ref1->interlaced->offset >> 8);
+ PUSH_DATA (push, ref2->interlaced->offset >> 8);
+ PUSH_DATA (push, 6 * 64 * 8 * header.mbs);
+
+ BEGIN_NV04(push, SUBC_VP(0x620), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, SUBC_VP(0x300), 1);
+ PUSH_DATA (push, 0);
+
+ /* Flag both destination planes as being written by the GPU. */
+ for (i = 0; i < 2; i++) {
+ struct nv50_miptree *mt = nv50_miptree(dest->resources[i]);
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ }
+ PUSH_KICK (push);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video.c b/src/gallium/drivers/nouveau/nv50/nv98_video.c
new file mode 100644
index 00000000000..069481de207
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv98_video.h"
+
+#include "util/u_sampler.h"
+#include "util/u_format.h"
+
+/*
+ * decode_bitstream hook installed on the codec by nv98_create_decoder().
+ *
+ * Runs the three decoder stages for one picture, in order: BSP (bitstream
+ * parsing), VP (video processing) and PPP (post-processing). All three are
+ * tagged with the same sequence number, taken from the pre-incremented
+ * dec->fence_seq.
+ *
+ * decoder:      the codec; must be a struct nouveau_vp3_decoder.
+ * video_target: destination buffer; asserted to be PIPE_FORMAT_NV12.
+ * picture:      codec-specific picture description.
+ * num_buffers, data, num_bytes: bitstream chunks, forwarded to the BSP stage.
+ *
+ * If the BSP stage does not report success (2), the VP and PPP stages are
+ * skipped: their inputs (vp_caps, is_ref, refs) may not have been produced.
+ */
+static void
+nv98_decoder_decode_bitstream(struct pipe_video_codec *decoder,
+                              struct pipe_video_buffer *video_target,
+                              struct pipe_picture_desc *picture,
+                              unsigned num_buffers,
+                              const void *const *data,
+                              const unsigned *num_bytes)
+{
+   struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
+   struct nouveau_vp3_video_buffer *target = (struct nouveau_vp3_video_buffer *)video_target;
+   uint32_t comm_seq = ++dec->fence_seq;
+   union pipe_desc desc;
+
+   /* Zeroed so that nothing indeterminate is ever read, even if the BSP
+    * stage bails out before filling them in. */
+   unsigned vp_caps = 0, is_ref = 0, ret;
+   struct nouveau_vp3_video_buffer *refs[16] = {};
+
+   desc.base = picture;
+
+   assert(target->base.buffer_format == PIPE_FORMAT_NV12);
+
+   ret = nv98_decoder_bsp(dec, desc, target, comm_seq,
+                          num_buffers, data, num_bytes,
+                          &vp_caps, &is_ref, refs);
+
+   /* did we decode bitstream correctly? */
+   assert(ret == 2);
+
+   /* assert() vanishes with NDEBUG; do not feed the later stages the
+    * output of a failed BSP run in release builds. */
+   if (ret != 2) {
+      debug_printf("bsp stage failed (%u), skipping vp/ppp\n", ret);
+      return;
+   }
+
+   nv98_decoder_vp(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
+   nv98_decoder_ppp(dec, desc, target, comm_seq);
+}
+
+/*
+ * Create a bitstream video decoder for NV98-class (VP3) hardware.
+ *
+ * Creates a FIFO channel with one push buffer that is shared by the BSP, VP
+ * and PPP engine objects, allocates the VRAM buffers the decoder works with,
+ * loads the firmware for the requested profile and writes the codec id to
+ * each engine.
+ *
+ * context: pipe context the decoder belongs to; stored in base.context.
+ * templ:   codec template; copied into the decoder's base. Its entrypoint,
+ *          profile, width, height and max_references are read here.
+ *
+ * Returns &dec->base on success, the result of vl_create_decoder() when the
+ * XVMC_VL environment variable is set, or NULL on failure.
+ */
+struct pipe_video_codec *
+nv98_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ)
+{
+ struct nouveau_screen *screen = &((struct nv50_context *)context)->screen->base;
+ struct nouveau_vp3_decoder *dec;
+ struct nouveau_pushbuf **push;
+ struct nv04_fifo nv04_data = {.vram = 0xbeef0201, .gart = 0xbeef0202};
+ union nouveau_bo_config cfg;
+
+ /* Buffer config, passed only to the ref_bo allocation further down. */
+ cfg.nv50.tile_mode = 0x20;
+ cfg.nv50.memtype = 0x70;
+
+ int ret, i;
+ /* codec goes to the BSP and VP engines, ppp_codec to PPP (method 0x200). */
+ uint32_t codec = 1, ppp_codec = 3;
+ uint32_t timeout;
+ /* Extra bytes appended to ref_bo after the reference pictures. */
+ u32 tmp_size = 0;
+
+ /* Setting XVMC_VL (to any value) diverts to the generic vl decoder. */
+ if (getenv("XVMC_VL"))
+ return vl_create_decoder(context, templ);
+
+ /* Only the bitstream entrypoint is handled by this decoder. */
+ if (templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+ debug_printf("%x\n", templ->entrypoint);
+ return NULL;
+ }
+
+ dec = CALLOC_STRUCT(nouveau_vp3_decoder);
+ if (!dec)
+ return NULL;
+ dec->client = screen->client;
+ dec->base = *templ;
+ nouveau_vp3_decoder_init_common(&dec->base);
+
+ dec->bsp_idx = 5;
+ dec->vp_idx = 6;
+ dec->ppp_idx = 7;
+
+ ret = nouveau_object_new(&screen->device->object, 0,
+ NOUVEAU_FIFO_CHANNEL_CLASS,
+ &nv04_data, sizeof(nv04_data), &dec->channel[0]);
+
+ if (!ret)
+ ret = nouveau_pushbuf_new(screen->client, dec->channel[0], 4,
+ 32 * 1024, true, &dec->pushbuf[0]);
+
+ /* All three engines share the single channel and push buffer: slots 1 and
+ * 2 are plain aliases of slot 0, not extra references. */
+ for (i = 1; i < 3; ++i) {
+ dec->channel[i] = dec->channel[0];
+ dec->pushbuf[i] = dec->pushbuf[0];
+ }
+ push = dec->pushbuf;
+
+ /* One engine object each for BSP, VP and PPP on the shared channel. */
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x85b1, NULL, 0, &dec->bsp);
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x85b2, NULL, 0, &dec->vp);
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x85b3, NULL, 0, &dec->ppp);
+ if (ret)
+ goto fail;
+
+ /* Bind each object to its subchannel, then write nv04_data.vram to the
+ * consecutive methods starting at 0x180 (presumably the engine's DMA
+ * object slots -- confirm against hardware docs). */
+ BEGIN_NV04(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push[0], dec->bsp->handle);
+
+ BEGIN_NV04(push[0], SUBC_BSP(0x180), 5);
+ for (i = 0; i < 5; i++)
+ PUSH_DATA (push[0], nv04_data.vram);
+
+ BEGIN_NV04(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push[1], dec->vp->handle);
+
+ BEGIN_NV04(push[1], SUBC_VP(0x180), 6);
+ for (i = 0; i < 6; i++)
+ PUSH_DATA (push[1], nv04_data.vram);
+
+ BEGIN_NV04(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push[2], dec->ppp->handle);
+
+ BEGIN_NV04(push[2], SUBC_PPP(0x180), 5);
+ for (i = 0; i < 5; i++)
+ PUSH_DATA (push[2], nv04_data.vram);
+
+ dec->base.context = context;
+ dec->base.decode_bitstream = nv98_decoder_decode_bitstream;
+
+ /* One 1 MiB VRAM buffer per queue slot for the BSP stage; the loop stops
+ * at the first allocation failure. */
+ for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i)
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+ 0, 1 << 20, NULL, &dec->bsp_bo[i]);
+ /* A single 4 MiB buffer backs both inter_bo slots: slot 1 takes a second
+ * reference to the same bo. */
+ if (!ret)
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+ 0x100, 4 << 20, NULL, &dec->inter_bo[0]);
+ if (!ret)
+ nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]);
+ if (ret)
+ goto fail;
+
+ /* Pick the engine codec ids and the size of the extra area in ref_bo. */
+ switch (u_reduce_video_profile(templ->profile)) {
+ case PIPE_VIDEO_FORMAT_MPEG12: {
+ codec = 1;
+ assert(templ->max_references <= 2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_MPEG4: {
+ codec = 4;
+ tmp_size = mb(templ->height)*16 * mb(templ->width)*16;
+ assert(templ->max_references <= 2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_VC1: {
+ /* VC-1 is the only codec here that also changes the PPP codec id. */
+ ppp_codec = codec = 2;
+ tmp_size = mb(templ->height)*16 * mb(templ->width)*16;
+ assert(templ->max_references <= 2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
+ codec = 3;
+ dec->tmp_stride = 16 * mb_half(templ->width) * nouveau_vp3_video_align(templ->height) * 3 / 2;
+ tmp_size = dec->tmp_stride * (templ->max_references + 1);
+ assert(templ->max_references <= 16);
+ break;
+ }
+ default:
+ /* NOTE(review): ret is still 0 here, so the fail path below reports
+ * strerror(0) rather than a real error. */
+ fprintf(stderr, "invalid codec\n");
+ goto fail;
+ }
+
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
+ 0x4000, NULL, &dec->fw_bo);
+ if (ret)
+ goto fail;
+
+ ret = nouveau_vp3_load_firmware(dec, templ->profile, screen->device->chipset);
+ if (ret)
+ goto fw_fail;
+
+ /* Every codec except H.264 (codec 3) gets a 0x400-byte bitplane buffer. */
+ if (codec != 3) {
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
+ 0x400, NULL, &dec->bitplane_bo);
+ if (ret)
+ goto fail;
+ }
+
+ /* ref_bo holds max_references + 2 pictures of ref_stride bytes each,
+ * followed by tmp_size bytes. */
+ dec->ref_stride = mb(templ->width)*16 * (mb_half(templ->height)*32 + nouveau_vp3_video_align(templ->height)/2);
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
+ dec->ref_stride * (templ->max_references+2) + tmp_size,
+ &cfg, &dec->ref_bo);
+ if (ret)
+ goto fail;
+
+ timeout = 0;
+
+ /* Write the codec id and timeout to each engine. Outside the debug-fence
+ * build nothing in this function kicks the push buffer afterwards. */
+ BEGIN_NV04(push[0], SUBC_BSP(0x200), 2);
+ PUSH_DATA (push[0], codec);
+ PUSH_DATA (push[0], timeout);
+
+ BEGIN_NV04(push[1], SUBC_VP(0x200), 2);
+ PUSH_DATA (push[1], codec);
+ PUSH_DATA (push[1], timeout);
+
+ BEGIN_NV04(push[2], SUBC_PPP(0x200), 2);
+ PUSH_DATA (push[2], ppp_codec);
+ PUSH_DATA (push[2], timeout);
+
+ ++dec->fence_seq;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ /* Debug build only: allocate a CPU-visible fence page, ask each engine to
+ * write fence_seq into its own slot (byte offsets 0x00, 0x10, 0x20) and
+ * busy-wait until all three have done so. */
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP,
+ 0, 0x1000, NULL, &dec->fence_bo);
+ if (ret)
+ goto fail;
+
+ /* NOTE(review): the result of this map is not checked before the mapping
+ * is dereferenced on the following lines. */
+ nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client);
+ dec->fence_map = dec->fence_bo->map;
+ dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0;
+ dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
+
+ /* So lets test if the fence is working? */
+ nouveau_pushbuf_space(push[0], 6, 1, 0);
+ PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
+ BEGIN_NV04(push[0], SUBC_BSP(0x240), 3);
+ PUSH_DATAh(push[0], dec->fence_bo->offset);
+ PUSH_DATA (push[0], dec->fence_bo->offset);
+ PUSH_DATA (push[0], dec->fence_seq);
+
+ BEGIN_NV04(push[0], SUBC_BSP(0x304), 1);
+ PUSH_DATA (push[0], 0);
+ PUSH_KICK (push[0]);
+
+ nouveau_pushbuf_space(push[1], 6, 1, 0);
+ PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
+ BEGIN_NV04(push[1], SUBC_VP(0x240), 3);
+ PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push[1], dec->fence_seq);
+
+ BEGIN_NV04(push[1], SUBC_VP(0x304), 1);
+ PUSH_DATA (push[1], 0);
+ PUSH_KICK (push[1]);
+
+ nouveau_pushbuf_space(push[2], 6, 1, 0);
+ PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
+ BEGIN_NV04(push[2], SUBC_PPP(0x240), 3);
+ PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push[2], dec->fence_seq);
+
+ BEGIN_NV04(push[2], SUBC_PPP(0x304), 1);
+ PUSH_DATA (push[2], 0);
+ PUSH_KICK (push[2]);
+
+ /* Poll with no upper bound until every engine has written fence_seq. */
+ usleep(100);
+ while (dec->fence_seq > dec->fence_map[0] ||
+ dec->fence_seq > dec->fence_map[4] ||
+ dec->fence_seq > dec->fence_map[8]) {
+ debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]);
+ usleep(100);
+ }
+ debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]);
+#endif
+
+ return &dec->base;
+
+ /* Both failure paths tear down through dec->base.destroy, which is not
+ * assigned in this function (presumably installed by
+ * nouveau_vp3_decoder_init_common() -- confirm). */
+fw_fail:
+ debug_printf("Cannot create decoder without firmware..\n");
+ dec->base.destroy(&dec->base);
+ return NULL;
+
+fail:
+ debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret);
+ dec->base.destroy(&dec->base);
+ return NULL;
+}
+
+/*
+ * Video buffer constructor for NV98-class hardware. Defers entirely to the
+ * shared VP3 helper, passing NV50_RESOURCE_FLAG_VIDEO as its third argument.
+ *
+ * pipe:    context the buffer is created for.
+ * templat: template describing the requested buffer.
+ *
+ * Returns whatever nouveau_vp3_video_buffer_create() returns.
+ */
+struct pipe_video_buffer *
+nv98_video_buffer_create(struct pipe_context *pipe,
+                         const struct pipe_video_buffer *templat)
+{
+   struct pipe_video_buffer *buffer;
+
+   buffer = nouveau_vp3_video_buffer_create(pipe, templat,
+                                            NV50_RESOURCE_FLAG_VIDEO);
+   return buffer;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video.h b/src/gallium/drivers/nouveau/nv50/nv98_video.h
new file mode 100644
index 00000000000..cec761df4ab
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NV98_VIDEO_H
+#define NV98_VIDEO_H
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_screen.h"
+#include "nouveau_vp3_video.h"
+
+#include "vl/vl_decoder.h"
+#include "vl/vl_types.h"
+
+#include "util/u_video.h"
+
+/*
+ * BSP stage: submit bitstream parsing for one picture.
+ *
+ * dec:       decoder state.
+ * desc:      picture description.
+ * target:    buffer being decoded into.
+ * comm_seq:  sequence number shared by the three stages of this picture.
+ * num_buffers, data, num_bytes: the bitstream chunks.
+ * vp_caps, is_ref, refs: outputs consumed by nv98_decoder_vp().
+ *
+ * In a regular build returns 2 on success, or (unsigned)-1 if the bitstream
+ * buffer could not be mapped; in that case the outputs are not written.
+ */
+extern unsigned
+nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+                 struct nouveau_vp3_video_buffer *target,
+                 unsigned comm_seq, unsigned num_buffers,
+                 const void *const *data, const unsigned *num_bytes,
+                 unsigned *vp_caps, unsigned *is_ref,
+                 struct nouveau_vp3_video_buffer *refs[16]);
+
+/*
+ * VP stage: submit video processing for the picture parsed by the BSP stage.
+ * caps, is_ref and refs are the values produced by nv98_decoder_bsp().
+ */
+extern void
+nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+                struct nouveau_vp3_video_buffer *target, unsigned comm_seq,
+                unsigned caps, unsigned is_ref,
+                struct nouveau_vp3_video_buffer *refs[16]);
+
+/*
+ * PPP stage: submit post-processing that writes the decoded picture into
+ * the target's resources.
+ */
+extern void
+nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+                 struct nouveau_vp3_video_buffer *target, unsigned comm_seq);
+
+#endif /* NV98_VIDEO_H */
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
new file mode 100644
index 00000000000..97d4119b6d1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv98_video.h"
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+/*
+ * Debug-fence builds only: print the BSP fields of the comm area. The
+ * status/pos entry shown is the one selected by the low four bits of
+ * bsp_cur_index.
+ */
+static void dump_comm_bsp(struct comm *comm)
+{
+   const unsigned slot = comm->bsp_cur_index & 0xf;
+
+   debug_printf("Cur seq: %x, bsp byte ofs: %x\n",
+                comm->bsp_cur_index, comm->byte_ofs);
+   debug_printf("Status: %08x, pos: %08x\n",
+                comm->status[slot], comm->pos[slot]);
+}
+#endif
+
+/*
+ * BSP stage: hand one picture's bitstream to the bitstream processor.
+ *
+ * dec:       decoder state; uses pushbuf[0], bsp_bo[], inter_bo[] and, when
+ *            present, bitplane_bo.
+ * desc:      picture description (the h264 member is read for H.264).
+ * target:    buffer being decoded into; forwarded to the vp3 helpers.
+ * comm_seq:  sequence number; selects the bsp_bo and inter_bo slot and is
+ *            written to method 0x710.
+ * num_buffers, data, num_bytes: bitstream chunks, forwarded to
+ *            nouveau_vp3_bsp().
+ * vp_caps, is_ref, refs: outputs, filled by nouveau_vp3_vp_caps().
+ *
+ * Returns 2 in a regular build, the comm status word for this sequence in a
+ * debug-fence build, or (unsigned)-1 if bsp_bo could not be mapped (in which
+ * case the outputs are left untouched).
+ */
+unsigned
+nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target,
+ unsigned comm_seq, unsigned num_buffers,
+ const void *const *data, const unsigned *num_bytes,
+ unsigned *vp_caps, unsigned *is_ref,
+ struct nouveau_vp3_video_buffer *refs[16])
+{
+ struct nouveau_pushbuf *push = dec->pushbuf[0];
+ enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ uint32_t bsp_addr, comm_addr, inter_addr;
+ uint32_t slice_size, bucket_size, ring_size;
+ uint32_t caps;
+ int ret;
+ /* Buffers are picked per sequence number: bsp_bo cycles through the queue
+ * depth, inter_bo alternates between its two slots. */
+ struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
+ struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
+ unsigned fence_extra = 0;
+ /* bitplane_bo is deliberately last so it can be dropped from the list by
+ * shrinking num_refs when it was never allocated. */
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+ { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+ { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+ { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ };
+ int num_refs = sizeof(bo_refs)/sizeof(*bo_refs);
+
+ if (!dec->bitplane_bo)
+ num_refs--;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ fence_extra = 4;
+#endif
+
+ ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client);
+ if (ret) {
+ debug_printf("map failed: %i %s\n", ret, strerror(-ret));
+ return -1;
+ }
+
+ /* The returned caps word is written to method 0x700 below. */
+ caps = nouveau_vp3_bsp(dec, desc, target, comm_seq,
+ num_buffers, data, num_bytes);
+
+ nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
+
+ /* Reserved words: 6 for the 0x700 packet, 9 (H.264) or 8 for the 0x400
+ * packet, fence_extra for the debug fence and 2 for the final 0x300
+ * packet (presumably one header word per BEGIN_NV04 plus its data). */
+ nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 8) + fence_extra + 2, num_refs, 0);
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+ /* Addresses are passed to the engine shifted right by 8 bits. */
+ bsp_addr = bsp_bo->offset >> 8;
+ inter_addr = inter_bo->offset >> 8;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ memset(dec->comm, 0, 0x200);
+ comm_addr = (dec->fence_bo->offset + COMM_OFFSET) >> 8;
+#else
+ /* Regular build: the comm area lives inside bsp_bo at COMM_OFFSET. */
+ comm_addr = bsp_addr + (COMM_OFFSET>>8);
+#endif
+
+ BEGIN_NV04(push, SUBC_BSP(0x700), 5);
+ PUSH_DATA (push, caps); // 700 cmd
+ PUSH_DATA (push, bsp_addr + 1); // 704 strparm_bsp
+ PUSH_DATA (push, bsp_addr + 7); // 708 str addr
+ PUSH_DATA (push, comm_addr); // 70c comm
+ PUSH_DATA (push, comm_seq); // 710 seq
+
+ if (codec != PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+ u32 bitplane_addr;
+ int mpeg12 = (codec == PIPE_VIDEO_FORMAT_MPEG12);
+
+ bitplane_addr = dec->bitplane_bo->offset >> 8;
+
+ /* MPEG-1/2 sends 5 words; other non-H.264 codecs add the two bitplane
+ * words, for 7. */
+ nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);
+ BEGIN_NV04(push, SUBC_BSP(0x400), mpeg12 ? 5 : 7);
+ PUSH_DATA (push, bsp_addr); // 400 picparm addr
+ PUSH_DATA (push, inter_addr); // 404 interparm addr
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 408 interdata addr
+ PUSH_DATA (push, ring_size << 8); // 40c interdata_size
+ if (!mpeg12) {
+ PUSH_DATA (push, bitplane_addr); // 410 BITPLANE_DATA
+ PUSH_DATA (push, 0x400); // 414 BITPLANE_DATA_SIZE
+ }
+ PUSH_DATA (push, 0); // dma idx
+ } else {
+ /* H.264: the inter buffer is split by slice_size and bucket_size, both
+ * computed from the picture's slice count. */
+ nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
+ BEGIN_NV04(push, SUBC_BSP(0x400), 8);
+ PUSH_DATA (push, bsp_addr); // 400 picparm addr
+ PUSH_DATA (push, inter_addr); // 404 interparm addr
+ PUSH_DATA (push, slice_size << 8); // 408 interparm size?
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 40c interdata addr
+ PUSH_DATA (push, ring_size << 8); // 410 interdata size
+ PUSH_DATA (push, inter_addr + slice_size); // 414 bucket?
+ PUSH_DATA (push, bucket_size << 8); // 418 bucket size? unshifted..
+ PUSH_DATA (push, 0); // 41c targets
+ // TODO: Double check 414 / 418 with nvidia trace
+ }
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ /* Debug build: request a fence write, kick, then poll fence_map[0] until
+ * it reaches fence_seq, dumping the comm area every 256 polls. */
+ BEGIN_NV04(push, SUBC_BSP(0x240), 3);
+ PUSH_DATAh(push, dec->fence_bo->offset);
+ PUSH_DATA (push, dec->fence_bo->offset);
+ PUSH_DATA (push, dec->fence_seq);
+
+ BEGIN_NV04(push, SUBC_BSP(0x300), 1);
+ PUSH_DATA (push, 1);
+ PUSH_KICK (push);
+
+ {
+ unsigned spin = 0;
+ do {
+ usleep(100);
+ if ((spin++ & 0xff) == 0xff) {
+ debug_printf("b%u: %u\n", dec->fence_seq, dec->fence_map[0]);
+ dump_comm_bsp(dec->comm);
+ }
+ } while (dec->fence_seq > dec->fence_map[0]);
+ }
+
+ dump_comm_bsp(dec->comm);
+ return dec->comm->status[comm_seq & 0xf];
+#else
+ /* Regular build: submit without waiting and report success. */
+ BEGIN_NV04(push, SUBC_BSP(0x300), 1);
+ PUSH_DATA (push, 0);
+ PUSH_KICK (push);
+ return 2;
+#endif
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c
new file mode 100644
index 00000000000..6b0b7148dcb
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv98_video.h"
+
+/*
+ * Emit the common PPP 0x700 packet (10 data words): strides and picture
+ * size, the four input plane addresses inside ref_bo, and two output
+ * addresses for each of the target's two resources.
+ *
+ * dec:    decoder state; uses pushbuf[2] and ref_bo.
+ * target: destination buffer; resources[0] and resources[1] are written to
+ *         and flagged NOUVEAU_BUFFER_STATUS_GPU_WRITING.
+ * low700: codec-specific value OR-ed into the low bits of the first word.
+ *
+ * Does not reserve push buffer space itself; nv98_decoder_ppp() does that
+ * before calling in here.
+ */
+static void
+nv98_decoder_setup_ppp(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target, uint32_t low700) {
+ struct nouveau_pushbuf *push = dec->pushbuf[2];
+
+ /* All four values go through mb() (presumably pixels to 16-pixel
+ * macroblock units -- confirm against its definition). */
+ uint32_t stride_in = mb(dec->base.width);
+ uint32_t stride_out = mb(target->resources[0]->width0);
+ uint32_t dec_h = mb(dec->base.height);
+ uint32_t dec_w = mb(dec->base.width);
+ uint64_t in_addr;
+ uint32_t y2, cbcr, cbcr2, i;
+ /* The first two entries are filled in below with the target's buffers. */
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { dec->ref_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+ { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+ };
+ unsigned num_refs = sizeof(bo_refs)/sizeof(*bo_refs);
+
+ for (i = 0; i < 2; ++i) {
+ struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
+ bo_refs[i].bo = mt->base.bo;
+ }
+
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+ nouveau_vp3_ycbcr_offsets(dec, &y2, &cbcr, &cbcr2);
+
+ BEGIN_NV04(push, SUBC_PPP(0x700), 10);
+ /* Like every address in this packet, in_addr is shifted right by 8. */
+ in_addr = nouveau_vp3_video_addr(dec, target) >> 8;
+
+ PUSH_DATA (push, (stride_out << 24) | (stride_out << 16) | low700); // 700
+ PUSH_DATA (push, (stride_in << 24) | (stride_in << 16) | (dec_h << 8) | dec_w); // 704
+ assert(dec_w == stride_in);
+
+ /* Input: */
+ PUSH_DATA (push, in_addr); // 708
+ PUSH_DATA (push, in_addr + y2); // 70c
+ PUSH_DATA (push, in_addr + cbcr); // 710
+ PUSH_DATA (push, in_addr + cbcr2); // 714
+
+ /* Output: per target resource, its base address and the address half-way
+ * through it (total_size / 2). */
+ for (i = 0; i < 2; ++i) {
+ struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
+
+ PUSH_DATA (push, mt->base.address >> 8);
+ PUSH_DATA (push, (mt->base.address + mt->total_size/2) >> 8);
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ }
+}
+
+/*
+ * VC-1 flavour of the PPP setup: emits the common 0x700 packet with the
+ * VC-1 low-bits value, then writes pquant (shifted left by 11) to method
+ * 0x400.
+ *
+ * Debug builds assert that deblocking is off and that the coded width and
+ * height are multiples of 16.
+ *
+ * Returns the caps word for the caller to send to the PPP engine (0x10).
+ */
+static uint32_t
+nv98_decoder_vc1_ppp(struct nouveau_vp3_decoder *dec, struct pipe_vc1_picture_desc *desc, struct nouveau_vp3_video_buffer *target)
+{
+   struct nouveau_pushbuf *ppp_push = dec->pushbuf[2];
+   const uint32_t vc1_caps = 0x10;
+
+   nv98_decoder_setup_ppp(dec, target, 0x1412);
+
+   assert(!desc->deblockEnable);
+   assert(!(dec->base.width & 0xf));
+   assert(!(dec->base.height & 0xf));
+
+   BEGIN_NV04(ppp_push, SUBC_PPP(0x400), 1);
+   PUSH_DATA (ppp_push, desc->pquant << 11);
+
+   /* Method 0x728 is left untouched; its purpose is unknown. */
+   return vc1_caps;
+}
+
+/*
+ * PPP stage: submit post-processing for one picture.
+ *
+ * dec:      decoder state; uses pushbuf[2].
+ * desc:     picture description (only the vc1 member is read, for VC-1).
+ * target:   destination buffer, passed on to the setup helpers.
+ * comm_seq: sequence number written to method 0x734.
+ *
+ * A regular build kicks the push buffer and returns immediately; a
+ * debug-fence build waits for the PPP fence slot to reach fence_seq.
+ */
+void
+nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct nouveau_vp3_video_buffer *target, unsigned comm_seq) {
+ enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ struct nouveau_pushbuf *push = dec->pushbuf[2];
+ /* Default caps word; the VC-1 helper returns its own value. */
+ unsigned ppp_caps = 0x10;
+ unsigned fence_extra = 0;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ fence_extra = 4;
+#endif
+
+ /* Reserved words: 11 for the 0x700 packet from setup_ppp, 2 more for
+ * VC-1's 0x400 packet, 3 for the 0x734 packet, fence_extra for the debug
+ * fence and 2 for the final 0x300 packet. The 4 matches the largest
+ * bo_refs list built in setup_ppp. */
+ nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
+
+ /* Each codec passes its own low-bits value for the first 0x700 word. */
+ switch (codec) {
+ case PIPE_VIDEO_FORMAT_MPEG12: {
+ /* Bit 0 separates MPEG-2 (1) from MPEG-1 (0). */
+ unsigned mpeg2 = dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1;
+ nv98_decoder_setup_ppp(dec, target, 0x1410 | mpeg2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_MPEG4: nv98_decoder_setup_ppp(dec, target, 0x1414); break;
+ case PIPE_VIDEO_FORMAT_VC1: ppp_caps = nv98_decoder_vc1_ppp(dec, desc.vc1, target); break;
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: nv98_decoder_setup_ppp(dec, target, 0x1413); break;
+ default: assert(0);
+ }
+ BEGIN_NV04(push, SUBC_PPP(0x734), 2);
+ PUSH_DATA (push, comm_seq);
+ PUSH_DATA (push, ppp_caps);
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ /* Debug build: fence write into the PPP slot (byte offset 0x20, i.e.
+ * fence_map[8]), kick, then poll until it reaches fence_seq. */
+ BEGIN_NV04(push, SUBC_PPP(0x240), 3);
+ PUSH_DATAh(push, (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push, (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push, dec->fence_seq);
+
+ BEGIN_NV04(push, SUBC_PPP(0x300), 1);
+ PUSH_DATA (push, 1);
+ PUSH_KICK (push);
+
+ {
+ unsigned spin = 0;
+
+ do {
+ usleep(100);
+ if ((spin++ & 0xff) == 0xff)
+ debug_printf("p%u: %u\n", dec->fence_seq, dec->fence_map[8]);
+ } while (dec->fence_seq > dec->fence_map[8]);
+ }
+#else
+ BEGIN_NV04(push, SUBC_PPP(0x300), 1);
+ PUSH_DATA (push, 0);
+ PUSH_KICK (push);
+#endif
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
new file mode 100644
index 00000000000..9b756ea73f5
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv98_video.h"
+#include <sys/mman.h>
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+/*
+ * Debug-fence builds only: print the VP status from the comm area and, when
+ * the low byte of pvp_stage is not 0xff, hex-dump the start of the inter
+ * buffer before asserting.
+ *
+ * dec:        decoder state; its client is used for the mapping.
+ * comm:       comm area to report on.
+ * comm_seq:   unused.
+ * inter_bo:   inter buffer to dump; it is unmapped again afterwards.
+ * slice_size: number of bytes to dump on top of comm->byte_ofs.
+ *
+ * Returns early, without dumping, when the status for the current index
+ * is 1.
+ */
+static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq,
+                         struct nouveau_bo *inter_bo, unsigned slice_size)
+{
+   unsigned i, idx = comm->pvp_cur_index & 0xf;
+   debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
+#if 0
+   debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
+   debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
+
+   for (i = 0; i != comm->irq_index; ++i)
+      debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
+   for (i = 0; i != comm->parse_endpos_index; ++i)
+      debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
+#endif
+   debug_printf("mb_y = %u\n", comm->mb_y[idx]);
+   if (comm->status_vp[idx] == 1)
+      return;
+
+   if ((comm->pvp_stage & 0xff) != 0xff) {
+      /* The map call must not live inside assert(): with NDEBUG it would
+       * be compiled out and the dump would read an unmapped buffer. */
+      int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client);
+
+      assert(ret >= 0);
+      if (ret >= 0) {
+         unsigned *map = inter_bo->map;
+
+         for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
+            debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
+         }
+         /* Drop the CPU mapping again so the bo goes back to unmapped. */
+         munmap(inter_bo->map, inter_bo->size);
+         inter_bo->map = NULL;
+      }
+   }
+   assert((comm->pvp_stage & 0xff) == 0xff);
+}
+#endif
+
+/*
+ * Release the reference slot that target occupies: the entry of dec->refs
+ * indexed by target->valid_ref gets its last_used counter zeroed and its
+ * buffer pointer cleared.
+ */
+static void
+nv98_decoder_kick_ref(struct nouveau_vp3_decoder *dec,
+                      struct nouveau_vp3_video_buffer *target)
+{
+   dec->refs[target->valid_ref].last_used = 0;
+   dec->refs[target->valid_ref].vidbuf = NULL;
+}
+
+/*
+ * VP stage: submit video processing for one picture.
+ *
+ * dec:      decoder state; uses pushbuf[1], bsp_bo[], inter_bo[], ref_bo
+ *           and, when present, fw_bo.
+ * desc:     picture description (the h264 member is read for H.264).
+ * target:   buffer being decoded into.
+ * comm_seq: sequence number; selects the same bsp_bo / inter_bo slots the
+ *           BSP stage used and is written to method 0x704.
+ * caps:     caps word written to method 0x700.
+ * is_ref:   when zero, target's reference slot is released.
+ * refs:     reference pictures; entries below max_references are read.
+ *
+ * A regular build kicks the push buffer and returns immediately; a
+ * debug-fence build waits for the VP fence slot to reach fence_seq.
+ */
+void
+nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+                struct nouveau_vp3_video_buffer *target, unsigned comm_seq,
+                unsigned caps, unsigned is_ref,
+                struct nouveau_vp3_video_buffer *refs[16])
+{
+   struct nouveau_pushbuf *push = dec->pushbuf[1];
+   uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr;
+   uint32_t slice_size, bucket_size, ring_size, i;
+   enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+   struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
+   struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
+   u32 fence_extra = 0, codec_extra = 0;
+   /* fw_bo is deliberately last so it can be dropped from the list by
+    * shrinking num_refs when there is no firmware buffer. */
+   struct nouveau_pushbuf_refn bo_refs[] = {
+      { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+      { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+      { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+      { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+      { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+   };
+   int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+   fence_extra = 4;
+#endif
+
+   if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+      nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
+      codec_extra += 2;
+   } else
+      nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);
+
+   if (dec->base.max_references > 2)
+      codec_extra += 1 + (dec->base.max_references - 2);
+
+   /* Slot 16 is the picture being decoded; slots 0..15 are references. */
+   pic_addr[16] = nouveau_vp3_video_addr(dec, target) >> 8;
+   last_addr = null_addr = nouveau_vp3_video_addr(dec, NULL) >> 8;
+
+   /* Slots 0 and 1 are always pushed below, even when max_references is
+    * smaller than 2. Pre-fill every reference slot so that no
+    * indeterminate value can reach the hardware. */
+   for (i = 0; i < 16; ++i)
+      pic_addr[i] = null_addr;
+
+   /* A missing reference repeats the last valid address; a reference whose
+    * slot has since been handed to another buffer gets the null address. */
+   for (i = 0; i < dec->base.max_references; ++i) {
+      if (!refs[i])
+         pic_addr[i] = last_addr;
+      else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i])
+         last_addr = pic_addr[i] = nouveau_vp3_video_addr(dec, refs[i]) >> 8;
+      else
+         pic_addr[i] = null_addr;
+   }
+   if (!is_ref)
+      nv98_decoder_kick_ref(dec, target);
+
+   /* Reserved words: 8 for the 0x700 packet, 3 for the 0x71c packet (not
+    * MPEG-1/2), 6 for the 0x724 packet, codec_extra for the optional 0x400
+    * and 0x438 packets, fence_extra for the debug fence and 2 for the
+    * final 0x300 packet. */
+   nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
+                         6 + codec_extra + fence_extra + 2, num_refs, 0);
+
+   nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+   /* Addresses are passed to the engine shifted right by 8 bits. */
+   bsp_addr = bsp_bo->offset >> 8;
+#if NOUVEAU_VP3_DEBUG_FENCE
+   comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8;
+#else
+   comm_addr = bsp_addr + (COMM_OFFSET>>8);
+#endif
+   inter_addr = inter_bo->offset >> 8;
+   if (dec->fw_bo)
+      ucode_addr = dec->fw_bo->offset >> 8;
+   else
+      ucode_addr = 0;
+
+   BEGIN_NV04(push, SUBC_VP(0x700), 7);
+   PUSH_DATA (push, caps); // 700
+   PUSH_DATA (push, comm_seq); // 704
+   PUSH_DATA (push, 0); // 708 fuc targets, ignored for nv98
+   PUSH_DATA (push, dec->fw_sizes); // 70c
+   PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr
+   PUSH_DATA (push, inter_addr); // 714 inter_parm
+   PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs
+
+   if (bucket_size) {
+      /* The temporary image sits in ref_bo after the max_references + 2
+       * pictures. */
+      uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2);
+
+      BEGIN_NV04(push, SUBC_VP(0x71c), 2);
+      PUSH_DATA (push, tmpimg_addr >> 8); // 71c
+      PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs
+   }
+
+   /* NOTE(review): the offset labels on the last three words are kept as
+    * written; counted sequentially from 0x724 they would be 72c, 730 and
+    * 734 -- confirm which is intended. */
+   BEGIN_NV04(push, SUBC_VP(0x724), 5);
+   PUSH_DATA (push, comm_addr); // 724
+   PUSH_DATA (push, ucode_addr); // 728
+   PUSH_DATA (push, pic_addr[16]); // 734
+   PUSH_DATA (push, pic_addr[0]); // 72c
+   PUSH_DATA (push, pic_addr[1]); // 730
+
+   /* References beyond the first two go to consecutive methods from 0x400. */
+   if (dec->base.max_references > 2) {
+      BEGIN_NV04(push, SUBC_VP(0x400), dec->base.max_references - 2);
+      for (i = 2; i < dec->base.max_references; ++i) {
+         assert(0x400 + (i - 2) * 4 < 0x438);
+         PUSH_DATA (push, pic_addr[i]);
+      }
+   }
+
+   if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+      BEGIN_NV04(push, SUBC_VP(0x438), 1);
+      PUSH_DATA (push, desc.h264->slice_count);
+   }
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+   /* Debug build: fence write into the VP slot (byte offset 0x10, i.e.
+    * fence_map[4]), kick, then poll until it reaches fence_seq, dumping
+    * the comm area every 256 polls. */
+   BEGIN_NV04(push, SUBC_VP(0x240), 3);
+   PUSH_DATAh(push, (dec->fence_bo->offset + 0x10));
+   PUSH_DATA (push, (dec->fence_bo->offset + 0x10));
+   PUSH_DATA (push, dec->fence_seq);
+
+   BEGIN_NV04(push, SUBC_VP(0x300), 1);
+   PUSH_DATA (push, 1);
+   PUSH_KICK(push);
+
+   {
+      unsigned spin = 0;
+      do {
+         usleep(100);
+         if ((spin++ & 0xff) == 0xff) {
+            debug_printf("v%u: %u\n", dec->fence_seq, dec->fence_map[4]);
+            dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
+         }
+      } while (dec->fence_seq > dec->fence_map[4]);
+   }
+   dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
+#else
+   BEGIN_NV04(push, SUBC_VP(0x300), 1);
+   PUSH_DATA (push, 0);
+   PUSH_KICK (push);
+#endif
+}