diff options
Diffstat (limited to 'src/gallium/drivers/nouveau/nv50')
40 files changed, 15555 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h new file mode 100644 index 00000000000..dfbef2c6a30 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h @@ -0,0 +1,416 @@ +#ifndef RNNDB_NV50_2D_XML +#define RNNDB_NV50_2D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- rnndb/nv50_2d.xml ( 11113 bytes, from 2011-07-09 13:43:58) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58) +- ./rnndb/nv_object.xml ( 12912 bytes, from 2012-07-12 09:41:09) +- ./rnndb/nvchipsets.xml ( 3736 bytes, from 2012-07-12 09:41:09) +- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-07-09 13:43:58) +- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58) + +Copyright (C) 2006-2011 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice
(including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + + +#define NV50_2D_DMA_NOTIFY 0x00000180 + +#define NV50_2D_DMA_DST 0x00000184 + +#define NV50_2D_DMA_SRC 0x00000188 + +#define NV50_2D_DMA_COND 0x0000018c + +#define NV50_2D_DST_FORMAT 0x00000200 + +#define NV50_2D_DST_LINEAR 0x00000204 + +#define NV50_2D_DST_TILE_MODE 0x00000208 + +#define NV50_2D_DST_DEPTH 0x0000020c + +#define NV50_2D_DST_LAYER 0x00000210 + +#define NV50_2D_DST_PITCH 0x00000214 + +#define NV50_2D_DST_WIDTH 0x00000218 + +#define NV50_2D_DST_HEIGHT 0x0000021c + +#define NV50_2D_DST_ADDRESS_HIGH 0x00000220 + +#define NV50_2D_DST_ADDRESS_LOW 0x00000224 + +#define NV50_2D_UNK228 0x00000228 + +#define NVC0_2D_UNK228 0x00000228 + +#define NV50_2D_SRC_FORMAT 0x00000230 + +#define NV50_2D_SRC_LINEAR 0x00000234 + +#define NV50_2D_SRC_TILE_MODE 0x00000238 + +#define NV50_2D_SRC_DEPTH 0x0000023c + +#define NV50_2D_SRC_LAYER 0x00000240 + +#define NVC0_2D_UNK0240 0x00000240 + +#define NV50_2D_SRC_PITCH 0x00000244 +#define NV50_2D_SRC_PITCH__MAX 0x00040000 + +#define NV50_2D_SRC_WIDTH 0x00000248 +#define NV50_2D_SRC_WIDTH__MAX 0x00010000 + +#define NV50_2D_SRC_HEIGHT 0x0000024c +#define NV50_2D_SRC_HEIGHT__MAX 0x00010000 + +#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250 + +#define NV50_2D_SRC_ADDRESS_LOW 0x00000254 + +#define NV50_2D_UNK258 0x00000258 + +#define NV50_2D_UNK260 0x00000260 + +#define NV50_2D_COND_ADDRESS_HIGH 0x00000264 + +#define NV50_2D_COND_ADDRESS_LOW 
0x00000268 + +#define NV50_2D_COND_MODE 0x0000026c +#define NV50_2D_COND_MODE_NEVER 0x00000000 +#define NV50_2D_COND_MODE_ALWAYS 0x00000001 +#define NV50_2D_COND_MODE_RES_NON_ZERO 0x00000002 +#define NV50_2D_COND_MODE_EQUAL 0x00000003 +#define NV50_2D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NV50_2D_CLIP_X 0x00000280 + +#define NV50_2D_CLIP_Y 0x00000284 + +#define NV50_2D_CLIP_W 0x00000288 + +#define NV50_2D_CLIP_H 0x0000028c + +#define NV50_2D_CLIP_ENABLE 0x00000290 + +#define NV50_2D_COLOR_KEY_FORMAT 0x00000294 +#define NV50_2D_COLOR_KEY_FORMAT_16BPP 0x00000000 +#define NV50_2D_COLOR_KEY_FORMAT_15BPP 0x00000001 +#define NV50_2D_COLOR_KEY_FORMAT_24BPP 0x00000002 +#define NV50_2D_COLOR_KEY_FORMAT_30BPP 0x00000003 +#define NV50_2D_COLOR_KEY_FORMAT_8BPP 0x00000004 +#define NV50_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005 +#define NV50_2D_COLOR_KEY_FORMAT_32BPP 0x00000006 + +#define NV50_2D_COLOR_KEY 0x00000298 + +#define NV50_2D_COLOR_KEY_ENABLE 0x0000029c + +#define NV50_2D_ROP 0x000002a0 + +#define NV50_2D_BETA1 0x000002a4 +#define NV50_2D_BETA1_BETA1__MASK 0x7f800000 +#define NV50_2D_BETA1_BETA1__SHIFT 23 + +#define NV50_2D_BETA4 0x000002a8 +#define NV50_2D_BETA4_B__MASK 0x000000ff +#define NV50_2D_BETA4_B__SHIFT 0 +#define NV50_2D_BETA4_G__MASK 0x0000ff00 +#define NV50_2D_BETA4_G__SHIFT 8 +#define NV50_2D_BETA4_R__MASK 0x00ff0000 +#define NV50_2D_BETA4_R__SHIFT 16 +#define NV50_2D_BETA4_A__MASK 0xff000000 +#define NV50_2D_BETA4_A__SHIFT 24 + +#define NV50_2D_OPERATION 0x000002ac +#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000 +#define NV50_2D_OPERATION_ROP_AND 0x00000001 +#define NV50_2D_OPERATION_BLEND 0x00000002 +#define NV50_2D_OPERATION_SRCCOPY 0x00000003 +#define NV50_2D_OPERATION_ROP 0x00000004 +#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000005 +#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000006 + +#define NV50_2D_PATTERN_OFFSET 0x000002b0 +#define NV50_2D_PATTERN_OFFSET_X__MASK 0x0000003f +#define NV50_2D_PATTERN_OFFSET_X__SHIFT 0 +#define 
NV50_2D_PATTERN_OFFSET_Y__MASK 0x00003f00 +#define NV50_2D_PATTERN_OFFSET_Y__SHIFT 8 + +#define NV50_2D_PATTERN_SELECT 0x000002b4 +#define NV50_2D_PATTERN_SELECT_MONO_8X8 0x00000000 +#define NV50_2D_PATTERN_SELECT_MONO_64X1 0x00000001 +#define NV50_2D_PATTERN_SELECT_MONO_1X64 0x00000002 +#define NV50_2D_PATTERN_SELECT_COLOR 0x00000003 + +#define NVC0_2D_UNK2DC 0x000002dc + +#define NVC0_2D_UNK2E0 0x000002e0 + +#define NV50_2D_PATTERN_COLOR_FORMAT 0x000002e8 +#define NV50_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000 +#define NV50_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001 +#define NV50_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002 +#define NV50_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003 +#define NV50_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004 +#define NV50_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005 +#define NV50_2D_PATTERN_COLOR_FORMAT_UNK6 0x00000006 + +#define NV50_2D_PATTERN_MONO_FORMAT 0x000002ec +#define NV50_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000 +#define NV50_2D_PATTERN_MONO_FORMAT_LE 0x00000001 + +#define NV50_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0)) +#define NV50_2D_PATTERN_COLOR__ESIZE 0x00000004 +#define NV50_2D_PATTERN_COLOR__LEN 0x00000002 + +#define NV50_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0)) +#define NV50_2D_PATTERN_BITMAP__ESIZE 0x00000004 +#define NV50_2D_PATTERN_BITMAP__LEN 0x00000002 + +#define NV50_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0)) +#define NV50_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004 +#define NV50_2D_PATTERN_X8R8G8B8__LEN 0x00000040 +#define NV50_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff +#define NV50_2D_PATTERN_X8R8G8B8_B__SHIFT 0 +#define NV50_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00 +#define NV50_2D_PATTERN_X8R8G8B8_G__SHIFT 8 +#define NV50_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000 +#define NV50_2D_PATTERN_X8R8G8B8_R__SHIFT 16 + +#define NV50_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0)) +#define NV50_2D_PATTERN_R5G6B5__ESIZE 0x00000004 +#define NV50_2D_PATTERN_R5G6B5__LEN 0x00000020 +#define NV50_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f +#define 
NV50_2D_PATTERN_R5G6B5_B0__SHIFT 0 +#define NV50_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0 +#define NV50_2D_PATTERN_R5G6B5_G0__SHIFT 5 +#define NV50_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800 +#define NV50_2D_PATTERN_R5G6B5_R0__SHIFT 11 +#define NV50_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000 +#define NV50_2D_PATTERN_R5G6B5_B1__SHIFT 16 +#define NV50_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000 +#define NV50_2D_PATTERN_R5G6B5_G1__SHIFT 21 +#define NV50_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000 +#define NV50_2D_PATTERN_R5G6B5_R1__SHIFT 27 + +#define NV50_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0)) +#define NV50_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004 +#define NV50_2D_PATTERN_X1R5G5B5__LEN 0x00000020 +#define NV50_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f +#define NV50_2D_PATTERN_X1R5G5B5_B0__SHIFT 0 +#define NV50_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0 +#define NV50_2D_PATTERN_X1R5G5B5_G0__SHIFT 5 +#define NV50_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00 +#define NV50_2D_PATTERN_X1R5G5B5_R0__SHIFT 10 +#define NV50_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000 +#define NV50_2D_PATTERN_X1R5G5B5_B1__SHIFT 16 +#define NV50_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000 +#define NV50_2D_PATTERN_X1R5G5B5_G1__SHIFT 21 +#define NV50_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000 +#define NV50_2D_PATTERN_X1R5G5B5_R1__SHIFT 26 + +#define NV50_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0)) +#define NV50_2D_PATTERN_Y8__ESIZE 0x00000004 +#define NV50_2D_PATTERN_Y8__LEN 0x00000010 +#define NV50_2D_PATTERN_Y8_Y0__MASK 0x000000ff +#define NV50_2D_PATTERN_Y8_Y0__SHIFT 0 +#define NV50_2D_PATTERN_Y8_Y1__MASK 0x0000ff00 +#define NV50_2D_PATTERN_Y8_Y1__SHIFT 8 +#define NV50_2D_PATTERN_Y8_Y2__MASK 0x00ff0000 +#define NV50_2D_PATTERN_Y8_Y2__SHIFT 16 +#define NV50_2D_PATTERN_Y8_Y3__MASK 0xff000000 +#define NV50_2D_PATTERN_Y8_Y3__SHIFT 24 + +#define NVC0_2D_DRAW_COLOR_LONG(i0) (0x00000540 + 0x4*(i0)) +#define NVC0_2D_DRAW_COLOR_LONG__ESIZE 0x00000004 +#define NVC0_2D_DRAW_COLOR_LONG__LEN 0x00000004 + +#define NV50_2D_DRAW_SHAPE 0x00000580 
+#define NV50_2D_DRAW_SHAPE_POINTS 0x00000000 +#define NV50_2D_DRAW_SHAPE_LINES 0x00000001 +#define NV50_2D_DRAW_SHAPE_LINE_STRIP 0x00000002 +#define NV50_2D_DRAW_SHAPE_TRIANGLES 0x00000003 +#define NV50_2D_DRAW_SHAPE_RECTANGLES 0x00000004 + +#define NV50_2D_DRAW_COLOR_FORMAT 0x00000584 + +#define NV50_2D_DRAW_COLOR 0x00000588 + +#define NV50_2D_UNK58C 0x0000058c +#define NV50_2D_UNK58C_0 0x00000001 +#define NV50_2D_UNK58C_1 0x00000010 +#define NV50_2D_UNK58C_2 0x00000100 +#define NV50_2D_UNK58C_3 0x00001000 + +#define NV50_2D_DRAW_POINT16 0x000005e0 +#define NV50_2D_DRAW_POINT16_X__MASK 0x0000ffff +#define NV50_2D_DRAW_POINT16_X__SHIFT 0 +#define NV50_2D_DRAW_POINT16_Y__MASK 0xffff0000 +#define NV50_2D_DRAW_POINT16_Y__SHIFT 16 + +#define NV50_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0)) +#define NV50_2D_DRAW_POINT32_X__ESIZE 0x00000008 +#define NV50_2D_DRAW_POINT32_X__LEN 0x00000040 + +#define NV50_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0)) +#define NV50_2D_DRAW_POINT32_Y__ESIZE 0x00000008 +#define NV50_2D_DRAW_POINT32_Y__LEN 0x00000040 + +#define NV50_2D_SIFC_BITMAP_ENABLE 0x00000800 + +#define NV50_2D_SIFC_FORMAT 0x00000804 + +#define NV50_2D_SIFC_BITMAP_FORMAT 0x00000808 +#define NV50_2D_SIFC_BITMAP_FORMAT_I1 0x00000000 +#define NV50_2D_SIFC_BITMAP_FORMAT_I4 0x00000001 +#define NV50_2D_SIFC_BITMAP_FORMAT_I8 0x00000002 + +#define NV50_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c + +#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810 +#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000 +#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001 +#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002 + +#define NV50_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814 + +#define NV50_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818 + +#define NV50_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c + +#define NV50_2D_SIFC_WIDTH 0x00000838 + +#define NV50_2D_SIFC_HEIGHT 0x0000083c + +#define NV50_2D_SIFC_DX_DU_FRACT 0x00000840 + +#define NV50_2D_SIFC_DX_DU_INT 0x00000844 + 
+#define NV50_2D_SIFC_DY_DV_FRACT 0x00000848 + +#define NV50_2D_SIFC_DY_DV_INT 0x0000084c + +#define NV50_2D_SIFC_DST_X_FRACT 0x00000850 + +#define NV50_2D_SIFC_DST_X_INT 0x00000854 + +#define NV50_2D_SIFC_DST_Y_FRACT 0x00000858 + +#define NV50_2D_SIFC_DST_Y_INT 0x0000085c + +#define NV50_2D_SIFC_DATA 0x00000860 + +#define NV50_2D_UNK0870 0x00000870 + +#define NV50_2D_UNK0880 0x00000880 + +#define NV50_2D_UNK0884 0x00000884 + +#define NV50_2D_UNK0888 0x00000888 + +#define NV50_2D_BLIT_CONTROL 0x0000088c +#define NV50_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001 +#define NV50_2D_BLIT_CONTROL_ORIGIN__SHIFT 0 +#define NV50_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000 +#define NV50_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001 +#define NV50_2D_BLIT_CONTROL_FILTER__MASK 0x00000010 +#define NV50_2D_BLIT_CONTROL_FILTER__SHIFT 4 +#define NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000 +#define NV50_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010 + +#define NV50_2D_BLIT_DST_X 0x000008b0 + +#define NV50_2D_BLIT_DST_Y 0x000008b4 + +#define NV50_2D_BLIT_DST_W 0x000008b8 + +#define NV50_2D_BLIT_DST_H 0x000008bc + +#define NV50_2D_BLIT_DU_DX_FRACT 0x000008c0 + +#define NV50_2D_BLIT_DU_DX_INT 0x000008c4 + +#define NV50_2D_BLIT_DV_DY_FRACT 0x000008c8 + +#define NV50_2D_BLIT_DV_DY_INT 0x000008cc + +#define NV50_2D_BLIT_SRC_X_FRACT 0x000008d0 + +#define NV50_2D_BLIT_SRC_X_INT 0x000008d4 + +#define NV50_2D_BLIT_SRC_Y_FRACT 0x000008d8 + +#define NV50_2D_BLIT_SRC_Y_INT 0x000008dc + +#define NVC0_2D_FIRMWARE(i0) (0x000008e0 + 0x4*(i0)) +#define NVC0_2D_FIRMWARE__ESIZE 0x00000004 +#define NVC0_2D_FIRMWARE__LEN 0x00000020 + + +#endif /* RNNDB_NV50_2D_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h new file mode 100644 index 00000000000..9dff8b2dd13 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h @@ -0,0 +1,2110 @@ +#ifndef RNNDB_NV50_3D_XML +#define RNNDB_NV50_3D_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- rnndb/nv50_3d.xml ( 65226 bytes, from 2012-01-28 13:46:30) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv_3ddefs.xml ( 16394 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv_object.xml ( 12672 bytes, from 2011-08-11 18:25:12) + +Copyright (C) 2006-2012 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +-
Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NV50_3D_DMA_NOTIFY 0x00000180 + +#define NV50_3D_DMA_ZETA 0x00000184 + +#define NV50_3D_DMA_QUERY 0x00000188 + +#define NV50_3D_DMA_VTXBUF 0x0000018c + +#define NV50_3D_DMA_LOCAL 0x00000190 + +#define NV50_3D_DMA_STACK 0x00000194 + +#define NV50_3D_DMA_CODE_CB 0x00000198 + +#define NV50_3D_DMA_TSC 0x0000019c + +#define NV50_3D_DMA_TIC 0x000001a0 + +#define NV50_3D_DMA_TEXTURE 0x000001a4 + +#define NV50_3D_DMA_STRMOUT 0x000001a8 + +#define NV50_3D_DMA_CLIPID 0x000001ac + +#define NV50_3D_DMA_COLOR(i0) (0x000001c0 + 0x4*(i0)) +#define NV50_3D_DMA_COLOR__ESIZE 0x00000004 +#define NV50_3D_DMA_COLOR__LEN 0x00000008 + +#define NV50_3D_RT(i0) (0x00000200 + 0x20*(i0)) +#define NV50_3D_RT__ESIZE 0x00000020 +#define NV50_3D_RT__LEN 0x00000008 + +#define NV50_3D_RT_ADDRESS_HIGH(i0) (0x00000200 + 0x20*(i0)) + +#define NV50_3D_RT_ADDRESS_LOW(i0) (0x00000204 + 0x20*(i0)) + +#define NV50_3D_RT_FORMAT(i0) (0x00000208 + 0x20*(i0)) + +#define NV50_3D_RT_TILE_MODE(i0) (0x0000020c + 0x20*(i0)) +#define NV50_3D_RT_TILE_MODE_X__MASK 0x0000000f +#define NV50_3D_RT_TILE_MODE_X__SHIFT 0 +#define NV50_3D_RT_TILE_MODE_Y__MASK 0x000000f0 +#define NV50_3D_RT_TILE_MODE_Y__SHIFT 4 +#define NV50_3D_RT_TILE_MODE_Z__MASK 0x00000f00 +#define NV50_3D_RT_TILE_MODE_Z__SHIFT 8 + +#define NV50_3D_RT_LAYER_STRIDE(i0) (0x00000210 + 0x20*(i0)) +#define NV50_3D_RT_LAYER_STRIDE__SHR 2 + +#define NV50_3D_RT_UNK14(i0) (0x00000214 + 0x20*(i0)) + +#define NV50_3D_VTX_ATTR_1F(i0) (0x00000300 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_1F__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_1F__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_2H(i0) (0x00000340 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_2H__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_2H__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_2H_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_2H_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_2H_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_2H_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_2F_X(i0) (0x00000380 + 0x8*(i0)) +#define 
NV50_3D_VTX_ATTR_2F_X__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_2F_X__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_2F_Y(i0) (0x00000384 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_2F_Y__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_2F_Y__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_3F_X(i0) (0x00000400 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_3F_X__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_3F_X__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_3F_Y(i0) (0x00000404 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_3F_Y__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_3F_Y__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_3F_Z(i0) (0x00000408 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_3F_Z__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_3F_Z__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4F_X(i0) (0x00000500 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_4F_X__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_4F_X__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4F_Y(i0) (0x00000504 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_4F_Y__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_4F_Y__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4F_Z(i0) (0x00000508 + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_4F_Z__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_4F_Z__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4F_W(i0) (0x0000050c + 0x10*(i0)) +#define NV50_3D_VTX_ATTR_4F_W__ESIZE 0x00000010 +#define NV50_3D_VTX_ATTR_4F_W__LEN 0x00000010 + +#define NV50_3D_VTX_ATTR_4H_0(i0) (0x00000600 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4H_0__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4H_0__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4H_0_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4H_0_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4H_0_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4H_0_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4H_1(i0) (0x00000604 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4H_1__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4H_1__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4H_1_Z__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4H_1_Z__SHIFT 0 +#define NV50_3D_VTX_ATTR_4H_1_W__MASK 0xffff0000 +#define 
NV50_3D_VTX_ATTR_4H_1_W__SHIFT 16 + +#define NV50_3D_VTX_ATTR_2I(i0) (0x00000680 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_2I__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_2I__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_2I_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_2I_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_2I_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_2I_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_2NI(i0) (0x000006c0 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_2NI__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_2NI__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_2NI_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_2NI_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_2NI_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_2NI_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4I_0(i0) (0x00000700 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4I_0__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4I_0__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4I_0_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4I_0_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4I_0_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4I_0_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4I_1(i0) (0x00000704 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4I_1__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4I_1__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4I_1_Z__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4I_1_Z__SHIFT 0 +#define NV50_3D_VTX_ATTR_4I_1_W__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4I_1_W__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4NI_0(i0) (0x00000780 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4NI_0__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4NI_0__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4NI_0_X__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4NI_0_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4NI_0_Y__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4NI_0_Y__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4NI_1(i0) (0x00000784 + 0x8*(i0)) +#define NV50_3D_VTX_ATTR_4NI_1__ESIZE 0x00000008 +#define NV50_3D_VTX_ATTR_4NI_1__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4NI_1_Z__MASK 0x0000ffff +#define NV50_3D_VTX_ATTR_4NI_1_Z__SHIFT 0 +#define 
NV50_3D_VTX_ATTR_4NI_1_W__MASK 0xffff0000 +#define NV50_3D_VTX_ATTR_4NI_1_W__SHIFT 16 + +#define NV50_3D_VTX_ATTR_4UB(i0) (0x00000800 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_4UB__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_4UB__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4UB_X__MASK 0x000000ff +#define NV50_3D_VTX_ATTR_4UB_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4UB_Y__MASK 0x0000ff00 +#define NV50_3D_VTX_ATTR_4UB_Y__SHIFT 8 +#define NV50_3D_VTX_ATTR_4UB_Z__MASK 0x00ff0000 +#define NV50_3D_VTX_ATTR_4UB_Z__SHIFT 16 +#define NV50_3D_VTX_ATTR_4UB_W__MASK 0xff000000 +#define NV50_3D_VTX_ATTR_4UB_W__SHIFT 24 + +#define NV50_3D_VTX_ATTR_4B(i0) (0x00000840 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_4B__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_4B__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4B_X__MASK 0x000000ff +#define NV50_3D_VTX_ATTR_4B_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4B_Y__MASK 0x0000ff00 +#define NV50_3D_VTX_ATTR_4B_Y__SHIFT 8 +#define NV50_3D_VTX_ATTR_4B_Z__MASK 0x00ff0000 +#define NV50_3D_VTX_ATTR_4B_Z__SHIFT 16 +#define NV50_3D_VTX_ATTR_4B_W__MASK 0xff000000 +#define NV50_3D_VTX_ATTR_4B_W__SHIFT 24 + +#define NV50_3D_VTX_ATTR_4NUB(i0) (0x00000880 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_4NUB__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_4NUB__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4NUB_X__MASK 0x000000ff +#define NV50_3D_VTX_ATTR_4NUB_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4NUB_Y__MASK 0x0000ff00 +#define NV50_3D_VTX_ATTR_4NUB_Y__SHIFT 8 +#define NV50_3D_VTX_ATTR_4NUB_Z__MASK 0x00ff0000 +#define NV50_3D_VTX_ATTR_4NUB_Z__SHIFT 16 +#define NV50_3D_VTX_ATTR_4NUB_W__MASK 0xff000000 +#define NV50_3D_VTX_ATTR_4NUB_W__SHIFT 24 + +#define NV50_3D_VTX_ATTR_4NB(i0) (0x000008c0 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_4NB__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_4NB__LEN 0x00000010 +#define NV50_3D_VTX_ATTR_4NB_X__MASK 0x000000ff +#define NV50_3D_VTX_ATTR_4NB_X__SHIFT 0 +#define NV50_3D_VTX_ATTR_4NB_Y__MASK 0x0000ff00 +#define NV50_3D_VTX_ATTR_4NB_Y__SHIFT 8 +#define 
NV50_3D_VTX_ATTR_4NB_Z__MASK 0x00ff0000 +#define NV50_3D_VTX_ATTR_4NB_Z__SHIFT 16 +#define NV50_3D_VTX_ATTR_4NB_W__MASK 0xff000000 +#define NV50_3D_VTX_ATTR_4NB_W__SHIFT 24 + +#define NV50_3D_VERTEX_ARRAY_FETCH(i0) (0x00000900 + 0x10*(i0)) +#define NV50_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010 +#define NV50_3D_VERTEX_ARRAY_FETCH__LEN 0x00000010 +#define NV50_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff +#define NV50_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0 +#define NV50_3D_VERTEX_ARRAY_FETCH_ENABLE 0x20000000 + +#define NV50_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00000904 + 0x10*(i0)) +#define NV50_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010 +#define NV50_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000010 + +#define NV50_3D_VERTEX_ARRAY_START_LOW(i0) (0x00000908 + 0x10*(i0)) +#define NV50_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010 +#define NV50_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000010 + +#define NV50_3D_VERTEX_ARRAY_DIVISOR(i0) (0x0000090c + 0x10*(i0)) +#define NV50_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010 +#define NV50_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_SCALE_X__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_SCALE_Y__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_SCALE_Z__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0)) +#define NV50_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0)) +#define NV50_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0)) 
+#define NV50_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020 +#define NV50_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010 + +#define NV50_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0)) +#define NV50_3D_VIEWPORT_HORIZ__ESIZE 0x00000010 +#define NV50_3D_VIEWPORT_HORIZ__LEN 0x00000010 +#define NV50_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff +#define NV50_3D_VIEWPORT_HORIZ_X__SHIFT 0 +#define NV50_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000 +#define NV50_3D_VIEWPORT_HORIZ_W__SHIFT 16 + +#define NV50_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0)) +#define NV50_3D_VIEWPORT_VERT__ESIZE 0x00000010 +#define NV50_3D_VIEWPORT_VERT__LEN 0x00000010 +#define NV50_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff +#define NV50_3D_VIEWPORT_VERT_Y__SHIFT 0 +#define NV50_3D_VIEWPORT_VERT_H__MASK 0xffff0000 +#define NV50_3D_VIEWPORT_VERT_H__SHIFT 16 + +#define NV50_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0)) +#define NV50_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010 +#define NV50_3D_DEPTH_RANGE_NEAR__LEN 0x00000010 + +#define NV50_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0)) +#define NV50_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010 +#define NV50_3D_DEPTH_RANGE_FAR__LEN 0x00000010 + +#define NV50_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0)) +#define NV50_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008 +#define NV50_3D_CLIP_RECT_HORIZ__LEN 0x00000008 +#define NV50_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff +#define NV50_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0 +#define NV50_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000 +#define NV50_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16 + +#define NV50_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0)) +#define NV50_3D_CLIP_RECT_VERT__ESIZE 0x00000008 +#define NV50_3D_CLIP_RECT_VERT__LEN 0x00000008 +#define NV50_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff +#define NV50_3D_CLIP_RECT_VERT_MIN__SHIFT 0 +#define NV50_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000 +#define NV50_3D_CLIP_RECT_VERT_MAX__SHIFT 16 + +#define NV50_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0)) +#define NV50_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008 +#define 
NV50_3D_CLIPID_REGION_HORIZ__LEN 0x00000004 +#define NV50_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff +#define NV50_3D_CLIPID_REGION_HORIZ_X__SHIFT 0 +#define NV50_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000 +#define NV50_3D_CLIPID_REGION_HORIZ_W__SHIFT 16 + +#define NV50_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0)) +#define NV50_3D_CLIPID_REGION_VERT__ESIZE 0x00000008 +#define NV50_3D_CLIPID_REGION_VERT__LEN 0x00000004 +#define NV50_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff +#define NV50_3D_CLIPID_REGION_VERT_Y__SHIFT 0 +#define NV50_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000 +#define NV50_3D_CLIPID_REGION_VERT_H__SHIFT 16 + +#define NV50_3D_UNK0D60 0x00000d60 + +#define NV50_3D_UNK0D64 0x00000d64 + +#define NV50_3D_COUNTER_ENABLE 0x00000d68 +#define NV50_3D_COUNTER_ENABLE_VFETCH_VERTICES 0x00000001 +#define NV50_3D_COUNTER_ENABLE_VFETCH_PRIMITIVES 0x00000002 +#define NV50_3D_COUNTER_ENABLE_VP_LAUNCHES 0x00000004 +#define NV50_3D_COUNTER_ENABLE_GP_LAUNCHES 0x00000008 +#define NV50_3D_COUNTER_ENABLE_GP_PRIMITIVES_OUT 0x00000010 +#define NV50_3D_COUNTER_ENABLE_TRANSFORM_FEEDBACK 0x00000020 +#define NV50_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000040 +#define NV50_3D_COUNTER_ENABLE_RAST_PRIMITIVES_PRECLIP 0x00000080 +#define NV50_3D_COUNTER_ENABLE_RAST_PRIMITIVES_POSTCLIP 0x00000100 +#define NV50_3D_COUNTER_ENABLE_FP_PIXELS 0x00000200 +#define NV84_3D_COUNTER_ENABLE_UNK0A 0x00000400 + +#define NV50_3D_UNK0D6C(i0) (0x00000d6c + 0x4*(i0)) +#define NV50_3D_UNK0D6C__ESIZE 0x00000004 +#define NV50_3D_UNK0D6C__LEN 0x00000002 +#define NV50_3D_UNK0D6C_X__MASK 0x0000ffff +#define NV50_3D_UNK0D6C_X__SHIFT 0 +#define NV50_3D_UNK0D6C_Y__MASK 0xffff0000 +#define NV50_3D_UNK0D6C_Y__SHIFT 16 + +#define NV50_3D_VERTEX_BUFFER_FIRST 0x00000d74 + +#define NV50_3D_VERTEX_BUFFER_COUNT 0x00000d78 + +#define NV50_3D_UNK0D7C 0x00000d7c + +#define NV50_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0)) +#define NV50_3D_CLEAR_COLOR__ESIZE 0x00000004 +#define NV50_3D_CLEAR_COLOR__LEN 0x00000004 + 
+#define NV50_3D_CLEAR_DEPTH 0x00000d90 + +#define NV50_3D_STACK_ADDRESS_HIGH 0x00000d94 + +#define NV50_3D_STACK_ADDRESS_LOW 0x00000d98 + +#define NV50_3D_STACK_SIZE_LOG 0x00000d9c + +#define NV50_3D_CLEAR_STENCIL 0x00000da0 + +#define NV50_3D_STRMOUT_PARAMS_LATCH 0x00000da4 + +#define NV50_3D_STRMOUT_PRIMITIVE_LIMIT 0x00000da8 + +#define NV50_3D_POLYGON_MODE_FRONT 0x00000dac +#define NV50_3D_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV50_3D_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV50_3D_POLYGON_MODE_FRONT_FILL 0x00001b02 + +#define NV50_3D_POLYGON_MODE_BACK 0x00000db0 +#define NV50_3D_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV50_3D_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV50_3D_POLYGON_MODE_BACK_FILL 0x00001b02 + +#define NV50_3D_POLYGON_SMOOTH_ENABLE 0x00000db4 + +#define NV50_3D_UNK0DB8 0x00000db8 + +#define NV50_3D_ZCULL_UNK0DBC 0x00000dbc +#define NV50_3D_ZCULL_UNK0DBC_UNK0 0x00000001 +#define NV50_3D_ZCULL_UNK0DBC_UNK16__MASK 0x00030000 +#define NV50_3D_ZCULL_UNK0DBC_UNK16__SHIFT 16 + +#define NV50_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0 + +#define NV50_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4 + +#define NV50_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8 + +#define NV50_3D_UNK0DCC 0x00000dcc + +#define NV50_3D_VTX_ATTR_MASK_UNK0DD0(i0) (0x00000dd0 + 0x4*(i0)) +#define NV50_3D_VTX_ATTR_MASK_UNK0DD0__ESIZE 0x00000004 +#define NV50_3D_VTX_ATTR_MASK_UNK0DD0__LEN 0x00000002 + +#define NV50_3D_ZCULL_UNK0DD8 0x00000dd8 +#define NV50_3D_ZCULL_UNK0DD8_UNK0__MASK 0x00000007 +#define NV50_3D_ZCULL_UNK0DD8_UNK0__SHIFT 0 +#define NVA3_3D_ZCULL_UNK0DD8_UNK9 0x00000200 +#define NV50_3D_ZCULL_UNK0DD8_UNK16__MASK 0xffff0000 +#define NV50_3D_ZCULL_UNK0DD8_UNK16__SHIFT 16 + +#define NV50_3D_UNK0DDC 0x00000ddc + +#define NV50_3D_UNK0DE0 0x00000de0 + +#define NV50_3D_WATCHDOG_TIMER 0x00000de4 + +#define NV50_3D_UNK0DE8 0x00000de8 + +#define NV50_3D_UNK0DEC 0x00000dec + +#define NV50_3D_UNK0DF0 0x00000df0 +#define NV50_3D_UNK0DF0_UNK0 0x00000001 +#define 
NV50_3D_UNK0DF0_UNK1__MASK 0x00000ff0 +#define NV50_3D_UNK0DF0_UNK1__SHIFT 4 + +#define NV50_3D_UNK0DF4 0x00000df4 + +#define NV50_3D_WINDOW_OFFSET_X 0x00000df8 + +#define NV50_3D_WINDOW_OFFSET_Y 0x00000dfc + +#define NV50_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0)) +#define NV50_3D_SCISSOR_ENABLE__ESIZE 0x00000010 +#define NV50_3D_SCISSOR_ENABLE__LEN 0x00000010 + +#define NV50_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0)) +#define NV50_3D_SCISSOR_HORIZ__ESIZE 0x00000010 +#define NV50_3D_SCISSOR_HORIZ__LEN 0x00000010 +#define NV50_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff +#define NV50_3D_SCISSOR_HORIZ_MIN__SHIFT 0 +#define NV50_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000 +#define NV50_3D_SCISSOR_HORIZ_MAX__SHIFT 16 + +#define NV50_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0)) +#define NV50_3D_SCISSOR_VERT__ESIZE 0x00000010 +#define NV50_3D_SCISSOR_VERT__LEN 0x00000010 +#define NV50_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff +#define NV50_3D_SCISSOR_VERT_MIN__SHIFT 0 +#define NV50_3D_SCISSOR_VERT_MAX__MASK 0xffff0000 +#define NV50_3D_SCISSOR_VERT_MAX__SHIFT 16 + +#define NV50_3D_CB_ADDR 0x00000f00 +#define NV50_3D_CB_ADDR_ID__MASK 0x003fff00 +#define NV50_3D_CB_ADDR_ID__SHIFT 8 +#define NV50_3D_CB_ADDR_BUFFER__MASK 0x0000007f +#define NV50_3D_CB_ADDR_BUFFER__SHIFT 0 + +#define NV50_3D_CB_DATA(i0) (0x00000f04 + 0x4*(i0)) +#define NV50_3D_CB_DATA__ESIZE 0x00000004 +#define NV50_3D_CB_DATA__LEN 0x00000010 + +#define NV50_3D_LOCAL_WARPS_LOG_ALLOC 0x00000f44 + +#define NV50_3D_LOCAL_WARPS_NO_CLAMP 0x00000f48 + +#define NV50_3D_STACK_WARPS_LOG_ALLOC 0x00000f4c + +#define NV50_3D_STACK_WARPS_NO_CLAMP 0x00000f50 + +#define NV50_3D_STENCIL_BACK_FUNC_REF 0x00000f54 + +#define NV50_3D_STENCIL_BACK_MASK 0x00000f58 + +#define NV50_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c + +#define NV50_3D_UNK0F60(i0) (0x00000f60 + 0x4*(i0)) +#define NV50_3D_UNK0F60__ESIZE 0x00000004 +#define NV50_3D_UNK0F60__LEN 0x00000004 + +#define NV50_3D_GP_ADDRESS_HIGH 0x00000f70 + +#define NV50_3D_GP_ADDRESS_LOW 
0x00000f74 + +#define NV50_3D_UNK0F78 0x00000f78 + +#define NV50_3D_VP_ADDRESS_HIGH 0x00000f7c + +#define NV50_3D_VP_ADDRESS_LOW 0x00000f80 + +#define NV50_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84 + +#define NV50_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88 + +#define NV50_3D_UNK0F8C 0x00000f8c + +#define NV50_3D_COLOR_MASK_COMMON 0x00000f90 + +#define NV50_3D_UNK0F94 0x00000f94 + +#define NV50_3D_UNK0F98 0x00000f98 + +#define NV50_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0)) +#define NV50_3D_DEPTH_BOUNDS__ESIZE 0x00000004 +#define NV50_3D_DEPTH_BOUNDS__LEN 0x00000002 + +#define NV50_3D_FP_ADDRESS_HIGH 0x00000fa4 + +#define NV50_3D_FP_ADDRESS_LOW 0x00000fa8 + +#define NV50_3D_UNK0FAC 0x00000fac +#define NV50_3D_UNK0FAC_UNK0 0x00000001 +#define NVA0_3D_UNK0FAC_UNK2 0x00000002 +#define NV50_3D_UNK0FAC_UNK1__MASK 0x000ffff0 +#define NV50_3D_UNK0FAC_UNK1__SHIFT 4 + +#define NV50_3D_UNK0FB0 0x00000fb0 + +#define NV50_3D_UNK0FB4 0x00000fb4 + +#define NV50_3D_UNK0FB8 0x00000fb8 + +#define NV50_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0)) +#define NV50_3D_MSAA_MASK__ESIZE 0x00000004 +#define NV50_3D_MSAA_MASK__LEN 0x00000004 + +#define NV50_3D_CLIPID_ADDRESS_HIGH 0x00000fcc + +#define NV50_3D_CLIPID_ADDRESS_LOW 0x00000fd0 + +#define NV50_3D_SEMANTIC_VIEWPORT 0x00000fd4 +#define NV50_3D_SEMANTIC_VIEWPORT_VIEWPORT_ID__MASK 0x000000ff +#define NV50_3D_SEMANTIC_VIEWPORT_VIEWPORT_ID__SHIFT 0 + +#define NV50_3D_UNK0FD8 0x00000fd8 +#define NV50_3D_UNK0FD8_UNK0 0x00000001 +#define NV50_3D_UNK0FD8_UNK1 0x00000010 + +#define NV50_3D_UNK0FDC 0x00000fdc + +#define NV50_3D_ZETA_ADDRESS_HIGH 0x00000fe0 + +#define NV50_3D_ZETA_ADDRESS_LOW 0x00000fe4 + +#define NV50_3D_ZETA_FORMAT 0x00000fe8 + +#define NV50_3D_ZETA_TILE_MODE 0x00000fec + +#define NV50_3D_ZETA_LAYER_STRIDE 0x00000ff0 +#define NV50_3D_ZETA_LAYER_STRIDE__SHR 2 + +#define NV50_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4 +#define NV50_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000 +#define NV50_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16 +#define 
NV50_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff +#define NV50_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0 + +#define NV50_3D_SCREEN_SCISSOR_VERT 0x00000ff8 +#define NV50_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000 +#define NV50_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16 +#define NV50_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff +#define NV50_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0 + +#define NV50_3D_UNK0FFC 0x00000ffc + +#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001000 + 0x4*(i0)) +#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004 +#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000010 + +#define NV50_3D_UNK1040(i0) (0x00001040 + 0x4*(i0)) +#define NV50_3D_UNK1040__ESIZE 0x00000004 +#define NV50_3D_UNK1040__LEN 0x00000010 + +#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001080 + 0x8*(i0)) +#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008 +#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000010 + +#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001084 + 0x8*(i0)) +#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008 +#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000010 + +#define NV50_3D_UNK1100 0x00001100 + +#define NV84_3D_UNK1104 0x00001104 +#define NV84_3D_UNK1104_0__MASK 0x0000ffff +#define NV84_3D_UNK1104_0__SHIFT 0 +#define NV84_3D_UNK1104_0__MAX 0x00002000 +#define NV84_3D_UNK1104_0__ALIGN 0x00000040 +#define NV84_3D_UNK1104_1__MASK 0xffff0000 +#define NV84_3D_UNK1104_1__SHIFT 16 +#define NV84_3D_UNK1104_1__MAX 0x00002000 +#define NV84_3D_UNK1104_1__ALIGN 0x00000040 + +#define NV84_3D_UNK1108 0x00001108 +#define NV84_3D_UNK1108_0 0x00000001 +#define NV84_3D_UNK1108_1 0x00000010 + +#define NV84_3D_UNK110C 0x0000110c + +#define NV84_3D_UNK1110 0x00001110 + +#define NV84_3D_WRCACHE_FLUSH 0x00001114 + +#define NV84_3D_VERTEX_ID_BASE 0x00001118 + +#define NV84_3D_PRIMITIVE_ID 0x0000111c + +#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT(i0) (0x00001120 + 0x4*(i0)) +#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT__ESIZE 0x00000004 +#define 
NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT__LEN 0x00000004 + +#define NVA3_3D_VP_ATTR_EN_ALT(i0) (0x00001130 + 0x4*(i0)) +#define NVA3_3D_VP_ATTR_EN_ALT__ESIZE 0x00000004 +#define NVA3_3D_VP_ATTR_EN_ALT__LEN 0x00000004 +#define NVA3_3D_VP_ATTR_EN_ALT_7__MASK 0xf0000000 +#define NVA3_3D_VP_ATTR_EN_ALT_7__SHIFT 28 +#define NVA3_3D_VP_ATTR_EN_ALT_7_X 0x10000000 +#define NVA3_3D_VP_ATTR_EN_ALT_7_Y 0x20000000 +#define NVA3_3D_VP_ATTR_EN_ALT_7_Z 0x40000000 +#define NVA3_3D_VP_ATTR_EN_ALT_7_W 0x80000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6__MASK 0x0f000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6__SHIFT 24 +#define NVA3_3D_VP_ATTR_EN_ALT_6_X 0x01000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6_Y 0x02000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6_Z 0x04000000 +#define NVA3_3D_VP_ATTR_EN_ALT_6_W 0x08000000 +#define NVA3_3D_VP_ATTR_EN_ALT_5__MASK 0x00f00000 +#define NVA3_3D_VP_ATTR_EN_ALT_5__SHIFT 20 +#define NVA3_3D_VP_ATTR_EN_ALT_5_X 0x00100000 +#define NVA3_3D_VP_ATTR_EN_ALT_5_Y 0x00200000 +#define NVA3_3D_VP_ATTR_EN_ALT_5_Z 0x00400000 +#define NVA3_3D_VP_ATTR_EN_ALT_5_W 0x00800000 +#define NVA3_3D_VP_ATTR_EN_ALT_4__MASK 0x000f0000 +#define NVA3_3D_VP_ATTR_EN_ALT_4__SHIFT 16 +#define NVA3_3D_VP_ATTR_EN_ALT_4_X 0x00010000 +#define NVA3_3D_VP_ATTR_EN_ALT_4_Y 0x00020000 +#define NVA3_3D_VP_ATTR_EN_ALT_4_Z 0x00040000 +#define NVA3_3D_VP_ATTR_EN_ALT_4_W 0x00080000 +#define NVA3_3D_VP_ATTR_EN_ALT_3__MASK 0x0000f000 +#define NVA3_3D_VP_ATTR_EN_ALT_3__SHIFT 12 +#define NVA3_3D_VP_ATTR_EN_ALT_3_X 0x00001000 +#define NVA3_3D_VP_ATTR_EN_ALT_3_Y 0x00002000 +#define NVA3_3D_VP_ATTR_EN_ALT_3_Z 0x00004000 +#define NVA3_3D_VP_ATTR_EN_ALT_3_W 0x00008000 +#define NVA3_3D_VP_ATTR_EN_ALT_2__MASK 0x00000f00 +#define NVA3_3D_VP_ATTR_EN_ALT_2__SHIFT 8 +#define NVA3_3D_VP_ATTR_EN_ALT_2_X 0x00000100 +#define NVA3_3D_VP_ATTR_EN_ALT_2_Y 0x00000200 +#define NVA3_3D_VP_ATTR_EN_ALT_2_Z 0x00000400 +#define NVA3_3D_VP_ATTR_EN_ALT_2_W 0x00000800 +#define NVA3_3D_VP_ATTR_EN_ALT_1__MASK 0x000000f0 +#define NVA3_3D_VP_ATTR_EN_ALT_1__SHIFT 
4 +#define NVA3_3D_VP_ATTR_EN_ALT_1_X 0x00000010 +#define NVA3_3D_VP_ATTR_EN_ALT_1_Y 0x00000020 +#define NVA3_3D_VP_ATTR_EN_ALT_1_Z 0x00000040 +#define NVA3_3D_VP_ATTR_EN_ALT_1_W 0x00000080 +#define NVA3_3D_VP_ATTR_EN_ALT_0__MASK 0x0000000f +#define NVA3_3D_VP_ATTR_EN_ALT_0__SHIFT 0 +#define NVA3_3D_VP_ATTR_EN_ALT_0_X 0x00000001 +#define NVA3_3D_VP_ATTR_EN_ALT_0_Y 0x00000002 +#define NVA3_3D_VP_ATTR_EN_ALT_0_Z 0x00000004 +#define NVA3_3D_VP_ATTR_EN_ALT_0_W 0x00000008 + +#define NVA3_3D_UNK1140 0x00001140 + +#define NVA0_3D_UNK1144 0x00001144 + +#define NVA0_3D_VTX_ATTR_DEFINE 0x0000114c +#define NVA0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff +#define NVA0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0 +#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700 +#define NVA0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8 +#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001 +#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000 +#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000 +#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000 + +#define NVA0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0)) +#define NVA0_3D_VTX_ATTR_DATA__ESIZE 0x00000004 +#define NVA0_3D_VTX_ATTR_DATA__LEN 0x00000004 + +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT(i0) (0x00001160 + 0x4*(i0)) +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT__ESIZE 0x00000004 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT__LEN 0x00000020 
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BUFFER__MASK 0x0000001f +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BUFFER__SHIFT 0 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_CONST 0x00000040 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_OFFSET__MASK 0x001fff80 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_OFFSET__SHIFT 7 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT__MASK 0x07e00000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT__SHIFT 21 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32_32_32 0x00200000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32_32 0x00400000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16_16_16 0x00600000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32 0x00800000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16_16 0x00a00000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8_8_8 0x01400000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16 0x01e00000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32 0x02400000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8_8 0x02600000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8 0x03000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16 0x03600000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8 0x03a00000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_10_10_10_2 0x06000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__MASK 0x38000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__SHIFT 27 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SNORM 0x08000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_UNORM 0x10000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SINT 0x18000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_UINT 0x20000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_USCALED 0x28000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SSCALED 0x30000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_FLOAT 0x38000000 +#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BGRA 0x80000000 + +#define NV50_3D_RT_CONTROL 0x0000121c +#define NV50_3D_RT_CONTROL_COUNT__MASK 0x0000000f 
+#define NV50_3D_RT_CONTROL_COUNT__SHIFT 0 +#define NV50_3D_RT_CONTROL_MAP0__MASK 0x00000070 +#define NV50_3D_RT_CONTROL_MAP0__SHIFT 4 +#define NV50_3D_RT_CONTROL_MAP1__MASK 0x00000380 +#define NV50_3D_RT_CONTROL_MAP1__SHIFT 7 +#define NV50_3D_RT_CONTROL_MAP2__MASK 0x00001c00 +#define NV50_3D_RT_CONTROL_MAP2__SHIFT 10 +#define NV50_3D_RT_CONTROL_MAP3__MASK 0x0000e000 +#define NV50_3D_RT_CONTROL_MAP3__SHIFT 13 +#define NV50_3D_RT_CONTROL_MAP4__MASK 0x00070000 +#define NV50_3D_RT_CONTROL_MAP4__SHIFT 16 +#define NV50_3D_RT_CONTROL_MAP5__MASK 0x00380000 +#define NV50_3D_RT_CONTROL_MAP5__SHIFT 19 +#define NV50_3D_RT_CONTROL_MAP6__MASK 0x01c00000 +#define NV50_3D_RT_CONTROL_MAP6__SHIFT 22 +#define NV50_3D_RT_CONTROL_MAP7__MASK 0x0e000000 +#define NV50_3D_RT_CONTROL_MAP7__SHIFT 25 + +#define NV50_3D_UNK1220 0x00001220 + +#define NV50_3D_RT_ARRAY_MODE 0x00001224 +#define NV50_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NV50_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0 +#define NV50_3D_RT_ARRAY_MODE_MODE__MASK 0x00010000 +#define NV50_3D_RT_ARRAY_MODE_MODE__SHIFT 16 +#define NV50_3D_RT_ARRAY_MODE_MODE_2D_ARRAY 0x00000000 +#define NV50_3D_RT_ARRAY_MODE_MODE_3D 0x00010000 + +#define NV50_3D_ZETA_HORIZ 0x00001228 + +#define NV50_3D_ZETA_VERT 0x0000122c + +#define NV50_3D_ZETA_ARRAY_MODE 0x00001230 +#define NV50_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NV50_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0 +#define NV50_3D_ZETA_ARRAY_MODE_UNK 0x00010000 + +#define NV50_3D_LINKED_TSC 0x00001234 + +#define NV50_3D_UNK1238 0x00001238 + +#define NVA0_3D_DRAW_TFB_BYTES 0x0000123c + +#define NV50_3D_RT_HORIZ(i0) (0x00001240 + 0x8*(i0)) +#define NV50_3D_RT_HORIZ__ESIZE 0x00000008 +#define NV50_3D_RT_HORIZ__LEN 0x00000008 +#define NV50_3D_RT_HORIZ_WIDTH__MASK 0x0fffffff +#define NV50_3D_RT_HORIZ_WIDTH__SHIFT 0 +#define NV50_3D_RT_HORIZ_LINEAR 0x80000000 + +#define NV50_3D_RT_VERT(i0) (0x00001244 + 0x8*(i0)) +#define NV50_3D_RT_VERT__ESIZE 0x00000008 +#define NV50_3D_RT_VERT__LEN 0x00000008 + 
+#define NV50_3D_CB_DEF_ADDRESS_HIGH 0x00001280 + +#define NV50_3D_CB_DEF_ADDRESS_LOW 0x00001284 + +#define NV50_3D_CB_DEF_SET 0x00001288 +#define NV50_3D_CB_DEF_SET_SIZE__MASK 0x0000ffff +#define NV50_3D_CB_DEF_SET_SIZE__SHIFT 0 +#define NV50_3D_CB_DEF_SET_BUFFER__MASK 0x007f0000 +#define NV50_3D_CB_DEF_SET_BUFFER__SHIFT 16 + +#define NV50_3D_UNK128C 0x0000128c +#define NV50_3D_UNK128C_0__MASK 0x00000003 +#define NV50_3D_UNK128C_0__SHIFT 0 +#define NV50_3D_UNK128C_1__MASK 0x00000030 +#define NV50_3D_UNK128C_1__SHIFT 4 +#define NV50_3D_UNK128C_2__MASK 0x00000300 +#define NV50_3D_UNK128C_2__SHIFT 8 +#define NV50_3D_UNK128C_3__MASK 0x00003000 +#define NV50_3D_UNK128C_3__SHIFT 12 + +#define NV50_3D_CALL_LIMIT_LOG 0x00001290 +#define NV50_3D_CALL_LIMIT_LOG_VP__MASK 0x0000000f +#define NV50_3D_CALL_LIMIT_LOG_VP__SHIFT 0 +#define NV50_3D_CALL_LIMIT_LOG_GP__MASK 0x000000f0 +#define NV50_3D_CALL_LIMIT_LOG_GP__SHIFT 4 +#define NV50_3D_CALL_LIMIT_LOG_FP__MASK 0x00000f00 +#define NV50_3D_CALL_LIMIT_LOG_FP__SHIFT 8 + +#define NV50_3D_STRMOUT_BUFFERS_CTRL 0x00001294 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED 0x00000001 +#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE__MASK 0x00000002 +#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE__SHIFT 1 +#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_PRIMITIVES 0x00000000 +#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET 0x00000002 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__MASK 0x000000f0 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT 4 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MASK 0x000fff00 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT 8 +#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX 0x00000800 + +#define NV50_3D_FP_RESULT_COUNT 0x00001298 + +#define NV50_3D_VTX_UNK129C 0x0000129c + +#define NV50_3D_UNK12A0 0x000012a0 + +#define NV50_3D_UNK12A8 0x000012a8 +#define NV50_3D_UNK12A8_UNK1 0x00000001 +#define NV50_3D_UNK12A8_UNK2__MASK 0x000ffff0 +#define NV50_3D_UNK12A8_UNK2__SHIFT 4 + +#define 
NV50_3D_UNK12AC 0x000012ac + +#define NV50_3D_UNK12B0 0x000012b0 +#define NV50_3D_UNK12B0_UNK0__MASK 0x000000ff +#define NV50_3D_UNK12B0_UNK0__SHIFT 0 +#define NV50_3D_UNK12B0_UNK1__MASK 0x0000ff00 +#define NV50_3D_UNK12B0_UNK1__SHIFT 8 +#define NV50_3D_UNK12B0_UNK2__MASK 0x00ff0000 +#define NV50_3D_UNK12B0_UNK2__SHIFT 16 +#define NV50_3D_UNK12B0_UNK3__MASK 0xff000000 +#define NV50_3D_UNK12B0_UNK3__SHIFT 24 +#define NV50_3D_UNK12B0_UNK3__MAX 0x00000080 + +#define NV50_3D_UNK12B4 0x000012b4 + +#define NV50_3D_UNK12B8 0x000012b8 + +#define NV50_3D_DEPTH_TEST_ENABLE 0x000012cc + +#define NV50_3D_D3D_FILL_MODE 0x000012d0 +#define NV50_3D_D3D_FILL_MODE_POINT 0x00000001 +#define NV50_3D_D3D_FILL_MODE_WIREFRAME 0x00000002 +#define NV50_3D_D3D_FILL_MODE_SOLID 0x00000003 + +#define NV50_3D_SHADE_MODEL 0x000012d4 +#define NV50_3D_SHADE_MODEL_FLAT 0x00001d00 +#define NV50_3D_SHADE_MODEL_SMOOTH 0x00001d01 + +#define NV50_3D_LOCAL_ADDRESS_HIGH 0x000012d8 + +#define NV50_3D_LOCAL_ADDRESS_LOW 0x000012dc + +#define NV50_3D_LOCAL_SIZE_LOG 0x000012e0 + +#define NV50_3D_BLEND_INDEPENDENT 0x000012e4 + +#define NV50_3D_DEPTH_WRITE_ENABLE 0x000012e8 + +#define NV50_3D_ALPHA_TEST_ENABLE 0x000012ec + +#define NV50_3D_PM_SET(i0) (0x000012f0 + 0x4*(i0)) +#define NV50_3D_PM_SET__ESIZE 0x00000004 +#define NV50_3D_PM_SET__LEN 0x00000004 + +#define NV50_3D_VB_ELEMENT_U8_SETUP 0x00001300 +#define NV50_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000 +#define NV50_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30 +#define NV50_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff +#define NV50_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0 + +#define NV50_3D_VB_ELEMENT_U8 0x00001304 +#define NV50_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff +#define NV50_3D_VB_ELEMENT_U8_I0__SHIFT 0 +#define NV50_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00 +#define NV50_3D_VB_ELEMENT_U8_I1__SHIFT 8 +#define NV50_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000 +#define NV50_3D_VB_ELEMENT_U8_I2__SHIFT 16 +#define NV50_3D_VB_ELEMENT_U8_I3__MASK 0xff000000 +#define 
NV50_3D_VB_ELEMENT_U8_I3__SHIFT 24 + +#define NV50_3D_D3D_CULL_MODE 0x00001308 +#define NV50_3D_D3D_CULL_MODE_NONE 0x00000001 +#define NV50_3D_D3D_CULL_MODE_FRONT 0x00000002 +#define NV50_3D_D3D_CULL_MODE_BACK 0x00000003 + +#define NV50_3D_DEPTH_TEST_FUNC 0x0000130c +#define NV50_3D_DEPTH_TEST_FUNC_NEVER 0x00000200 +#define NV50_3D_DEPTH_TEST_FUNC_LESS 0x00000201 +#define NV50_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202 +#define NV50_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203 +#define NV50_3D_DEPTH_TEST_FUNC_GREATER 0x00000204 +#define NV50_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205 +#define NV50_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206 +#define NV50_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207 + +#define NV50_3D_ALPHA_TEST_REF 0x00001310 + +#define NV50_3D_ALPHA_TEST_FUNC 0x00001314 +#define NV50_3D_ALPHA_TEST_FUNC_NEVER 0x00000200 +#define NV50_3D_ALPHA_TEST_FUNC_LESS 0x00000201 +#define NV50_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202 +#define NV50_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203 +#define NV50_3D_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NV50_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 +#define NV50_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206 +#define NV50_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207 + +#define NVA0_3D_DRAW_TFB_STRIDE 0x00001318 +#define NVA0_3D_DRAW_TFB_STRIDE__MIN 0x00000001 +#define NVA0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff + +#define NV50_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0)) +#define NV50_3D_BLEND_COLOR__ESIZE 0x00000004 +#define NV50_3D_BLEND_COLOR__LEN 0x00000004 + +#define NV50_3D_UNK132C 0x0000132c + +#define NV50_3D_TSC_FLUSH 0x00001330 +#define NV50_3D_TSC_FLUSH_SPECIFIC 0x00000001 +#define NV50_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_3D_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_3D_TIC_FLUSH 0x00001334 +#define NV50_3D_TIC_FLUSH_SPECIFIC 0x00000001 +#define NV50_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_3D_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_3D_TEX_CACHE_CTL 0x00001338 +#define NV50_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030 +#define 
NV50_3D_TEX_CACHE_CTL_UNK1__SHIFT 4 + +#define NV50_3D_BLEND_SEPARATE_ALPHA 0x0000133c + +#define NV50_3D_BLEND_EQUATION_RGB 0x00001340 +#define NV50_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NV50_3D_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NV50_3D_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NV50_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NV50_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NV50_3D_BLEND_FUNC_SRC_RGB 0x00001344 + +#define NV50_3D_BLEND_FUNC_DST_RGB 0x00001348 + +#define NV50_3D_BLEND_EQUATION_ALPHA 0x0000134c +#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NV50_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NV50_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NV50_3D_BLEND_FUNC_SRC_ALPHA 0x00001350 + +#define NV50_3D_UNK1354 0x00001354 + +#define NV50_3D_BLEND_FUNC_DST_ALPHA 0x00001358 + +#define NV50_3D_BLEND_ENABLE_COMMON 0x0000135c + +#define NV50_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0)) +#define NV50_3D_BLEND_ENABLE__ESIZE 0x00000004 +#define NV50_3D_BLEND_ENABLE__LEN 0x00000008 + +#define NV50_3D_STENCIL_ENABLE 0x00001380 + +#define NV50_3D_STENCIL_FRONT_OP_FAIL 0x00001384 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NV50_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define 
NV50_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 + +#define NV50_3D_STENCIL_FRONT_FUNC_REF 0x00001394 + +#define NV50_3D_STENCIL_FRONT_MASK 0x00001398 + +#define NV50_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c + +#define NV50_3D_UNK13A0 0x000013a0 + +#define NVA0_3D_DRAW_TFB_BASE 0x000013a4 + +#define NV50_3D_FRAG_COLOR_CLAMP_EN 0x000013a8 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000 +#define NV50_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000 +#define 
NV50_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000 + +#define NV50_3D_SCREEN_Y_CONTROL 0x000013ac +#define NV50_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001 +#define NV50_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010 + +#define NV50_3D_LINE_WIDTH 0x000013b0 + +#define NV50_3D_TEX_LIMITS(i0) (0x000013b4 + 0x4*(i0)) +#define NV50_3D_TEX_LIMITS__ESIZE 0x00000004 +#define NV50_3D_TEX_LIMITS__LEN 0x00000003 +#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f +#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0 +#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000 +#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004 +#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0 +#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4 +#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000 +#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007 + +#define NV50_3D_POINT_COORD_REPLACE_MAP(i0) (0x000013c0 + 0x4*(i0)) +#define NV50_3D_POINT_COORD_REPLACE_MAP__ESIZE 0x00000004 +#define NV50_3D_POINT_COORD_REPLACE_MAP__LEN 0x00000010 + +#define NV50_3D_UNK1400_LANES 0x00001400 + +#define NV50_3D_UNK1404 0x00001404 + +#define NV50_3D_UNK1408 0x00001408 + +#define NV50_3D_VP_START_ID 0x0000140c + +#define NV50_3D_GP_START_ID 0x00001410 + +#define NV50_3D_FP_START_ID 0x00001414 + +#define NVA3_3D_UNK1418 0x00001418 + +#define NV50_3D_UNK141C 0x0000141c + +#define NV50_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420 +#define NV50_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001 +#define NV50_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400 + +#define NV50_3D_VERTEX_ARRAY_FLUSH 0x0000142c + +#define NV50_3D_UNK1430 0x00001430 +#define NV50_3D_UNK1430_UNK0 0x00000010 +#define NV50_3D_UNK1430_UNK1 0x00000100 + +#define NV50_3D_VB_ELEMENT_BASE 0x00001434 + +#define NV50_3D_VB_INSTANCE_BASE 0x00001438 + +#define NV50_3D_CLEAR_FLAGS 0x0000143c +#define NV50_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001 +#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT__MASK 0x00000010 +#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT__SHIFT 4 +#define 
NV50_3D_CLEAR_FLAGS_CLEAR_RECT_SCISSOR 0x00000000 +#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT 0x00000010 + +#define NV50_3D_CODE_CB_FLUSH 0x00001440 + +#define NV50_3D_BIND_TSC(i0) (0x00001444 + 0x8*(i0)) +#define NV50_3D_BIND_TSC__ESIZE 0x00000008 +#define NV50_3D_BIND_TSC__LEN 0x00000003 +#define NV50_3D_BIND_TSC_VALID 0x00000001 +#define NV50_3D_BIND_TSC_SAMPLER__MASK 0x000000f0 +#define NV50_3D_BIND_TSC_SAMPLER__SHIFT 4 +#define NV50_3D_BIND_TSC_TSC__MASK 0x001ff000 +#define NV50_3D_BIND_TSC_TSC__SHIFT 12 + +#define NV50_3D_BIND_TIC(i0) (0x00001448 + 0x8*(i0)) +#define NV50_3D_BIND_TIC__ESIZE 0x00000008 +#define NV50_3D_BIND_TIC__LEN 0x00000003 +#define NV50_3D_BIND_TIC_VALID 0x00000001 +#define NV50_3D_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NV50_3D_BIND_TIC_TEXTURE__SHIFT 1 +#define NV50_3D_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NV50_3D_BIND_TIC_TIC__SHIFT 9 + +#define NV50_3D_BIND_TSC2(i0) (0x00001468 + 0x8*(i0)) +#define NV50_3D_BIND_TSC2__ESIZE 0x00000008 +#define NV50_3D_BIND_TSC2__LEN 0x00000003 +#define NV50_3D_BIND_TSC2_VALID 0x00000001 +#define NV50_3D_BIND_TSC2_SAMPLER__MASK 0x00000010 +#define NV50_3D_BIND_TSC2_SAMPLER__SHIFT 4 +#define NV50_3D_BIND_TSC2_TSC__MASK 0x001ff000 +#define NV50_3D_BIND_TSC2_TSC__SHIFT 12 + +#define NV50_3D_BIND_TIC2(i0) (0x0000146c + 0x8*(i0)) +#define NV50_3D_BIND_TIC2__ESIZE 0x00000008 +#define NV50_3D_BIND_TIC2__LEN 0x00000003 +#define NV50_3D_BIND_TIC2_VALID 0x00000001 +#define NV50_3D_BIND_TIC2_TEXTURE__MASK 0x00000002 +#define NV50_3D_BIND_TIC2_TEXTURE__SHIFT 1 +#define NV50_3D_BIND_TIC2_TIC__MASK 0x7ffffe00 +#define NV50_3D_BIND_TIC2_TIC__SHIFT 9 + +#define NV50_3D_STRMOUT_MAP(i0) (0x00001480 + 0x4*(i0)) +#define NV50_3D_STRMOUT_MAP__ESIZE 0x00000004 +#define NV50_3D_STRMOUT_MAP__LEN 0x00000020 + +#define NV50_3D_CLIPID_HEIGHT 0x00001504 +#define NV50_3D_CLIPID_HEIGHT__MAX 0x00002000 + +#define NV50_3D_CLIPID_FILL_RECT_HORIZ 0x00001508 +#define NV50_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff +#define 
NV50_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0 +#define NV50_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000 +#define NV50_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16 + +#define NV50_3D_CLIPID_FILL_RECT_VERT 0x0000150c +#define NV50_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff +#define NV50_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0 +#define NV50_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000 +#define NV50_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16 + +#define NV50_3D_CLIP_DISTANCE_ENABLE 0x00001510 +#define NV50_3D_CLIP_DISTANCE_ENABLE_0 0x00000001 +#define NV50_3D_CLIP_DISTANCE_ENABLE_1 0x00000002 +#define NV50_3D_CLIP_DISTANCE_ENABLE_2 0x00000004 +#define NV50_3D_CLIP_DISTANCE_ENABLE_3 0x00000008 +#define NV50_3D_CLIP_DISTANCE_ENABLE_4 0x00000010 +#define NV50_3D_CLIP_DISTANCE_ENABLE_5 0x00000020 +#define NV50_3D_CLIP_DISTANCE_ENABLE_6 0x00000040 +#define NV50_3D_CLIP_DISTANCE_ENABLE_7 0x00000080 + +#define NV50_3D_SAMPLECNT_ENABLE 0x00001514 + +#define NV50_3D_POINT_SIZE 0x00001518 + +#define NV50_3D_ZCULL_STATCTRS_ENABLE 0x0000151c + +#define NV50_3D_POINT_SPRITE_ENABLE 0x00001520 + +#define NVA0_3D_UNK152C 0x0000152c +#define NVA0_3D_UNK152C_UNK0 0x00000001 +#define NVA0_3D_UNK152C_UNK1 0x00000010 +#define NVA0_3D_UNK152C_UNK2 0x00000100 +#define NVA0_3D_UNK152C_UNK3__MASK 0x000ff000 +#define NVA0_3D_UNK152C_UNK3__SHIFT 12 +#define NVA0_3D_UNK152C_UNK3__MAX 0x00000028 + +#define NV50_3D_COUNTER_RESET 0x00001530 +#define NV50_3D_COUNTER_RESET_SAMPLECNT 0x00000001 +#define NV50_3D_COUNTER_RESET_ZCULL_STATS 0x00000002 +#define NVA0_3D_COUNTER_RESET_STRMOUT_VERTICES 0x00000008 +#define NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK 0x00000010 +#define NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x00000011 +#define NV50_3D_COUNTER_RESET_VFETCH_VERTICES 0x00000012 +#define NV50_3D_COUNTER_RESET_VFETCH_PRIMITIVES 0x00000013 +#define NV50_3D_COUNTER_RESET_VP_LAUNCHES 0x00000015 +#define NV50_3D_COUNTER_RESET_GP_LAUNCHES 0x0000001a +#define NV50_3D_COUNTER_RESET_GP_PRIMITIVES_OUT 
0x0000001b +#define NV50_3D_COUNTER_RESET_RAST_PRIMITIVES_PRECLIP 0x0000001c +#define NV50_3D_COUNTER_RESET_RAST_PRIMITIVES_POSTCLIP 0x0000001d +#define NV50_3D_COUNTER_RESET_FP_PIXELS 0x0000001e + +#define NV50_3D_MULTISAMPLE_ENABLE 0x00001534 + +#define NV50_3D_ZETA_ENABLE 0x00001538 + +#define NV50_3D_MULTISAMPLE_CTRL 0x0000153c +#define NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001 +#define NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010 + +#define NV50_3D_NOPERSPECTIVE_BITMAP(i0) (0x00001540 + 0x4*(i0)) +#define NV50_3D_NOPERSPECTIVE_BITMAP__ESIZE 0x00000004 +#define NV50_3D_NOPERSPECTIVE_BITMAP__LEN 0x00000004 + +#define NV50_3D_COND_ADDRESS_HIGH 0x00001550 + +#define NV50_3D_COND_ADDRESS_LOW 0x00001554 + +#define NV50_3D_COND_MODE 0x00001558 +#define NV50_3D_COND_MODE_NEVER 0x00000000 +#define NV50_3D_COND_MODE_ALWAYS 0x00000001 +#define NV50_3D_COND_MODE_RES_NON_ZERO 0x00000002 +#define NV50_3D_COND_MODE_EQUAL 0x00000003 +#define NV50_3D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NV50_3D_TSC_ADDRESS_HIGH 0x0000155c + +#define NV50_3D_TSC_ADDRESS_LOW 0x00001560 +#define NV50_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020 + +#define NV50_3D_TSC_LIMIT 0x00001564 +#define NV50_3D_TSC_LIMIT__MAX 0x00001fff + +#define NV50_3D_UNK1568 0x00001568 + +#define NV50_3D_POLYGON_OFFSET_FACTOR 0x0000156c + +#define NV50_3D_LINE_SMOOTH_ENABLE 0x00001570 + +#define NV50_3D_TIC_ADDRESS_HIGH 0x00001574 + +#define NV50_3D_TIC_ADDRESS_LOW 0x00001578 + +#define NV50_3D_TIC_LIMIT 0x0000157c + +#define NV50_3D_PM_CONTROL(i0) (0x00001580 + 0x4*(i0)) +#define NV50_3D_PM_CONTROL__ESIZE 0x00000004 +#define NV50_3D_PM_CONTROL__LEN 0x00000004 +#define NV50_3D_PM_CONTROL_UNK0 0x00000001 +#define NV50_3D_PM_CONTROL_UNK1__MASK 0x00000070 +#define NV50_3D_PM_CONTROL_UNK1__SHIFT 4 +#define NV50_3D_PM_CONTROL_UNK2__MASK 0x00ffff00 +#define NV50_3D_PM_CONTROL_UNK2__SHIFT 8 +#define NV50_3D_PM_CONTROL_UNK3__MASK 0xff000000 +#define NV50_3D_PM_CONTROL_UNK3__SHIFT 24 + +#define NV50_3D_ZCULL_REGION 
0x00001590 + +#define NV50_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594 + +#define NV50_3D_STENCIL_BACK_OP_FAIL 0x00001598 +#define NV50_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define NV50_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NV50_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NV50_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NV50_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NV50_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_BACK_OP_ZPASS 0x000015a0 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NV50_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV50_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NV50_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define 
NV50_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define NV50_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 + +#define NV50_3D_UNK15A8 0x000015a8 +#define NV50_3D_UNK15A8_UNK1__MASK 0x00000007 +#define NV50_3D_UNK15A8_UNK1__SHIFT 0 +#define NV50_3D_UNK15A8_UNK2__MASK 0x00000070 +#define NV50_3D_UNK15A8_UNK2__SHIFT 4 + +#define NV50_3D_UNK15AC 0x000015ac + +#define NV50_3D_UNK15B0 0x000015b0 +#define NV50_3D_UNK15B0_0 0x00000001 +#define NV50_3D_UNK15B0_1 0x00000010 +#define NV50_3D_UNK15B0_2 0x00000100 + +#define NV50_3D_CSAA_ENABLE 0x000015b4 + +#define NV50_3D_FRAMEBUFFER_SRGB 0x000015b8 + +#define NV50_3D_POLYGON_OFFSET_UNITS 0x000015bc + +#define NVA3_3D_UNK15C4 0x000015c4 + +#define NVA3_3D_UNK15C8 0x000015c8 + +#define NV50_3D_LAYER 0x000015cc +#define NV50_3D_LAYER_IDX__MASK 0x0000ffff +#define NV50_3D_LAYER_IDX__SHIFT 0 +#define NV50_3D_LAYER_USE_GP 0x00010000 + +#define NV50_3D_MULTISAMPLE_MODE 0x000015d0 +#define NV50_3D_MULTISAMPLE_MODE_MS1 0x00000000 +#define NV50_3D_MULTISAMPLE_MODE_MS2 0x00000001 +#define NV50_3D_MULTISAMPLE_MODE_MS4 0x00000002 +#define NV50_3D_MULTISAMPLE_MODE_MS8 0x00000003 +#define NV50_3D_MULTISAMPLE_MODE_MS8_ALT 0x00000004 +#define NV50_3D_MULTISAMPLE_MODE_MS2_ALT 0x00000005 +#define NV50_3D_MULTISAMPLE_MODE_UNK6 0x00000006 +#define NV50_3D_MULTISAMPLE_MODE_MS4_CS4 0x00000008 +#define NV50_3D_MULTISAMPLE_MODE_MS4_CS12 0x00000009 +#define NV50_3D_MULTISAMPLE_MODE_MS8_CS8 0x0000000a +#define NV50_3D_MULTISAMPLE_MODE_MS8_CS24 0x0000000b + +#define NV50_3D_VERTEX_BEGIN_D3D 0x000015d4 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004 +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define 
NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NV50_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000 +#define NV84_3D_VERTEX_BEGIN_D3D_PRIMITIVE_ID_CONT 0x20000000 +#define NVA0_3D_VERTEX_BEGIN_D3D_INSTANCE_CONT 0x40000000 + +#define NV50_3D_VERTEX_END_D3D 0x000015d8 +#define NV50_3D_VERTEX_END_D3D_UNK0 0x00000001 +#define NVA0_3D_VERTEX_END_D3D_UNK1 0x00000002 + +#define NV50_3D_VERTEX_BEGIN_GL 0x000015dc +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009 +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x10000000 +#define NV84_3D_VERTEX_BEGIN_GL_PRIMITIVE_ID_CONT 0x20000000 +#define NVA0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x40000000 + +#define NV50_3D_VERTEX_END_GL 0x000015e0 +#define NV50_3D_VERTEX_END_GL_UNK0 0x00000001 +#define 
NVA0_3D_VERTEX_END_GL_UNK1 0x00000002 + +#define NV50_3D_EDGEFLAG 0x000015e4 + +#define NV50_3D_VB_ELEMENT_U32 0x000015e8 + +#define NV50_3D_VB_ELEMENT_U16_SETUP 0x000015ec +#define NV50_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000 +#define NV50_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30 +#define NV50_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff +#define NV50_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0 + +#define NV50_3D_VB_ELEMENT_U16 0x000015f0 +#define NV50_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff +#define NV50_3D_VB_ELEMENT_U16_I0__SHIFT 0 +#define NV50_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000 +#define NV50_3D_VB_ELEMENT_U16_I1__SHIFT 16 + +#define NV50_3D_VERTEX_BASE_HIGH 0x000015f4 + +#define NV50_3D_VERTEX_BASE_LOW 0x000015f8 + +#define NV50_3D_VERTEX_DATA 0x00001640 + +#define NV50_3D_PRIM_RESTART_ENABLE 0x00001644 + +#define NV50_3D_PRIM_RESTART_INDEX 0x00001648 + +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001 +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010 +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100 +#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000 + +#define NV50_3D_VP_ATTR_EN(i0) (0x00001650 + 0x4*(i0)) +#define NV50_3D_VP_ATTR_EN__ESIZE 0x00000004 +#define NV50_3D_VP_ATTR_EN__LEN 0x00000002 +#define NV50_3D_VP_ATTR_EN_7__MASK 0xf0000000 +#define NV50_3D_VP_ATTR_EN_7__SHIFT 28 +#define NV50_3D_VP_ATTR_EN_7_X 0x10000000 +#define NV50_3D_VP_ATTR_EN_7_Y 0x20000000 +#define NV50_3D_VP_ATTR_EN_7_Z 0x40000000 +#define NV50_3D_VP_ATTR_EN_7_W 0x80000000 +#define NV50_3D_VP_ATTR_EN_6__MASK 0x0f000000 +#define NV50_3D_VP_ATTR_EN_6__SHIFT 24 +#define NV50_3D_VP_ATTR_EN_6_X 0x01000000 +#define NV50_3D_VP_ATTR_EN_6_Y 0x02000000 +#define NV50_3D_VP_ATTR_EN_6_Z 0x04000000 +#define NV50_3D_VP_ATTR_EN_6_W 0x08000000 +#define NV50_3D_VP_ATTR_EN_5__MASK 0x00f00000 +#define NV50_3D_VP_ATTR_EN_5__SHIFT 20 +#define NV50_3D_VP_ATTR_EN_5_X 0x00100000 +#define NV50_3D_VP_ATTR_EN_5_Y 
0x00200000 +#define NV50_3D_VP_ATTR_EN_5_Z 0x00400000 +#define NV50_3D_VP_ATTR_EN_5_W 0x00800000 +#define NV50_3D_VP_ATTR_EN_4__MASK 0x000f0000 +#define NV50_3D_VP_ATTR_EN_4__SHIFT 16 +#define NV50_3D_VP_ATTR_EN_4_X 0x00010000 +#define NV50_3D_VP_ATTR_EN_4_Y 0x00020000 +#define NV50_3D_VP_ATTR_EN_4_Z 0x00040000 +#define NV50_3D_VP_ATTR_EN_4_W 0x00080000 +#define NV50_3D_VP_ATTR_EN_3__MASK 0x0000f000 +#define NV50_3D_VP_ATTR_EN_3__SHIFT 12 +#define NV50_3D_VP_ATTR_EN_3_X 0x00001000 +#define NV50_3D_VP_ATTR_EN_3_Y 0x00002000 +#define NV50_3D_VP_ATTR_EN_3_Z 0x00004000 +#define NV50_3D_VP_ATTR_EN_3_W 0x00008000 +#define NV50_3D_VP_ATTR_EN_2__MASK 0x00000f00 +#define NV50_3D_VP_ATTR_EN_2__SHIFT 8 +#define NV50_3D_VP_ATTR_EN_2_X 0x00000100 +#define NV50_3D_VP_ATTR_EN_2_Y 0x00000200 +#define NV50_3D_VP_ATTR_EN_2_Z 0x00000400 +#define NV50_3D_VP_ATTR_EN_2_W 0x00000800 +#define NV50_3D_VP_ATTR_EN_1__MASK 0x000000f0 +#define NV50_3D_VP_ATTR_EN_1__SHIFT 4 +#define NV50_3D_VP_ATTR_EN_1_X 0x00000010 +#define NV50_3D_VP_ATTR_EN_1_Y 0x00000020 +#define NV50_3D_VP_ATTR_EN_1_Z 0x00000040 +#define NV50_3D_VP_ATTR_EN_1_W 0x00000080 +#define NV50_3D_VP_ATTR_EN_0__MASK 0x0000000f +#define NV50_3D_VP_ATTR_EN_0__SHIFT 0 +#define NV50_3D_VP_ATTR_EN_0_X 0x00000001 +#define NV50_3D_VP_ATTR_EN_0_Y 0x00000002 +#define NV50_3D_VP_ATTR_EN_0_Z 0x00000004 +#define NV50_3D_VP_ATTR_EN_0_W 0x00000008 + +#define NV50_3D_POINT_SMOOTH_ENABLE 0x00001658 + +#define NV50_3D_POINT_RASTER_RULES 0x0000165c +#define NV50_3D_POINT_RASTER_RULES_OGL 0x00000000 +#define NV50_3D_POINT_RASTER_RULES_D3D 0x00000001 + +#define NV50_3D_POINT_SPRITE_CTRL 0x00001660 +#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN__MASK 0x00000010 +#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN__SHIFT 4 +#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN_LOWER_LEFT 0x00000000 +#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN_UPPER_LEFT 0x00000010 + +#define NVA0_3D_TEX_MISC 0x00001664 +#define NVA0_3D_TEX_MISC_UNK1 0x00000002 +#define 
NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004 + +#define NV50_3D_LINE_SMOOTH_BLUR 0x00001668 +#define NV50_3D_LINE_SMOOTH_BLUR_LOW 0x00000000 +#define NV50_3D_LINE_SMOOTH_BLUR_MEDIUM 0x00000001 +#define NV50_3D_LINE_SMOOTH_BLUR_HIGH 0x00000002 + +#define NV50_3D_LINE_STIPPLE_ENABLE 0x0000166c + +#define NV50_3D_COVERAGE_LUT(i0) (0x00001670 + 0x4*(i0)) +#define NV50_3D_COVERAGE_LUT__ESIZE 0x00000004 +#define NV50_3D_COVERAGE_LUT__LEN 0x00000004 +#define NV50_3D_COVERAGE_LUT_0__MASK 0x000000ff +#define NV50_3D_COVERAGE_LUT_0__SHIFT 0 +#define NV50_3D_COVERAGE_LUT_1__MASK 0x0000ff00 +#define NV50_3D_COVERAGE_LUT_1__SHIFT 8 +#define NV50_3D_COVERAGE_LUT_2__MASK 0x00ff0000 +#define NV50_3D_COVERAGE_LUT_2__SHIFT 16 +#define NV50_3D_COVERAGE_LUT_3__MASK 0xff000000 +#define NV50_3D_COVERAGE_LUT_3__SHIFT 24 + +#define NV50_3D_LINE_STIPPLE 0x00001680 +#define NV50_3D_LINE_STIPPLE_FACTOR_M1__MASK 0x000000ff +#define NV50_3D_LINE_STIPPLE_FACTOR_M1__SHIFT 0 +#define NV50_3D_LINE_STIPPLE_PATTERN__MASK 0x00ffff00 +#define NV50_3D_LINE_STIPPLE_PATTERN__SHIFT 8 + +#define NV50_3D_PROVOKING_VERTEX_LAST 0x00001684 + +#define NV50_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688 + +#define NV50_3D_POLYGON_STIPPLE_ENABLE 0x0000168c + +#define NV50_3D_UNK1690 0x00001690 +#define NV50_3D_UNK1690_ALWAYS_DERIV 0x00000001 +#define NV50_3D_UNK1690_UNK16 0x00010000 + +#define NV50_3D_SET_PROGRAM_CB 0x00001694 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM__MASK 0x000000f0 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM__SHIFT 4 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX 0x00000000 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY 0x00000020 +#define NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT 0x00000030 +#define NV50_3D_SET_PROGRAM_CB_INDEX__MASK 0x00000f00 +#define NV50_3D_SET_PROGRAM_CB_INDEX__SHIFT 8 +#define NV50_3D_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000 +#define NV50_3D_SET_PROGRAM_CB_BUFFER__SHIFT 12 +#define NV50_3D_SET_PROGRAM_CB_VALID 0x00000001 + +#define NV50_3D_UNK1698 0x00001698 +#define NV50_3D_UNK1698_0 
0x00000001 +#define NV50_3D_UNK1698_1 0x00000010 +#define NV50_3D_UNK1698_2 0x00000100 + +#define NVA3_3D_SAMPLE_SHADING 0x0000169c +#define NVA3_3D_SAMPLE_SHADING_MIN_SAMPLES__MASK 0x0000000f +#define NVA3_3D_SAMPLE_SHADING_MIN_SAMPLES__SHIFT 0 +#define NVA3_3D_SAMPLE_SHADING_ENABLE 0x00000010 + +#define NVA3_3D_UNK16A0 0x000016a0 + +#define NV50_3D_VP_RESULT_MAP_SIZE 0x000016ac + +#define NV50_3D_VP_REG_ALLOC_TEMP 0x000016b0 + +#define NVA0_3D_UNK16B4 0x000016b4 +#define NVA0_3D_UNK16B4_UNK0 0x00000001 +#define NVA3_3D_UNK16B4_UNK1 0x00000002 + +#define NV50_3D_VP_REG_ALLOC_RESULT 0x000016b8 + +#define NV50_3D_VP_RESULT_MAP(i0) (0x000016bc + 0x4*(i0)) +#define NV50_3D_VP_RESULT_MAP__ESIZE 0x00000004 +#define NV50_3D_VP_RESULT_MAP__LEN 0x00000011 +#define NV50_3D_VP_RESULT_MAP_0__MASK 0x000000ff +#define NV50_3D_VP_RESULT_MAP_0__SHIFT 0 +#define NV50_3D_VP_RESULT_MAP_1__MASK 0x0000ff00 +#define NV50_3D_VP_RESULT_MAP_1__SHIFT 8 +#define NV50_3D_VP_RESULT_MAP_2__MASK 0x00ff0000 +#define NV50_3D_VP_RESULT_MAP_2__SHIFT 16 +#define NV50_3D_VP_RESULT_MAP_3__MASK 0xff000000 +#define NV50_3D_VP_RESULT_MAP_3__SHIFT 24 + +#define NV50_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0)) +#define NV50_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004 +#define NV50_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020 + +#define NVA0_3D_STRMOUT_OFFSET(i0) (0x00001780 + 0x4*(i0)) +#define NVA0_3D_STRMOUT_OFFSET__ESIZE 0x00000004 +#define NVA0_3D_STRMOUT_OFFSET__LEN 0x00000004 + +#define NV50_3D_GP_ENABLE 0x00001798 + +#define NV50_3D_GP_REG_ALLOC_TEMP 0x000017a0 + +#define NV50_3D_GP_REG_ALLOC_RESULT 0x000017a8 + +#define NV50_3D_GP_RESULT_MAP_SIZE 0x000017ac + +#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE 0x000017b0 +#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS 0x00000001 +#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP 0x00000002 +#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP 0x00000003 + +#define NV50_3D_RASTERIZE_ENABLE 0x000017b4 + +#define NV50_3D_STRMOUT_ENABLE 
0x000017b8 + +#define NV50_3D_GP_RESULT_MAP(i0) (0x000017fc + 0x4*(i0)) +#define NV50_3D_GP_RESULT_MAP__ESIZE 0x00000004 +#define NV50_3D_GP_RESULT_MAP__LEN 0x00000021 +#define NV50_3D_GP_RESULT_MAP_0__MASK 0x000000ff +#define NV50_3D_GP_RESULT_MAP_0__SHIFT 0 +#define NV50_3D_GP_RESULT_MAP_1__MASK 0x0000ff00 +#define NV50_3D_GP_RESULT_MAP_1__SHIFT 8 +#define NV50_3D_GP_RESULT_MAP_2__MASK 0x00ff0000 +#define NV50_3D_GP_RESULT_MAP_2__SHIFT 16 +#define NV50_3D_GP_RESULT_MAP_3__MASK 0xff000000 +#define NV50_3D_GP_RESULT_MAP_3__SHIFT 24 + +#define NV50_3D_POLYGON_OFFSET_CLAMP 0x0000187c + +#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT(i0) (0x00001880 + 0x4*(i0)) +#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT__ESIZE 0x00000004 +#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT__LEN 0x00000020 + +#define NV50_3D_GP_VIEWPORT_ID_ENABLE 0x00001900 + +#define NV50_3D_SEMANTIC_COLOR 0x00001904 +#define NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK 0x000000ff +#define NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT 0 +#define NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK 0x0000ff00 +#define NV50_3D_SEMANTIC_COLOR_BFC0_ID__SHIFT 8 +#define NV50_3D_SEMANTIC_COLOR_COLR_NR__MASK 0x00ff0000 +#define NV50_3D_SEMANTIC_COLOR_COLR_NR__SHIFT 16 +#define NV50_3D_SEMANTIC_COLOR_CLMP_EN 0x01000000 + +#define NV50_3D_SEMANTIC_CLIP 0x00001908 +#define NV50_3D_SEMANTIC_CLIP_CLIP_START__MASK 0x000000ff +#define NV50_3D_SEMANTIC_CLIP_CLIP_START__SHIFT 0 +#define NV50_3D_SEMANTIC_CLIP_CLIP_NUM__MASK 0x00000f00 +#define NV50_3D_SEMANTIC_CLIP_CLIP_NUM__SHIFT 8 + +#define NV50_3D_SEMANTIC_LAYER 0x0000190c +#define NV50_3D_SEMANTIC_LAYER_LAYER_ID__MASK 0x000000ff +#define NV50_3D_SEMANTIC_LAYER_LAYER_ID__SHIFT 0 + +#define NV50_3D_SEMANTIC_PTSZ 0x00001910 +#define NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK 0x00000001 +#define NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__SHIFT 0 +#define NV50_3D_SEMANTIC_PTSZ_PTSZ_ID__MASK 0x00000ff0 +#define NV50_3D_SEMANTIC_PTSZ_PTSZ_ID__SHIFT 4 + +#define NV50_3D_SEMANTIC_PRIM_ID 0x00001914 +#define 
NV50_3D_SEMANTIC_PRIM_ID_PRIM_ID__MASK 0x000000ff +#define NV50_3D_SEMANTIC_PRIM_ID_PRIM_ID__SHIFT 0 + +#define NV50_3D_CULL_FACE_ENABLE 0x00001918 + +#define NV50_3D_FRONT_FACE 0x0000191c +#define NV50_3D_FRONT_FACE_CW 0x00000900 +#define NV50_3D_FRONT_FACE_CCW 0x00000901 + +#define NV50_3D_CULL_FACE 0x00001920 +#define NV50_3D_CULL_FACE_FRONT 0x00000404 +#define NV50_3D_CULL_FACE_BACK 0x00000405 +#define NV50_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 + +#define NV50_3D_LINE_LAST_PIXEL 0x00001924 + +#define NVA3_3D_FP_MULTISAMPLE 0x00001928 +#define NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK 0x00000001 +#define NVA3_3D_FP_MULTISAMPLE_FORCE_PER_SAMPLE 0x00000002 + +#define NV50_3D_VIEWPORT_TRANSFORM_EN 0x0000192c + +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001 +#define NVA0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002 +#define NVA0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000 +#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000 +#define NV84_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000 + +#define NV50_3D_CLIP_DISTANCE_MODE 0x00001940 +#define NV50_3D_CLIP_DISTANCE_MODE_0__MASK 0x00000001 +#define NV50_3D_CLIP_DISTANCE_MODE_0__SHIFT 0 +#define NV50_3D_CLIP_DISTANCE_MODE_0_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_0_CULL 0x00000001 +#define NV50_3D_CLIP_DISTANCE_MODE_1__MASK 0x00000010 +#define NV50_3D_CLIP_DISTANCE_MODE_1__SHIFT 4 +#define NV50_3D_CLIP_DISTANCE_MODE_1_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_1_CULL 
0x00000010 +#define NV50_3D_CLIP_DISTANCE_MODE_2__MASK 0x00000100 +#define NV50_3D_CLIP_DISTANCE_MODE_2__SHIFT 8 +#define NV50_3D_CLIP_DISTANCE_MODE_2_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_2_CULL 0x00000100 +#define NV50_3D_CLIP_DISTANCE_MODE_3__MASK 0x00001000 +#define NV50_3D_CLIP_DISTANCE_MODE_3__SHIFT 12 +#define NV50_3D_CLIP_DISTANCE_MODE_3_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_3_CULL 0x00001000 +#define NV50_3D_CLIP_DISTANCE_MODE_4__MASK 0x00010000 +#define NV50_3D_CLIP_DISTANCE_MODE_4__SHIFT 16 +#define NV50_3D_CLIP_DISTANCE_MODE_4_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_4_CULL 0x00010000 +#define NV50_3D_CLIP_DISTANCE_MODE_5__MASK 0x00100000 +#define NV50_3D_CLIP_DISTANCE_MODE_5__SHIFT 20 +#define NV50_3D_CLIP_DISTANCE_MODE_5_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_5_CULL 0x00100000 +#define NV50_3D_CLIP_DISTANCE_MODE_6__MASK 0x01000000 +#define NV50_3D_CLIP_DISTANCE_MODE_6__SHIFT 24 +#define NV50_3D_CLIP_DISTANCE_MODE_6_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_6_CULL 0x01000000 +#define NV50_3D_CLIP_DISTANCE_MODE_7__MASK 0x10000000 +#define NV50_3D_CLIP_DISTANCE_MODE_7__SHIFT 28 +#define NV50_3D_CLIP_DISTANCE_MODE_7_CLIP 0x00000000 +#define NV50_3D_CLIP_DISTANCE_MODE_7_CULL 0x10000000 + +#define NVA3_3D_UNK1944 0x00001944 + +#define NV50_3D_CLIP_RECTS_EN 0x0000194c + +#define NV50_3D_CLIP_RECTS_MODE 0x00001950 +#define NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000 +#define NV50_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001 +#define NV50_3D_CLIP_RECTS_MODE_NEVER 0x00000002 + +#define NV50_3D_ZCULL_VALIDATE 0x00001954 +#define NV50_3D_ZCULL_VALIDATE_CLEAR_UNK0 0x00000001 +#define NV50_3D_ZCULL_VALIDATE_CLEAR_UNK1 0x00000010 + +#define NV50_3D_ZCULL_INVALIDATE 0x00001958 + +#define NVA3_3D_UNK1960 0x00001960 +#define NVA3_3D_UNK1960_0 0x00000001 +#define NVA3_3D_UNK1960_1 0x00000010 + +#define NV50_3D_UNK1968 0x00001968 +#define NV50_3D_UNK1968_0 0x00000001 +#define NV50_3D_UNK1968_1 0x00000010 + 
+#define NV50_3D_FP_CTRL_UNK196C 0x0000196c +#define NV50_3D_FP_CTRL_UNK196C_0 0x00000001 +#define NV50_3D_FP_CTRL_UNK196C_1 0x00000010 + +#define NV50_3D_UNK1978 0x00001978 + +#define NV50_3D_CLIPID_ENABLE 0x0000197c + +#define NV50_3D_CLIPID_WIDTH 0x00001980 +#define NV50_3D_CLIPID_WIDTH__MAX 0x00002000 +#define NV50_3D_CLIPID_WIDTH__ALIGN 0x00000040 + +#define NV50_3D_CLIPID_ID 0x00001984 + +#define NV50_3D_FP_INTERPOLANT_CTRL 0x00001988 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK__MASK 0xff000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK__SHIFT 24 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_X 0x01000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_Y 0x02000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_Z 0x04000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_W 0x08000000 +#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__MASK 0x00ff0000 +#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT 16 +#define NV50_3D_FP_INTERPOLANT_CTRL_OFFSET__MASK 0x0000ff00 +#define NV50_3D_FP_INTERPOLANT_CTRL_OFFSET__SHIFT 8 +#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT__MASK 0x000000ff +#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT 0 + +#define NV50_3D_FP_REG_ALLOC_TEMP 0x0000198c + +#define NV50_3D_REG_MODE 0x000019a0 +#define NV50_3D_REG_MODE_PACKED 0x00000001 +#define NV50_3D_REG_MODE_STRIPED 0x00000002 + +#define NV50_3D_FP_CONTROL 0x000019a8 +#define NV50_3D_FP_CONTROL_MULTIPLE_RESULTS 0x00000001 +#define NV50_3D_FP_CONTROL_EXPORTS_Z 0x00000100 +#define NV50_3D_FP_CONTROL_USES_KIL 0x00100000 + +#define NV50_3D_DEPTH_BOUNDS_EN 0x000019bc + +#define NV50_3D_UNK19C0 0x000019c0 + +#define NV50_3D_LOGIC_OP_ENABLE 0x000019c4 + +#define NV50_3D_LOGIC_OP 0x000019c8 +#define NV50_3D_LOGIC_OP_CLEAR 0x00001500 +#define NV50_3D_LOGIC_OP_AND 0x00001501 +#define NV50_3D_LOGIC_OP_AND_REVERSE 0x00001502 +#define NV50_3D_LOGIC_OP_COPY 0x00001503 +#define NV50_3D_LOGIC_OP_AND_INVERTED 0x00001504 +#define NV50_3D_LOGIC_OP_NOOP 0x00001505 +#define NV50_3D_LOGIC_OP_XOR 0x00001506 +#define 
NV50_3D_LOGIC_OP_OR 0x00001507 +#define NV50_3D_LOGIC_OP_NOR 0x00001508 +#define NV50_3D_LOGIC_OP_EQUIV 0x00001509 +#define NV50_3D_LOGIC_OP_INVERT 0x0000150a +#define NV50_3D_LOGIC_OP_OR_REVERSE 0x0000150b +#define NV50_3D_LOGIC_OP_COPY_INVERTED 0x0000150c +#define NV50_3D_LOGIC_OP_OR_INVERTED 0x0000150d +#define NV50_3D_LOGIC_OP_NAND 0x0000150e +#define NV50_3D_LOGIC_OP_SET 0x0000150f + +#define NV50_3D_ZETA_COMP_ENABLE 0x000019cc + +#define NV50_3D_CLEAR_BUFFERS 0x000019d0 +#define NV50_3D_CLEAR_BUFFERS_Z 0x00000001 +#define NV50_3D_CLEAR_BUFFERS_S 0x00000002 +#define NV50_3D_CLEAR_BUFFERS_R 0x00000004 +#define NV50_3D_CLEAR_BUFFERS_G 0x00000008 +#define NV50_3D_CLEAR_BUFFERS_B 0x00000010 +#define NV50_3D_CLEAR_BUFFERS_A 0x00000020 +#define NV50_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0 +#define NV50_3D_CLEAR_BUFFERS_RT__SHIFT 6 +#define NV50_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00 +#define NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT 10 + +#define NV50_3D_CLIPID_FILL 0x000019d4 + +#define NV50_3D_UNK19D8(i0) (0x000019d8 + 0x4*(i0)) +#define NV50_3D_UNK19D8__ESIZE 0x00000004 +#define NV50_3D_UNK19D8__LEN 0x00000002 + +#define NV50_3D_RT_COMP_ENABLE(i0) (0x000019e0 + 0x4*(i0)) +#define NV50_3D_RT_COMP_ENABLE__ESIZE 0x00000004 +#define NV50_3D_RT_COMP_ENABLE__LEN 0x00000008 + +#define NV50_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0)) +#define NV50_3D_COLOR_MASK__ESIZE 0x00000004 +#define NV50_3D_COLOR_MASK__LEN 0x00000008 +#define NV50_3D_COLOR_MASK_R 0x0000000f +#define NV50_3D_COLOR_MASK_G 0x000000f0 +#define NV50_3D_COLOR_MASK_B 0x00000f00 +#define NV50_3D_COLOR_MASK_A 0x0000f000 + +#define NV50_3D_UNK1A20 0x00001a20 + +#define NV50_3D_DELAY 0x00001a24 + +#define NV50_3D_UNK1A28 0x00001a28 +#define NV50_3D_UNK1A28_0__MASK 0x000000ff +#define NV50_3D_UNK1A28_0__SHIFT 0 +#define NV50_3D_UNK1A28_1 0x00000100 + +#define NV50_3D_UNK1A2C 0x00001a2c + +#define NV50_3D_UNK1A30 0x00001a30 + +#define NV50_3D_UNK1A34 0x00001a34 + +#define NV50_3D_UNK1A38 0x00001a38 + +#define 
NV50_3D_UNK1A3C 0x00001a3c + +#define NV50_3D_UNK1A40(i0) (0x00001a40 + 0x4*(i0)) +#define NV50_3D_UNK1A40__ESIZE 0x00000004 +#define NV50_3D_UNK1A40__LEN 0x00000010 +#define NV50_3D_UNK1A40_0__MASK 0x00000007 +#define NV50_3D_UNK1A40_0__SHIFT 0 +#define NV50_3D_UNK1A40_1__MASK 0x00000070 +#define NV50_3D_UNK1A40_1__SHIFT 4 +#define NV50_3D_UNK1A40_2__MASK 0x00000700 +#define NV50_3D_UNK1A40_2__SHIFT 8 +#define NV50_3D_UNK1A40_3__MASK 0x00007000 +#define NV50_3D_UNK1A40_3__SHIFT 12 +#define NV50_3D_UNK1A40_4__MASK 0x00070000 +#define NV50_3D_UNK1A40_4__SHIFT 16 +#define NV50_3D_UNK1A40_5__MASK 0x00700000 +#define NV50_3D_UNK1A40_5__SHIFT 20 +#define NV50_3D_UNK1A40_6__MASK 0x07000000 +#define NV50_3D_UNK1A40_6__SHIFT 24 +#define NV50_3D_UNK1A40_7__MASK 0x70000000 +#define NV50_3D_UNK1A40_7__SHIFT 28 + +#define NV50_3D_STRMOUT_ADDRESS_HIGH(i0) (0x00001a80 + 0x10*(i0)) +#define NV50_3D_STRMOUT_ADDRESS_HIGH__ESIZE 0x00000010 +#define NV50_3D_STRMOUT_ADDRESS_HIGH__LEN 0x00000004 + +#define NV50_3D_STRMOUT_ADDRESS_LOW(i0) (0x00001a84 + 0x10*(i0)) +#define NV50_3D_STRMOUT_ADDRESS_LOW__ESIZE 0x00000010 +#define NV50_3D_STRMOUT_ADDRESS_LOW__LEN 0x00000004 + +#define NV50_3D_STRMOUT_NUM_ATTRIBS(i0) (0x00001a88 + 0x10*(i0)) +#define NV50_3D_STRMOUT_NUM_ATTRIBS__ESIZE 0x00000010 +#define NV50_3D_STRMOUT_NUM_ATTRIBS__LEN 0x00000004 +#define NV50_3D_STRMOUT_NUM_ATTRIBS__MAX 0x00000040 + +#define NVA0_3D_STRMOUT_OFFSET_LIMIT(i0) (0x00001a8c + 0x10*(i0)) +#define NVA0_3D_STRMOUT_OFFSET_LIMIT__ESIZE 0x00000010 +#define NVA0_3D_STRMOUT_OFFSET_LIMIT__LEN 0x00000004 + +#define NV50_3D_VERTEX_ARRAY_ATTRIB(i0) (0x00001ac0 + 0x4*(i0)) +#define NV50_3D_VERTEX_ARRAY_ATTRIB__ESIZE 0x00000004 +#define NV50_3D_VERTEX_ARRAY_ATTRIB__LEN 0x00000010 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_BUFFER__MASK 0x0000000f +#define NV50_3D_VERTEX_ARRAY_ATTRIB_BUFFER__SHIFT 0 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_CONST 0x00000010 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_OFFSET__MASK 0x0007ffe0 +#define 
NV50_3D_VERTEX_ARRAY_ATTRIB_OFFSET__SHIFT 5 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT__MASK 0x01f80000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT__SHIFT 19 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 0x00080000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32 0x00100000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16 0x00180000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32 0x00200000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16 0x00280000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8 0x00500000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16 0x00780000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32 0x00900000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8 0x00980000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8 0x00c00000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16 0x00d80000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8 0x00e80000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_10_10_10_2 0x01800000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__MASK 0x7e000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__SHIFT 25 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x7e000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x24000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x12000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x5a000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x6c000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x48000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x36000000 +#define NV50_3D_VERTEX_ARRAY_ATTRIB_BGRA 0x80000000 + +#define NV50_3D_QUERY_ADDRESS_HIGH 0x00001b00 + +#define NV50_3D_QUERY_ADDRESS_LOW 0x00001b04 + +#define NV50_3D_QUERY_SEQUENCE 0x00001b08 + +#define NV50_3D_QUERY_GET 0x00001b0c +#define NV50_3D_QUERY_GET_MODE__MASK 0x00000003 +#define NV50_3D_QUERY_GET_MODE__SHIFT 0 +#define NV50_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000 +#define NV50_3D_QUERY_GET_MODE_SYNC 0x00000001 +#define NV50_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002 +#define 
NV50_3D_QUERY_GET_UNK4 0x00000010 +#define NVA0_3D_QUERY_GET_INDEX__MASK 0x000000e0 +#define NVA0_3D_QUERY_GET_INDEX__SHIFT 5 +#define NV50_3D_QUERY_GET_UNK8 0x00000100 +#define NV50_3D_QUERY_GET_UNIT__MASK 0x0000f000 +#define NV50_3D_QUERY_GET_UNIT__SHIFT 12 +#define NV50_3D_QUERY_GET_UNIT_UNK00 0x00000000 +#define NV50_3D_QUERY_GET_UNIT_VFETCH 0x00001000 +#define NV50_3D_QUERY_GET_UNIT_VP 0x00002000 +#define NV50_3D_QUERY_GET_UNIT_RAST 0x00004000 +#define NV50_3D_QUERY_GET_UNIT_STRMOUT 0x00005000 +#define NV50_3D_QUERY_GET_UNIT_GP 0x00006000 +#define NV50_3D_QUERY_GET_UNIT_ZCULL 0x00007000 +#define NV50_3D_QUERY_GET_UNIT_TPROP 0x0000a000 +#define NV50_3D_QUERY_GET_UNIT_UNK0C 0x0000c000 +#define NV50_3D_QUERY_GET_UNIT_CROP 0x0000f000 +#define NV50_3D_QUERY_GET_SYNC_COND__MASK 0x00010000 +#define NV50_3D_QUERY_GET_SYNC_COND__SHIFT 16 +#define NV50_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000 +#define NV50_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000 +#define NV50_3D_QUERY_GET_INTR 0x00100000 +#define NV50_3D_QUERY_GET_TYPE__MASK 0x00800000 +#define NV50_3D_QUERY_GET_TYPE__SHIFT 23 +#define NV50_3D_QUERY_GET_TYPE_QUERY 0x00000000 +#define NV50_3D_QUERY_GET_TYPE_COUNTER 0x00800000 +#define NV50_3D_QUERY_GET_QUERY_SELECT__MASK 0x0f000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT__SHIFT 24 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZERO 0x00000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_SAMPLECNT 0x01000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_STRMOUT_NO_OVERFLOW 0x02000000 +#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_DROPPED_PRIMITIVES 0x03000000 +#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_VERTICES 0x04000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK0 0x05000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK1 0x06000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK2 0x07000000 +#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK3 0x08000000 +#define NVA0_3D_QUERY_GET_QUERY_SELECT_RT_UNK14 0x0c000000 +#define 
NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_OFFSET 0x0d000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT__MASK 0x0f000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT__SHIFT 24 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_VFETCH_VERTICES 0x00000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_VFETCH_PRIMITIVES 0x01000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_VP_LAUNCHES 0x02000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_GP_LAUNCHES 0x03000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_GP_PRIMITIVES_OUT 0x04000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_TRANSFORM_FEEDBACK 0x05000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_GENERATED_PRIMITIVES 0x06000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_RAST_PRIMITIVES_PRECLIP 0x07000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_RAST_PRIMITIVES_POSTCLIP 0x08000000 +#define NV50_3D_QUERY_GET_COUNTER_SELECT_FP_PIXELS 0x09000000 +#define NV84_3D_QUERY_GET_COUNTER_SELECT_UNK0A 0x0a000000 +#define NVA0_3D_QUERY_GET_COUNTER_SELECT_UNK0C 0x0c000000 +#define NV50_3D_QUERY_GET_SHORT 0x10000000 + +#define NVA3_3D_VP_RESULT_MAP_ALT(i0) (0x00001b3c + 0x4*(i0)) +#define NVA3_3D_VP_RESULT_MAP_ALT__ESIZE 0x00000004 +#define NVA3_3D_VP_RESULT_MAP_ALT__LEN 0x00000020 +#define NVA3_3D_VP_RESULT_MAP_ALT_0__MASK 0x000000ff +#define NVA3_3D_VP_RESULT_MAP_ALT_0__SHIFT 0 +#define NVA3_3D_VP_RESULT_MAP_ALT_1__MASK 0x0000ff00 +#define NVA3_3D_VP_RESULT_MAP_ALT_1__SHIFT 8 +#define NVA3_3D_VP_RESULT_MAP_ALT_2__MASK 0x00ff0000 +#define NVA3_3D_VP_RESULT_MAP_ALT_2__SHIFT 16 +#define NVA3_3D_VP_RESULT_MAP_ALT_3__MASK 0xff000000 +#define NVA3_3D_VP_RESULT_MAP_ALT_3__SHIFT 24 + +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT(i0) (0x00001c00 + 0x10*(i0)) +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT__ESIZE 0x00000010 +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT__LEN 0x00000020 +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_STRIDE__MASK 0x00000fff +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_STRIDE__SHIFT 0 +#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_ENABLE 0x20000000 + +#define 
NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT(i0) (0x00001c04 + 0x10*(i0)) +#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT__ESIZE 0x00000010 +#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT__LEN 0x00000020 + +#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT(i0) (0x00001c08 + 0x10*(i0)) +#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT__ESIZE 0x00000010 +#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT__LEN 0x00000020 + +#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT(i0) (0x00001c0c + 0x10*(i0)) +#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT__ESIZE 0x00000010 +#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT__LEN 0x00000020 + +#define NVA3_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0)) +#define NVA3_3D_IBLEND__ESIZE 0x00000020 +#define NVA3_3D_IBLEND__LEN 0x00000008 + +#define NVA3_3D_IBLEND_SEPARATE_ALPHA(i0) (0x00001e00 + 0x20*(i0)) + +#define NVA3_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0)) +#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NVA3_3D_IBLEND_EQUATION_RGB_MIN 0x00008007 +#define NVA3_3D_IBLEND_EQUATION_RGB_MAX 0x00008008 +#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVA3_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0)) + +#define NVA3_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0)) + +#define NVA3_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0)) +#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NVA3_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NVA3_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVA3_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0)) + +#define NVA3_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0)) + +#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT(i0) (0x00001f00 + 0x8*(i0)) +#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT__ESIZE 0x00000008 +#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT__LEN 0x00000020 + 
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT(i0) (0x00001f04 + 0x8*(i0)) +#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT__ESIZE 0x00000008 +#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT__LEN 0x00000020 + + +#endif /* RNNDB_NV50_3D_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h new file mode 100644 index 00000000000..f26ac45da40 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h @@ -0,0 +1,98 @@ +#ifndef NV_3DDEFS_XML +#define NV_3DDEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nv50_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38) +- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28) +- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20) +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this 
permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000 +#define NV50_3D_BLEND_FACTOR_ONE 0x00004001 +#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303 +#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305 +#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308 +#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002 +#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004 +#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901 +#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903 + +#endif /* NV_3DDEFS_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_blit.h b/src/gallium/drivers/nouveau/nv50/nv50_blit.h new file mode 100644 index 00000000000..bdd6a63d1f1 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_blit.h @@ -0,0 +1,223 @@ + +#ifndef __NV50_BLIT_H__ +#define 
__NV50_BLIT_H__ + +#include "util/u_inlines.h" +#include "util/u_format.h" + +void * +nv50_blitter_make_fp(struct pipe_context *, + unsigned mode, + enum pipe_texture_target); + +unsigned +nv50_blit_select_mode(const struct pipe_blit_info *); + +/* Converted to a pipe->blit. */ +void +nv50_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *); + +#define NV50_BLIT_MODE_PASS 0 /* pass through TEX $t0/$s0 output */ +#define NV50_BLIT_MODE_Z24S8 1 /* encode ZS values for RGBA unorm8 */ +#define NV50_BLIT_MODE_S8Z24 2 +#define NV50_BLIT_MODE_X24S8 3 +#define NV50_BLIT_MODE_S8X24 4 +#define NV50_BLIT_MODE_Z24X8 5 +#define NV50_BLIT_MODE_X8Z24 6 +#define NV50_BLIT_MODE_ZS 7 /* put $t0/$s0 into R, $t1/$s1 into G */ +#define NV50_BLIT_MODE_XS 8 /* put $t1/$s1 into G */ +#define NV50_BLIT_MODES 9 + +/* CUBE and RECT textures are reinterpreted as 2D(_ARRAY) */ +#define NV50_BLIT_TEXTURE_BUFFER 0 +#define NV50_BLIT_TEXTURE_1D 1 +#define NV50_BLIT_TEXTURE_2D 2 +#define NV50_BLIT_TEXTURE_3D 3 +#define NV50_BLIT_TEXTURE_1D_ARRAY 4 +#define NV50_BLIT_TEXTURE_2D_ARRAY 5 +#define NV50_BLIT_MAX_TEXTURE_TYPES 6 + +static INLINE unsigned +nv50_blit_texture_type(enum pipe_texture_target target) +{ + switch (target) { + case PIPE_TEXTURE_1D: return NV50_BLIT_TEXTURE_1D; + case PIPE_TEXTURE_2D: return NV50_BLIT_TEXTURE_2D; + case PIPE_TEXTURE_3D: return NV50_BLIT_TEXTURE_3D; + case PIPE_TEXTURE_1D_ARRAY: return NV50_BLIT_TEXTURE_1D_ARRAY; + case PIPE_TEXTURE_2D_ARRAY: return NV50_BLIT_TEXTURE_2D_ARRAY; + default: + assert(target == PIPE_BUFFER); + return NV50_BLIT_TEXTURE_BUFFER; + } +} + +static INLINE unsigned +nv50_blit_get_tgsi_texture_target(enum pipe_texture_target target) +{ + switch (target) { + case PIPE_TEXTURE_1D: return TGSI_TEXTURE_1D; + case PIPE_TEXTURE_2D: return TGSI_TEXTURE_2D; + case PIPE_TEXTURE_3D: return TGSI_TEXTURE_3D; + case PIPE_TEXTURE_1D_ARRAY: return TGSI_TEXTURE_1D_ARRAY; + case PIPE_TEXTURE_2D_ARRAY: return TGSI_TEXTURE_2D_ARRAY; + 
default: + assert(target == PIPE_BUFFER); + return TGSI_TEXTURE_BUFFER; + } +} + +static INLINE enum pipe_texture_target +nv50_blit_reinterpret_pipe_texture_target(enum pipe_texture_target target) +{ + switch (target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return PIPE_TEXTURE_2D_ARRAY; + case PIPE_TEXTURE_RECT: + return PIPE_TEXTURE_2D; + default: + return target; + } +} + +static INLINE unsigned +nv50_blit_get_filter(const struct pipe_blit_info *info) +{ + if (info->dst.resource->nr_samples < info->src.resource->nr_samples) + return util_format_is_depth_or_stencil(info->src.format) ? 0 : 1; + + if (info->filter != PIPE_TEX_FILTER_LINEAR) + return 0; + + if ((info->dst.box.width == info->src.box.width || + info->dst.box.width == -info->src.box.width) && + (info->dst.box.height == info->src.box.height || + info->dst.box.height == -info->src.box.height)) + return 0; + + return 1; +} + +/* Since shaders cannot export stencil, we cannot copy stencil values when + * rendering to ZETA, so we attach the ZS surface to a colour render target. 
+ */ +static INLINE enum pipe_format +nv50_blit_zeta_to_colour_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return PIPE_FORMAT_R16_UNORM; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + return PIPE_FORMAT_R8G8B8A8_UNORM; + case PIPE_FORMAT_Z32_FLOAT: + return PIPE_FORMAT_R32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return PIPE_FORMAT_R32G32_FLOAT; + default: + assert(0); + return PIPE_FORMAT_NONE; + } +} + + +static INLINE uint16_t +nv50_blit_derive_color_mask(const struct pipe_blit_info *info) +{ + const unsigned mask = info->mask; + + uint16_t color_mask = 0; + + switch (info->dst.format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + if (mask & PIPE_MASK_S) + color_mask |= 0x1000; + /* fall through */ + case PIPE_FORMAT_Z24X8_UNORM: + if (mask & PIPE_MASK_Z) + color_mask |= 0x0111; + break; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + if (mask & PIPE_MASK_Z) + color_mask |= 0x1110; + if (mask & PIPE_MASK_S) + color_mask |= 0x0001; + break; + default: + if (mask & (PIPE_MASK_R | PIPE_MASK_Z)) color_mask |= 0x0001; + if (mask & (PIPE_MASK_G | PIPE_MASK_S)) color_mask |= 0x0010; + if (mask & PIPE_MASK_B) color_mask |= 0x0100; + if (mask & PIPE_MASK_A) color_mask |= 0x1000; + break; + } + + return color_mask; +} + +static INLINE uint32_t +nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info) +{ + uint32_t mask = 0; + + switch (info->dst.format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + if (info->mask & PIPE_MASK_Z) mask |= 0x00ffffff; + if (info->mask & PIPE_MASK_S) mask |= 0xff000000; + break; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + if (info->mask & PIPE_MASK_Z) mask |= 0xffffff00; + if (info->mask & PIPE_MASK_S) mask |= 0x000000ff; + break; + case PIPE_FORMAT_X8Z24_UNORM: + if (info->mask & PIPE_MASK_Z) mask = 0x00ffffff; + break; + default: + mask = 0xffffffff; + break; + } + return mask; +} + +#if NOUVEAU_DRIVER == 0xc0 +# define nv50_format_table 
nvc0_format_table +#endif + +/* return TRUE for formats that can be converted among each other by NVC0_2D */ +static INLINE boolean +nv50_2d_dst_format_faithful(enum pipe_format format) +{ + const uint64_t mask = + NV50_ENG2D_SUPPORTED_FORMATS & + ~NV50_ENG2D_NOCONVERT_FORMATS; + uint8_t id = nv50_format_table[format].rt; + return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0))); +} +static INLINE boolean +nv50_2d_src_format_faithful(enum pipe_format format) +{ + const uint64_t mask = + NV50_ENG2D_SUPPORTED_FORMATS & + ~(NV50_ENG2D_LUMINANCE_FORMATS | NV50_ENG2D_INTENSITY_FORMATS); + uint8_t id = nv50_format_table[format].rt; + return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0))); +} + +static INLINE boolean +nv50_2d_format_supported(enum pipe_format format) +{ + uint8_t id = nv50_format_table[format].rt; + return (id >= 0xc0) && + (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))); +} + +static INLINE boolean +nv50_2d_dst_format_ops_supported(enum pipe_format format) +{ + uint8_t id = nv50_format_table[format].rt; + return (id >= 0xc0) && + (NV50_ENG2D_OPERATION_FORMATS & (1ULL << (id - 0xc0))); +} + +#endif /* __NV50_BLIT_H__ */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c new file mode 100644 index 00000000000..b6bdf79b389 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -0,0 +1,317 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all 
copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" +#include "util/u_framebuffer.h" + +#ifdef NV50_WITH_DRAW_MODULE +#include "draw/draw_context.h" +#endif + +#include "nv50/nv50_context.h" +#include "nv50/nv50_screen.h" +#include "nv50/nv50_resource.h" + +static void +nv50_flush(struct pipe_context *pipe, + struct pipe_fence_handle **fence, + unsigned flags) +{ + struct nouveau_screen *screen = nouveau_screen(pipe->screen); + + if (fence) + nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence); + + PUSH_KICK(screen->pushbuf); + + nouveau_context_update_frame_stats(nouveau_context(pipe)); +} + +static void +nv50_texture_barrier(struct pipe_context *pipe) +{ + struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf; + + BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, 0x20); +} + +void +nv50_default_kick_notify(struct nouveau_pushbuf *push) +{ + struct nv50_screen *screen = push->user_priv; + + if (screen) { + nouveau_fence_next(&screen->base); + nouveau_fence_update(&screen->base, TRUE); + if (screen->cur_ctx) + screen->cur_ctx->state.flushed = TRUE; + } +} + +static void +nv50_context_unreference_resources(struct nv50_context *nv50) +{ + unsigned s, i; + + nouveau_bufctx_del(&nv50->bufctx_3d); + nouveau_bufctx_del(&nv50->bufctx); + + util_unreference_framebuffer_state(&nv50->framebuffer); + + for (i = 0; i < nv50->num_vtxbufs; ++i) 
+ pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL); + + pipe_resource_reference(&nv50->idxbuf.buffer, NULL); + + for (s = 0; s < 3; ++s) { + for (i = 0; i < nv50->num_textures[s]; ++i) + pipe_sampler_view_reference(&nv50->textures[s][i], NULL); + + for (i = 0; i < NV50_MAX_PIPE_CONSTBUFS; ++i) + if (!nv50->constbuf[s][i].user) + pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL); + } +} + +static void +nv50_destroy(struct pipe_context *pipe) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + if (nv50_context_screen(nv50)->cur_ctx == nv50) { + nv50->base.pushbuf->kick_notify = NULL; + nv50_context_screen(nv50)->cur_ctx = NULL; + nouveau_pushbuf_bufctx(nv50->base.pushbuf, NULL); + } + /* need to flush before destroying the bufctx */ + nouveau_pushbuf_kick(nv50->base.pushbuf, nv50->base.pushbuf->channel); + + nv50_context_unreference_resources(nv50); + +#ifdef NV50_WITH_DRAW_MODULE + draw_destroy(nv50->draw); +#endif + + nouveau_context_destroy(&nv50->base); +} + +static int +nv50_invalidate_resource_storage(struct nouveau_context *ctx, + struct pipe_resource *res, + int ref) +{ + struct nv50_context *nv50 = nv50_context(&ctx->pipe); + unsigned s, i; + + if (res->bind & PIPE_BIND_RENDER_TARGET) { + for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) { + if (nv50->framebuffer.cbufs[i] && + nv50->framebuffer.cbufs[i]->texture == res) { + nv50->dirty |= NV50_NEW_FRAMEBUFFER; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + if (!--ref) + return ref; + } + } + } + if (res->bind & PIPE_BIND_DEPTH_STENCIL) { + if (nv50->framebuffer.zsbuf && + nv50->framebuffer.zsbuf->texture == res) { + nv50->dirty |= NV50_NEW_FRAMEBUFFER; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + if (!--ref) + return ref; + } + } + + if (res->bind & PIPE_BIND_VERTEX_BUFFER) { + for (i = 0; i < nv50->num_vtxbufs; ++i) { + if (nv50->vtxbuf[i].buffer == res) { + nv50->dirty |= NV50_NEW_ARRAYS; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX); + if (!--ref) + 
return ref; + } + } + } + if (res->bind & PIPE_BIND_INDEX_BUFFER) { + if (nv50->idxbuf.buffer == res) + if (!--ref) + return ref; + } + + if (res->bind & PIPE_BIND_SAMPLER_VIEW) { + for (s = 0; s < 5; ++s) { + for (i = 0; i < nv50->num_textures[s]; ++i) { + if (nv50->textures[s][i] && + nv50->textures[s][i]->texture == res) { + nv50->dirty |= NV50_NEW_TEXTURES; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + if (!--ref) + return ref; + } + } + } + } + + if (res->bind & PIPE_BIND_CONSTANT_BUFFER) { + for (s = 0; s < 5; ++s) { + for (i = 0; i < nv50->num_vtxbufs; ++i) { + if (!nv50->constbuf[s][i].user && + nv50->constbuf[s][i].u.buf == res) { + nv50->dirty |= NV50_NEW_CONSTBUF; + nv50->constbuf_dirty[s] |= 1 << i; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i)); + if (!--ref) + return ref; + } + } + } + } + + return ref; +} + +struct pipe_context * +nv50_create(struct pipe_screen *pscreen, void *priv) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct nv50_context *nv50; + struct pipe_context *pipe; + int ret; + uint32_t flags; + + nv50 = CALLOC_STRUCT(nv50_context); + if (!nv50) + return NULL; + pipe = &nv50->base.pipe; + + if (!nv50_blitctx_create(nv50)) + goto out_err; + + nv50->base.pushbuf = screen->base.pushbuf; + nv50->base.client = screen->base.client; + + ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT, + &nv50->bufctx_3d); + if (!ret) + ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx); + if (ret) + goto out_err; + + nv50->base.screen = &screen->base; + nv50->base.copy_data = nv50_m2mf_copy_linear; + nv50->base.push_data = nv50_sifc_linear_u8; + nv50->base.push_cb = nv50_cb_push; + + nv50->screen = screen; + pipe->screen = pscreen; + pipe->priv = priv; + + pipe->destroy = nv50_destroy; + + pipe->draw_vbo = nv50_draw_vbo; + pipe->clear = nv50_clear; + + pipe->flush = nv50_flush; + pipe->texture_barrier = nv50_texture_barrier; + + if (!screen->cur_ctx) { + screen->cur_ctx = nv50; + 
nouveau_pushbuf_bufctx(screen->base.pushbuf, nv50->bufctx); + } + nv50->base.pushbuf->kick_notify = nv50_default_kick_notify; + + nv50_init_query_functions(nv50); + nv50_init_surface_functions(nv50); + nv50_init_state_functions(nv50); + nv50_init_resource_functions(pipe); + + nv50->base.invalidate_resource_storage = nv50_invalidate_resource_storage; + +#ifdef NV50_WITH_DRAW_MODULE + /* no software fallbacks implemented */ + nv50->draw = draw_create(pipe); + assert(nv50->draw); + draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); +#endif + + if (screen->base.device->chipset < 0x84 || + debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) { + /* PMPEG */ + nouveau_context_init_vdec(&nv50->base); + } else if (screen->base.device->chipset < 0x98 || + screen->base.device->chipset == 0xa0) { + /* VP2 */ + pipe->create_video_codec = nv84_create_decoder; + pipe->create_video_buffer = nv84_video_buffer_create; + } else { + /* VP3/4 */ + pipe->create_video_codec = nv98_create_decoder; + pipe->create_video_buffer = nv98_video_buffer_create; + } + + flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + + BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->code); + BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms); + BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc); + BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo); + + flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; + + BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo); + BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo); + + nv50->base.scratch.bo_size = 2 << 20; + + return pipe; + +out_err: + if (nv50) { + if (nv50->bufctx_3d) + nouveau_bufctx_del(&nv50->bufctx_3d); + if (nv50->bufctx) + nouveau_bufctx_del(&nv50->bufctx); + if (nv50->blit) + FREE(nv50->blit); + FREE(nv50); + } + return NULL; +} + +void +nv50_bufctx_fence(struct nouveau_bufctx *bufctx, boolean on_flush) +{ + struct nouveau_list *list = on_flush ? 
&bufctx->current : &bufctx->pending; + struct nouveau_list *it; + + for (it = list->next; it != list; it = it->next) { + struct nouveau_bufref *ref = (struct nouveau_bufref *)it; + struct nv04_resource *res = ref->priv; + if (res) + nv50_resource_validate(res, (unsigned)ref->priv_data); + } +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h new file mode 100644 index 00000000000..ee6eb0ef715 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -0,0 +1,322 @@ +#ifndef __NV50_CONTEXT_H__ +#define __NV50_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_inlines.h" +#include "util/u_dynarray.h" + +#ifdef NV50_WITH_DRAW_MODULE +#include "draw/draw_vertex.h" +#endif + +#include "nv50/nv50_debug.h" +#include "nv50/nv50_winsys.h" +#include "nv50/nv50_stateobj.h" +#include "nv50/nv50_screen.h" +#include "nv50/nv50_program.h" +#include "nv50/nv50_resource.h" +#include "nv50/nv50_transfer.h" + +#include "nouveau_context.h" +#include "nv_object.xml.h" +#include "nv_m2mf.xml.h" +#include "nv50/nv50_3ddefs.xml.h" +#include "nv50/nv50_3d.xml.h" +#include "nv50/nv50_2d.xml.h" + +#define NV50_NEW_BLEND (1 << 0) +#define NV50_NEW_RASTERIZER (1 << 1) +#define NV50_NEW_ZSA (1 << 2) +#define NV50_NEW_VERTPROG (1 << 3) +#define NV50_NEW_GMTYPROG (1 << 6) +#define NV50_NEW_FRAGPROG (1 << 7) +#define NV50_NEW_BLEND_COLOUR (1 << 8) +#define NV50_NEW_STENCIL_REF (1 << 9) +#define NV50_NEW_CLIP (1 << 10) +#define NV50_NEW_SAMPLE_MASK (1 << 11) +#define NV50_NEW_FRAMEBUFFER (1 << 12) +#define NV50_NEW_STIPPLE (1 << 13) +#define NV50_NEW_SCISSOR (1 << 14) +#define NV50_NEW_VIEWPORT (1 << 15) +#define NV50_NEW_ARRAYS (1 << 16) +#define NV50_NEW_VERTEX (1 << 17) +#define NV50_NEW_CONSTBUF (1 << 18) +#define NV50_NEW_TEXTURES (1 << 19) +#define NV50_NEW_SAMPLERS (1 << 20) +#define 
NV50_NEW_STRMOUT (1 << 21) +#define NV50_NEW_CONTEXT (1 << 31) + +#define NV50_BIND_FB 0 +#define NV50_BIND_VERTEX 1 +#define NV50_BIND_VERTEX_TMP 2 +#define NV50_BIND_INDEX 3 +#define NV50_BIND_TEXTURES 4 +#define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i)) +#define NV50_BIND_SO 53 +#define NV50_BIND_SCREEN 54 +#define NV50_BIND_TLS 55 +#define NV50_BIND_COUNT 56 +#define NV50_BIND_2D 0 +#define NV50_BIND_M2MF 0 +#define NV50_BIND_FENCE 1 + +#define NV50_CB_TMP 123 +/* fixed constant buffer binding points - low indices for user's constbufs */ +#define NV50_CB_PVP 124 +#define NV50_CB_PGP 126 +#define NV50_CB_PFP 125 +#define NV50_CB_AUX 127 + + +struct nv50_blitctx; + +boolean nv50_blitctx_create(struct nv50_context *); + +struct nv50_context { + struct nouveau_context base; + + struct nv50_screen *screen; + + struct nouveau_bufctx *bufctx_3d; + struct nouveau_bufctx *bufctx; + + uint32_t dirty; + + struct { + uint32_t instance_elts; /* bitmask of per-instance elements */ + uint32_t instance_base; + uint32_t interpolant_ctrl; + uint32_t semantic_color; + uint32_t semantic_psize; + int32_t index_bias; + boolean uniform_buffer_bound[3]; + boolean prim_restart; + boolean point_sprite; + boolean rt_serialize; + boolean flushed; + boolean rasterizer_discard; + uint8_t tls_required; + boolean new_tls_space; + uint8_t num_vtxbufs; + uint8_t num_vtxelts; + uint8_t num_textures[3]; + uint8_t num_samplers[3]; + uint8_t prim_size; + uint16_t scissor; + } state; + + struct nv50_blend_stateobj *blend; + struct nv50_rasterizer_stateobj *rast; + struct nv50_zsa_stateobj *zsa; + struct nv50_vertex_stateobj *vertex; + + struct nv50_program *vertprog; + struct nv50_program *gmtyprog; + struct nv50_program *fragprog; + + struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS]; + uint16_t constbuf_dirty[3]; + uint16_t constbuf_valid[3]; + + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned num_vtxbufs; + struct pipe_index_buffer idxbuf; + uint32_t vbo_fifo; /* bitmask of 
vertex elements to be pushed to FIFO */ + uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ + uint32_t vbo_constant; /* bitmask of user buffers with stride 0 */ + uint32_t vb_elt_first; /* from pipe_draw_info, for vertex upload */ + uint32_t vb_elt_limit; /* max - min element (count - 1) */ + uint32_t instance_off; /* base vertex for instanced arrays */ + uint32_t instance_max; /* max instance for current draw call */ + + struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS]; + unsigned num_textures[3]; + struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS]; + unsigned num_samplers[3]; + + uint8_t num_so_targets; + uint8_t so_targets_dirty; + struct pipe_stream_output_target *so_target[4]; + + struct pipe_framebuffer_state framebuffer; + struct pipe_blend_color blend_colour; + struct pipe_stencil_ref stencil_ref; + struct pipe_poly_stipple stipple; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + struct pipe_clip_state clip; + + unsigned sample_mask; + + boolean vbo_push_hint; + + struct pipe_query *cond_query; + boolean cond_cond; + uint cond_mode; + + struct nv50_blitctx *blit; + +#ifdef NV50_WITH_DRAW_MODULE + struct draw_context *draw; +#endif +}; + +static INLINE struct nv50_context * +nv50_context(struct pipe_context *pipe) +{ + return (struct nv50_context *)pipe; +} + +static INLINE struct nv50_screen * +nv50_context_screen(struct nv50_context *nv50) +{ + return nv50_screen(&nv50->base.screen->base); +} + +/* return index used in nv50_context arrays for a specific shader type */ +static INLINE unsigned +nv50_context_shader_stage(unsigned pipe) +{ + switch (pipe) { + case PIPE_SHADER_VERTEX: return 0; + case PIPE_SHADER_FRAGMENT: return 1; + case PIPE_SHADER_GEOMETRY: return 2; + case PIPE_SHADER_COMPUTE: return 3; + default: + assert(!"invalid/unhandled shader type"); + return 0; + } +} + +/* nv50_context.c */ +struct pipe_context *nv50_create(struct pipe_screen *, void *); + +void 
nv50_bufctx_fence(struct nouveau_bufctx *, boolean on_flush); + +void nv50_default_kick_notify(struct nouveau_pushbuf *); + +/* nv50_draw.c */ +extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); + +/* nv50_query.c */ +void nv50_init_query_functions(struct nv50_context *); +void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, + struct pipe_query *, unsigned result_offset); +void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *); +void nva0_so_target_save_offset(struct pipe_context *, + struct pipe_stream_output_target *, + unsigned index, boolean seralize); + +#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) + +/* nv50_shader_state.c */ +void nv50_vertprog_validate(struct nv50_context *); +void nv50_gmtyprog_validate(struct nv50_context *); +void nv50_fragprog_validate(struct nv50_context *); +void nv50_fp_linkage_validate(struct nv50_context *); +void nv50_gp_linkage_validate(struct nv50_context *); +void nv50_constbufs_validate(struct nv50_context *); +void nv50_validate_derived_rs(struct nv50_context *); +void nv50_stream_output_validate(struct nv50_context *); + +/* nv50_state.c */ +extern void nv50_init_state_functions(struct nv50_context *); + +/* nv50_state_validate.c */ +/* @words: check for space before emitting relocs */ +extern boolean nv50_state_validate(struct nv50_context *, uint32_t state_mask, + unsigned space_words); + +/* nv50_surface.c */ +extern void nv50_clear(struct pipe_context *, unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil); +extern void nv50_init_surface_functions(struct nv50_context *); + +/* nv50_tex.c */ +void nv50_validate_textures(struct nv50_context *); +void nv50_validate_samplers(struct nv50_context *); + +struct pipe_sampler_view * +nv50_create_texture_view(struct pipe_context *, + struct pipe_resource *, + const struct pipe_sampler_view *, + uint32_t flags, + enum pipe_texture_target); +struct pipe_sampler_view * 
+nv50_create_sampler_view(struct pipe_context *, + struct pipe_resource *, + const struct pipe_sampler_view *); + +/* nv50_transfer.c */ +void +nv50_m2mf_transfer_rect(struct nv50_context *, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy); +void +nv50_sifc_linear_u8(struct nouveau_context *pipe, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data); +void +nv50_m2mf_copy_linear(struct nouveau_context *pipe, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size); +void +nv50_cb_push(struct nouveau_context *nv, + struct nouveau_bo *bo, unsigned domain, + unsigned base, unsigned size, + unsigned offset, unsigned words, const uint32_t *data); + +/* nv50_vbo.c */ +void nv50_draw_vbo(struct pipe_context *, const struct pipe_draw_info *); + +void * +nv50_vertex_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements); +void +nv50_vertex_state_delete(struct pipe_context *pipe, void *hwcso); + +void nv50_vertex_arrays_validate(struct nv50_context *nv50); + +/* nv50_push.c */ +void nv50_push_vbo(struct nv50_context *, const struct pipe_draw_info *); + +/* nv84_video.c */ +struct pipe_video_codec * +nv84_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ); + +struct pipe_video_buffer * +nv84_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *template); + +int +nv84_screen_get_video_param(struct pipe_screen *pscreen, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint, + enum pipe_video_cap param); + +boolean +nv84_screen_video_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint); + +/* nv98_video.c */ +struct pipe_video_codec * +nv98_create_decoder(struct pipe_context 
*context, + const struct pipe_video_codec *templ); + +struct pipe_video_buffer * +nv98_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *template); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_debug.h b/src/gallium/drivers/nouveau/nv50/nv50_debug.h new file mode 100644 index 00000000000..f3dee621519 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_debug.h @@ -0,0 +1,25 @@ + +#ifndef __NV50_DEBUG_H__ +#define __NV50_DEBUG_H__ + +#include <stdio.h> + +#include "util/u_debug.h" + +#define NV50_DEBUG_MISC 0x0001 +#define NV50_DEBUG_SHADER 0x0100 +#define NV50_DEBUG_PROG_IR 0x0200 +#define NV50_DEBUG_PROG_RA 0x0400 +#define NV50_DEBUG_PROG_CFLOW 0x0800 +#define NV50_DEBUG_PROG_ALL 0x1f00 + +#define NV50_DEBUG 0 + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args) + +#define NV50_DBGMSG(ch, args...) \ + if ((NV50_DEBUG) & (NV50_DEBUG_##ch)) \ + debug_printf(args) + +#endif /* __NV50_DEBUG_H__ */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h new file mode 100644 index 00000000000..2e42843fa56 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h @@ -0,0 +1,200 @@ +#ifndef NV50_DEFS_XML +#define NV50_DEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- rnndb/nv50_defs.xml ( 7783 bytes, from 2013-02-14 13:56:25) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nvchipsets.xml ( 3704 bytes, from 2012-08-18 12:48:55) + +Copyright (C) 2006-2013 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice 
(including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#define NV50_VSTATUS_IDLE 0x00000000 +#define NV50_VSTATUS_BUSY 0x00000001 +#define NV50_VSTATUS_UNK2 0x00000002 +#define NV50_VSTATUS_WAITING 0x00000003 +#define NV50_VSTATUS_BLOCKED 0x00000005 +#define NV50_VSTATUS_FAULTED 0x00000006 +#define NV50_VSTATUS_PAUSED 0x00000007 +#define NV50_SURFACE_FORMAT_BITMAP 0x0000001c +#define NV50_SURFACE_FORMAT_UNK1D 0x0000001d +#define NV50_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0 +#define NV50_SURFACE_FORMAT_RGBA32_SINT 0x000000c1 +#define NV50_SURFACE_FORMAT_RGBA32_UINT 0x000000c2 +#define NV50_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3 +#define NV50_SURFACE_FORMAT_RGBX32_SINT 0x000000c4 +#define NV50_SURFACE_FORMAT_RGBX32_UINT 0x000000c5 +#define NV50_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6 +#define NV50_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7 +#define NV50_SURFACE_FORMAT_RGBA16_SINT 0x000000c8 +#define NV50_SURFACE_FORMAT_RGBA16_UINT 0x000000c9 +#define NV50_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca +#define NV50_SURFACE_FORMAT_RG32_FLOAT 0x000000cb +#define NV50_SURFACE_FORMAT_RG32_SINT 0x000000cc +#define NV50_SURFACE_FORMAT_RG32_UINT 0x000000cd +#define NV50_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce +#define NV50_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf +#define NV50_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0 +#define NV50_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1 +#define NV50_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2 +#define NV50_SURFACE_FORMAT_RGBA8_UNORM 
0x000000d5 +#define NV50_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6 +#define NV50_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7 +#define NV50_SURFACE_FORMAT_RGBA8_SINT 0x000000d8 +#define NV50_SURFACE_FORMAT_RGBA8_UINT 0x000000d9 +#define NV50_SURFACE_FORMAT_RG16_UNORM 0x000000da +#define NV50_SURFACE_FORMAT_RG16_SNORM 0x000000db +#define NV50_SURFACE_FORMAT_RG16_SINT 0x000000dc +#define NV50_SURFACE_FORMAT_RG16_UINT 0x000000dd +#define NV50_SURFACE_FORMAT_RG16_FLOAT 0x000000de +#define NV50_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df +#define NV50_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0 +#define NV50_SURFACE_FORMAT_R32_SINT 0x000000e3 +#define NV50_SURFACE_FORMAT_R32_UINT 0x000000e4 +#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5 +#define NV50_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6 +#define NV50_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7 +#define NV50_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8 +#define NV50_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9 +#define NV50_SURFACE_FORMAT_RG8_UNORM 0x000000ea +#define NV50_SURFACE_FORMAT_RG8_SNORM 0x000000eb +#define NV50_SURFACE_FORMAT_RG8_SINT 0x000000ec +#define NV50_SURFACE_FORMAT_RG8_UINT 0x000000ed +#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee +#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef +#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0 +#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1 +#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2 +#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3 +#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4 +#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5 +#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6 +#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7 +#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8 +#define NV50_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9 +#define NV50_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa +#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb +#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc +#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd +#define 
NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe +#define NV50_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff +#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a +#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013 +#define NV50_ZETA_FORMAT_S8_Z24_UNORM 0x00000014 +#define NV50_ZETA_FORMAT_Z24_X8_UNORM 0x00000015 +#define NV50_ZETA_FORMAT_Z24_S8_UNORM 0x00000016 +#define NV50_ZETA_FORMAT_Z24_C8_UNORM 0x00000018 +#define NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019 +#define NV50_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d +#define NV50_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e +#define NV50_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f +#define NVE4_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002 +#define NVE4_IMAGE_FORMAT_RGBA32_SINT 0x00000003 +#define NVE4_IMAGE_FORMAT_RGBA32_UINT 0x00000004 +#define NVE4_IMAGE_FORMAT_RGBA16_UNORM 0x00000008 +#define NVE4_IMAGE_FORMAT_RGBA16_SNORM 0x00000009 +#define NVE4_IMAGE_FORMAT_RGBA16_SINT 0x0000000a +#define NVE4_IMAGE_FORMAT_RGBA16_UINT 0x0000000b +#define NVE4_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c +#define NVE4_IMAGE_FORMAT_RG32_FLOAT 0x0000000d +#define NVE4_IMAGE_FORMAT_RG32_SINT 0x0000000e +#define NVE4_IMAGE_FORMAT_RG32_UINT 0x0000000f +#define NVE4_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013 +#define NVE4_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015 +#define NVE4_IMAGE_FORMAT_RGBA8_UNORM 0x00000018 +#define NVE4_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a +#define NVE4_IMAGE_FORMAT_RGBA8_SINT 0x0000001b +#define NVE4_IMAGE_FORMAT_RGBA8_UINT 0x0000001c +#define NVE4_IMAGE_FORMAT_RG16_UNORM 0x0000001d +#define NVE4_IMAGE_FORMAT_RG16_SNORM 0x0000001e +#define NVE4_IMAGE_FORMAT_RG16_SINT 0x0000001f +#define NVE4_IMAGE_FORMAT_RG16_UINT 0x00000020 +#define NVE4_IMAGE_FORMAT_RG16_FLOAT 0x00000021 +#define NVE4_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024 +#define NVE4_IMAGE_FORMAT_R32_SINT 0x00000027 +#define NVE4_IMAGE_FORMAT_R32_UINT 0x00000028 +#define NVE4_IMAGE_FORMAT_R32_FLOAT 0x00000029 +#define NVE4_IMAGE_FORMAT_RG8_UNORM 0x0000002e +#define 
NVE4_IMAGE_FORMAT_RG8_SNORM 0x0000002f +#define NVE4_IMAGE_FORMAT_RG8_SINT 0x00000030 +#define NVE4_IMAGE_FORMAT_RG8_UINT 0x00000031 +#define NVE4_IMAGE_FORMAT_R16_UNORM 0x00000032 +#define NVE4_IMAGE_FORMAT_R16_SNORM 0x00000033 +#define NVE4_IMAGE_FORMAT_R16_SINT 0x00000034 +#define NVE4_IMAGE_FORMAT_R16_UINT 0x00000035 +#define NVE4_IMAGE_FORMAT_R16_FLOAT 0x00000036 +#define NVE4_IMAGE_FORMAT_R8_UNORM 0x00000037 +#define NVE4_IMAGE_FORMAT_R8_SNORM 0x00000038 +#define NVE4_IMAGE_FORMAT_R8_SINT 0x00000039 +#define NVE4_IMAGE_FORMAT_R8_UINT 0x0000003a +#define NV50_QUERY__SIZE 0x00000010 +#define NV50_QUERY_COUNTER 0x00000000 + +#define NV50_QUERY_RES 0x00000004 + +#define NV50_QUERY_TIME 0x00000008 + + +#endif /* NV50_DEFS_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_draw.c b/src/gallium/drivers/nouveau/nv50/nv50_draw.c new file mode 100644 index 00000000000..fa68cd8ee6a --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_draw.c @@ -0,0 +1,88 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "draw/draw_pipe.h" + +#include "nv50/nv50_context.h" + +struct nv50_render_stage { + struct draw_stage stage; + struct nv50_context *nv50; +}; + +static INLINE struct nv50_render_stage * +nv50_render_stage(struct draw_stage *stage) +{ + return (struct nv50_render_stage *)stage; +} + +static void +nv50_render_point(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_line(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_tri(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_flush(struct draw_stage *stage, unsigned flags) +{ +} + +static void +nv50_render_reset_stipple_counter(struct draw_stage *stage) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_destroy(struct draw_stage *stage) +{ + FREE(stage); +} + +/* + * Allocate a render stage for the given context and install the stub + * point/line/tri/flush/reset_stipple_counter/destroy callbacks defined in + * this file. Returns a pointer to the embedded draw_stage, or NULL if the + * allocation fails. The stage is released through its destroy callback. + */ +struct draw_stage * +nv50_draw_render_stage(struct nv50_context *nv50) +{ + struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage); + + /* Do not dereference the stage if the allocation failed. */ + if (!rs) + return NULL; + + rs->nv50 = nv50; + rs->stage.draw = nv50->draw; + rs->stage.destroy = nv50_render_destroy; + rs->stage.point = nv50_render_point; + rs->stage.line = nv50_render_line; + rs->stage.tri = nv50_render_tri; + rs->stage.flush = nv50_render_flush; + rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter; + + return &rs->stage; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c new file mode 100644 index 00000000000..0a7e812ba13 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c @@ -0,0 +1,504 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free 
of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#if NOUVEAU_DRIVER == 0xc0 +# include "nvc0/nvc0_screen.h" +# include "nvc0/nvc0_3d.xml.h" +#else +# include "nv50/nv50_screen.h" +# include "nv50/nv50_3d.xml.h" +#endif +#include "nv50/nv50_texture.xml.h" +#include "nv50/nv50_defs.xml.h" + +#include "pipe/p_defines.h" + +/* Abbreviated usage masks: + * T: texturing + * R: render target + * B: render target, blendable + * C: render target (color), blendable only on nvc0 + * D: scanout/display target, blendable + * Z: depth/stencil + * V: vertex fetch + * I: image / surface, implies T + */ +#define U_V PIPE_BIND_VERTEX_BUFFER +#define U_T PIPE_BIND_SAMPLER_VIEW +#define U_I PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE +#define U_TR PIPE_BIND_RENDER_TARGET | U_T +#define U_IR U_TR | U_I +#define U_TB PIPE_BIND_BLENDABLE | U_TR +#define U_IB PIPE_BIND_BLENDABLE | U_IR +#define U_TD PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB +#define U_TZ PIPE_BIND_DEPTH_STENCIL | U_T +#define U_TV U_V | U_T +#define U_TRV U_V | U_TR +#define U_IRV U_V | U_IR +#define U_TBV U_V | U_TB +#define U_IBV U_V | U_IB +#define U_TDV U_V | U_TD +#if NOUVEAU_DRIVER == 0xc0 +# define U_TC U_TB +# define U_IC U_IB +# define U_TCV U_TBV +# define U_ICV U_IBV +# define U_tV U_TV +#else +# define U_TC U_TR +# define U_IC U_IR +# define U_TCV U_TRV +# define U_ICV U_IRV +# define U_tV U_V +#endif + +#define NV50_SURFACE_FORMAT_NONE 0 +#define NV50_ZETA_FORMAT_NONE 0 + +/* for vertex buffers: */ +#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8 +#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16 +#define NV50_TIC_0_FMT_32_32_32 NVC0_TIC_0_FMT_32_32_32 + +#if NOUVEAU_DRIVER == 0xc0 +# define NVXX_3D_VAF_SIZE(s) NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##s +# define NVXX_3D_VAF_TYPE(t) NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t +#else +# define NVXX_3D_VAF_SIZE(s) NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_##s +# define NVXX_3D_VAF_TYPE(t) NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_##t +#endif + +#define TBLENT_A_(pf, sf, r, g, b, a, t0, t1, t2, 
t3, sz, u, br) \ + [PIPE_FORMAT_##pf] = { \ + sf, \ + (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \ + (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \ + (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \ + (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \ + (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ + (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ + (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ + (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ + NV50_TIC_0_FMT_##sz, \ + /* br lands in bit 31: shift as unsigned, 1 << 31 overflows int */ \ + NVXX_3D_VAF_SIZE(sz) | \ + NVXX_3D_VAF_TYPE(t0) | ((unsigned)(br) << 31), \ + U_##u \ + } + +#define TBLENT_B_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \ + [PIPE_FORMAT_##pf] = { \ + sf, \ + (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \ + (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \ + (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \ + (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \ + (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ + (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ + (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ + (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ + NV50_TIC_0_FMT_##sz, 0, U_##u \ + } + +#define C4A(p, n, r, g, b, a, t, s, u, br) \ + TBLENT_A_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u, br) +#define C4B(p, n, r, g, b, a, t, s, u) \ + TBLENT_B_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u) + +#define ZXB(p, n, r, g, b, a, t, s, u) \ + TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \ + r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u) +#define ZSB(p, n, r, g, b, a, t, s, u) \ + TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \ + r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u) +#define SZB(p, n, r, g, b, a, t, s, u) \ + TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \ + r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u) + +#define F3A(p, n, r, g, b, a, t, s, u) \ + C4A(p, n, r, g, b, ONE_FLOAT, t, s, u, 0) +#define I3A(p, n, r, g, b, a, t, s, u) \ + C4A(p, n, r, g, b, ONE_INT, t, s, u, 0) +#define F3B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, r, g, b, 
ONE_FLOAT, t, s, u) +#define I3B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, r, g, b, ONE_INT, t, s, u) + +#define F2A(p, n, r, g, b, a, t, s, u) \ + C4A(p, n, r, g, ZERO, ONE_FLOAT, t, s, u, 0) +#define I2A(p, n, r, g, b, a, t, s, u) \ + C4A(p, n, r, g, ZERO, ONE_INT, t, s, u, 0) +#define F2B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, r, g, ZERO, ONE_FLOAT, t, s, u) +#define I2B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, r, g, ZERO, ONE_INT, t, s, u) + +#define F1A(p, n, r, g, b, a, t, s, u) \ + C4A(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u, 0) +#define I1A(p, n, r, g, b, a, t, s, u) \ + C4A(p, n, r, ZERO, ZERO, ONE_INT, t, s, u, 0) +#define F1B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u) +#define I1B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, r, ZERO, ZERO, ONE_INT, t, s, u) + +#define A1B(p, n, r, g, b, a, t, s, u) \ + C4B(p, n, ZERO, ZERO, ZERO, a, t, s, u) + +#if NOUVEAU_DRIVER == 0xc0 +const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = +#else +const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = +#endif +{ + C4A(B8G8R8A8_UNORM, BGRA8_UNORM, C2, C1, C0, C3, UNORM, 8_8_8_8, TDV, 1), + F3A(B8G8R8X8_UNORM, BGRX8_UNORM, C2, C1, C0, xx, UNORM, 8_8_8_8, TD), + C4A(B8G8R8A8_SRGB, BGRA8_SRGB, C2, C1, C0, C3, UNORM, 8_8_8_8, TD, 1), + F3A(B8G8R8X8_SRGB, BGRX8_SRGB, C2, C1, C0, xx, UNORM, 8_8_8_8, TD), + C4A(R8G8B8A8_UNORM, RGBA8_UNORM, C0, C1, C2, C3, UNORM, 8_8_8_8, IBV, 0), + F3A(R8G8B8X8_UNORM, RGBX8_UNORM, C0, C1, C2, xx, UNORM, 8_8_8_8, TB), + C4A(R8G8B8A8_SRGB, RGBA8_SRGB, C0, C1, C2, C3, UNORM, 8_8_8_8, TB, 0), + F3B(R8G8B8X8_SRGB, RGBX8_SRGB, C0, C1, C2, xx, UNORM, 8_8_8_8, TB), + + ZXB(Z16_UNORM, Z16_UNORM, C0, C0, C0, xx, UNORM, Z16, TZ), + ZXB(Z32_FLOAT, Z32_FLOAT, C0, C0, C0, xx, FLOAT, Z32, TZ), + ZXB(Z24X8_UNORM, Z24_X8_UNORM, C0, C0, C0, xx, UNORM, Z24_X8, TZ), + ZSB(Z24_UNORM_S8_UINT, Z24_S8_UNORM, C0, C0, C0, xx, UNORM, Z24_S8, TZ), + ZSB(X24S8_UINT, NONE, C1, C1, C1, xx, UNORM, Z24_S8, T), + 
SZB(S8_UINT_Z24_UNORM, S8_Z24_UNORM, C1, C1, C1, xx, UNORM, S8_Z24, TZ), + SZB(S8X24_UINT, NONE, C0, C0, C0, xx, UNORM, S8_Z24, T), + ZSB(Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, C0, C0, C0, xx, FLOAT, + Z32_S8_X24, TZ), + ZSB(X32_S8X24_UINT, NONE, C1, C1, C1, xx, FLOAT, Z32_S8_X24, T), + + F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, T), + C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TB), + F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TB), + C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T), + F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T), + F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T), + + C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2, + IBV, 0), + C4A(B10G10R10A2_UNORM, BGR10_A2_UNORM, C2, C1, C0, C3, UNORM, 10_10_10_2, + TBV, 1), + C4A(R10G10B10A2_SNORM, NONE, C0, C1, C2, C3, SNORM, 10_10_10_2, TV, 0), + C4A(B10G10R10A2_SNORM, NONE, C2, C1, C0, C3, SNORM, 10_10_10_2, TV, 1), + + F3B(R11G11B10_FLOAT, R11G11B10_FLOAT, C0, C1, C2, xx, FLOAT, 11_11_10, IB), + + F3B(L8_UNORM, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB), + F3B(L8_SRGB, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB), + F3B(L8_SNORM, R8_SNORM, C0, C0, C0, xx, SNORM, 8, TC), + I3B(L8_SINT, R8_SINT, C0, C0, C0, xx, SINT, 8, TR), + I3B(L8_UINT, R8_UINT, C0, C0, C0, xx, UINT, 8, TR), + F3B(L16_UNORM, R16_UNORM, C0, C0, C0, xx, UNORM, 16, TC), + F3B(L16_SNORM, R16_SNORM, C0, C0, C0, xx, SNORM, 16, TC), + F3B(L16_FLOAT, R16_FLOAT, C0, C0, C0, xx, FLOAT, 16, TB), + I3B(L16_SINT, R16_SINT, C0, C0, C0, xx, SINT, 16, TR), + I3B(L16_UINT, R16_UINT, C0, C0, C0, xx, UINT, 16, TR), + F3B(L32_FLOAT, R32_FLOAT, C0, C0, C0, xx, FLOAT, 32, TB), + I3B(L32_SINT, R32_SINT, C0, C0, C0, xx, SINT, 32, TR), + I3B(L32_UINT, R32_UINT, C0, C0, C0, xx, UINT, 32, TR), + + C4B(I8_UNORM, R8_UNORM, C0, C0, C0, C0, UNORM, 8, TR), + C4B(I8_SNORM, R8_SNORM, C0, C0, C0, C0, SNORM, 8, TR), + C4B(I8_SINT, R8_SINT, C0, C0, 
C0, C0, SINT, 8, TR), + C4B(I8_UINT, R8_UINT, C0, C0, C0, C0, UINT, 8, TR), + C4B(I16_UNORM, R16_UNORM, C0, C0, C0, C0, UNORM, 16, TR), + C4B(I16_SNORM, R16_SNORM, C0, C0, C0, C0, SNORM, 16, TR), + C4B(I16_FLOAT, R16_FLOAT, C0, C0, C0, C0, FLOAT, 16, TR), + C4B(I16_SINT, R16_SINT, C0, C0, C0, C0, SINT, 16, TR), + C4B(I16_UINT, R16_UINT, C0, C0, C0, C0, UINT, 16, TR), + C4B(I32_FLOAT, R32_FLOAT, C0, C0, C0, C0, FLOAT, 32, TR), + C4B(I32_SINT, R32_SINT, C0, C0, C0, C0, SINT, 32, TR), + C4B(I32_UINT, R32_UINT, C0, C0, C0, C0, UINT, 32, TR), + + A1B(A8_UNORM, A8_UNORM, xx, xx, xx, C0, UNORM, 8, TB), + A1B(A8_SNORM, R8_SNORM, xx, xx, xx, C0, SNORM, 8, T), + A1B(A8_SINT, R8_SINT, xx, xx, xx, C0, SINT, 8, T), + A1B(A8_UINT, R8_UINT, xx, xx, xx, C0, UINT, 8, T), + A1B(A16_UNORM, R16_UNORM, xx, xx, xx, C0, UNORM, 16, T), + A1B(A16_SNORM, R16_SNORM, xx, xx, xx, C0, SNORM, 16, T), + A1B(A16_FLOAT, R16_FLOAT, xx, xx, xx, C0, FLOAT, 16, T), + A1B(A16_SINT, R16_SINT, xx, xx, xx, C0, SINT, 16, T), + A1B(A16_UINT, R16_UINT, xx, xx, xx, C0, UINT, 16, T), + A1B(A32_FLOAT, R32_FLOAT, xx, xx, xx, C0, FLOAT, 32, T), + A1B(A32_SINT, R32_SINT, xx, xx, xx, C0, SINT, 32, T), + A1B(A32_UINT, R32_UINT, xx, xx, xx, C0, UINT, 32, T), + + C4B(L4A4_UNORM, NONE, C0, C0, C0, C1, UNORM, 4_4, T), + C4B(L8A8_UNORM, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T), + C4B(L8A8_SNORM, RG8_SNORM, C0, C0, C0, C1, SNORM, 8_8, T), + C4B(L8A8_SRGB, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T), + C4B(L8A8_SINT, RG8_SINT, C0, C0, C0, C1, SINT, 8_8, T), + C4B(L8A8_UINT, RG8_UINT, C0, C0, C0, C1, UINT, 8_8, T), + C4B(L16A16_UNORM, RG16_UNORM, C0, C0, C0, C1, UNORM, 16_16, T), + C4B(L16A16_SNORM, RG16_SNORM, C0, C0, C0, C1, SNORM, 16_16, T), + C4B(L16A16_FLOAT, RG16_FLOAT, C0, C0, C0, C1, FLOAT, 16_16, T), + C4B(L16A16_SINT, RG16_SINT, C0, C0, C0, C1, SINT, 16_16, T), + C4B(L16A16_UINT, RG16_UINT, C0, C0, C0, C1, UINT, 16_16, T), + C4B(L32A32_FLOAT, RG32_FLOAT, C0, C0, C0, C1, FLOAT, 32_32, T), + C4B(L32A32_SINT, 
RG32_SINT, C0, C0, C0, C1, SINT, 32_32, T), + C4B(L32A32_UINT, RG32_UINT, C0, C0, C0, C1, UINT, 32_32, T), + + F3B(DXT1_RGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T), + F3B(DXT1_SRGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T), + C4B(DXT1_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T), + C4B(DXT1_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T), + C4B(DXT3_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T), + C4B(DXT3_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T), + C4B(DXT5_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T), + C4B(DXT5_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T), + + F1B(RGTC1_UNORM, NONE, C0, xx, xx, xx, UNORM, RGTC1, T), + F1B(RGTC1_SNORM, NONE, C0, xx, xx, xx, SNORM, RGTC1, T), + F2B(RGTC2_UNORM, NONE, C0, C1, xx, xx, UNORM, RGTC2, T), + F2B(RGTC2_SNORM, NONE, C0, C1, xx, xx, SNORM, RGTC2, T), + F3B(LATC1_UNORM, NONE, C0, C0, C0, xx, UNORM, RGTC1, T), + F3B(LATC1_SNORM, NONE, C0, C0, C0, xx, SNORM, RGTC1, T), + C4B(LATC2_UNORM, NONE, C0, C0, C0, C1, UNORM, RGTC2, T), + C4B(LATC2_SNORM, NONE, C0, C0, C0, C1, SNORM, RGTC2, T), + + C4A(R32G32B32A32_FLOAT, RGBA32_FLOAT, C0, C1, C2, C3, FLOAT, 32_32_32_32, + IBV, 0), + C4A(R32G32B32A32_UNORM, NONE, C0, C1, C2, C3, UNORM, 32_32_32_32, TV, 0), + C4A(R32G32B32A32_SNORM, NONE, C0, C1, C2, C3, SNORM, 32_32_32_32, TV, 0), + C4A(R32G32B32A32_SINT, RGBA32_SINT, C0, C1, C2, C3, SINT, 32_32_32_32, + IRV, 0), + C4A(R32G32B32A32_UINT, RGBA32_UINT, C0, C1, C2, C3, UINT, 32_32_32_32, + IRV, 0), + F3B(R32G32B32X32_FLOAT, RGBX32_FLOAT, C0, C1, C2, xx, FLOAT, 32_32_32_32, TB), + I3B(R32G32B32X32_SINT, RGBX32_SINT, C0, C1, C2, xx, SINT, 32_32_32_32, TR), + I3B(R32G32B32X32_UINT, RGBX32_UINT, C0, C1, C2, xx, UINT, 32_32_32_32, TR), + + F2A(R32G32_FLOAT, RG32_FLOAT, C0, C1, xx, xx, FLOAT, 32_32, IBV), + F2A(R32G32_UNORM, NONE, C0, C1, xx, xx, UNORM, 32_32, TV), + F2A(R32G32_SNORM, NONE, C0, C1, xx, xx, SNORM, 32_32, TV), + I2A(R32G32_SINT, RG32_SINT, C0, C1, xx, xx, SINT, 32_32, IRV), + I2A(R32G32_UINT, RG32_UINT, C0, C1, xx, xx, UINT, 32_32, 
IRV), + + F1A(R32_FLOAT, R32_FLOAT, C0, xx, xx, xx, FLOAT, 32, IBV), + F1A(R32_UNORM, NONE, C0, xx, xx, xx, UNORM, 32, TV), + F1A(R32_SNORM, NONE, C0, xx, xx, xx, SNORM, 32, TV), + I1A(R32_SINT, R32_SINT, C0, xx, xx, xx, SINT, 32, IRV), + I1A(R32_UINT, R32_UINT, C0, xx, xx, xx, UINT, 32, IRV), + + C4A(R16G16B16A16_FLOAT, RGBA16_FLOAT, C0, C1, C2, C3, FLOAT, 16_16_16_16, + IBV, 0), + C4A(R16G16B16A16_UNORM, RGBA16_UNORM, C0, C1, C2, C3, UNORM, 16_16_16_16, + ICV, 0), + C4A(R16G16B16A16_SNORM, RGBA16_SNORM, C0, C1, C2, C3, SNORM, 16_16_16_16, + ICV, 0), + C4A(R16G16B16A16_SINT, RGBA16_SINT, C0, C1, C2, C3, SINT, 16_16_16_16, + IRV, 0), + C4A(R16G16B16A16_UINT, RGBA16_UINT, C0, C1, C2, C3, UINT, 16_16_16_16, + IRV, 0), + F3B(R16G16B16X16_FLOAT, RGBX16_FLOAT, C0, C1, C2, xx, FLOAT, 16_16_16_16, TB), + F3B(R16G16B16X16_UNORM, RGBA16_UNORM, C0, C1, C2, xx, UNORM, 16_16_16_16, T), + F3B(R16G16B16X16_SNORM, RGBA16_SNORM, C0, C1, C2, xx, SNORM, 16_16_16_16, T), + I3B(R16G16B16X16_SINT, RGBA16_SINT, C0, C1, C2, xx, SINT, 16_16_16_16, T), + I3B(R16G16B16X16_UINT, RGBA16_UINT, C0, C1, C2, xx, UINT, 16_16_16_16, T), + + F2A(R16G16_FLOAT, RG16_FLOAT, C0, C1, xx, xx, FLOAT, 16_16, IBV), + F2A(R16G16_UNORM, RG16_UNORM, C0, C1, xx, xx, UNORM, 16_16, ICV), + F2A(R16G16_SNORM, RG16_SNORM, C0, C1, xx, xx, SNORM, 16_16, ICV), + I2A(R16G16_SINT, RG16_SINT, C0, C1, xx, xx, SINT, 16_16, IRV), + I2A(R16G16_UINT, RG16_UINT, C0, C1, xx, xx, UINT, 16_16, IRV), + + F1A(R16_FLOAT, R16_FLOAT, C0, xx, xx, xx, FLOAT, 16, IBV), + F1A(R16_UNORM, R16_UNORM, C0, xx, xx, xx, UNORM, 16, ICV), + F1A(R16_SNORM, R16_SNORM, C0, xx, xx, xx, SNORM, 16, ICV), + I1A(R16_SINT, R16_SINT, C0, xx, xx, xx, SINT, 16, IRV), + I1A(R16_UINT, R16_UINT, C0, xx, xx, xx, UINT, 16, IRV), + + C4A(R8G8B8A8_SNORM, RGBA8_SNORM, C0, C1, C2, C3, SNORM, 8_8_8_8, ICV, 0), + C4A(R8G8B8A8_SINT, RGBA8_SINT, C0, C1, C2, C3, SINT, 8_8_8_8, IRV, 0), + C4A(R8G8B8A8_UINT, RGBA8_UINT, C0, C1, C2, C3, UINT, 8_8_8_8, IRV, 0), + 
F3B(R8G8B8X8_SNORM, RGBA8_SNORM, C0, C1, C2, xx, SNORM, 8_8_8_8, T), + I3B(R8G8B8X8_SINT, RGBA8_SINT, C0, C1, C2, xx, SINT, 8_8_8_8, T), + I3B(R8G8B8X8_UINT, RGBA8_UINT, C0, C1, C2, xx, UINT, 8_8_8_8, T), + + F2A(R8G8_UNORM, RG8_UNORM, C0, C1, xx, xx, UNORM, 8_8, IBV), + F2A(R8G8_SNORM, RG8_SNORM, C0, C1, xx, xx, SNORM, 8_8, ICV), + I2A(R8G8_SINT, RG8_SINT, C0, C1, xx, xx, SINT, 8_8, IRV), + I2A(R8G8_UINT, RG8_UINT, C0, C1, xx, xx, UINT, 8_8, IRV), + + F1A(R8_UNORM, R8_UNORM, C0, xx, xx, xx, UNORM, 8, IBV), + F1A(R8_SNORM, R8_SNORM, C0, xx, xx, xx, SNORM, 8, ICV), + I1A(R8_SINT, R8_SINT, C0, xx, xx, xx, SINT, 8, IRV), + I1A(R8_UINT, R8_UINT, C0, xx, xx, xx, UINT, 8, IRV), + + F3B(R8G8_B8G8_UNORM, NONE, C0, C1, C2, xx, UNORM, U8_YA8_V8_YB8, T), + F3B(G8R8_B8R8_UNORM, NONE, C1, C0, C2, xx, UNORM, U8_YA8_V8_YB8, T), + F3B(G8R8_G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, YA8_U8_YB8_V8, T), + F3B(R8G8_R8B8_UNORM, NONE, C1, C0, C2, xx, UNORM, YA8_U8_YB8_V8, T), + + F1B(R1_UNORM, BITMAP, C0, xx, xx, xx, UNORM, BITMAP, T), + + C4B(R4A4_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 4_4, T), + C4B(R8A8_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 8_8, T), + C4B(A4R4_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 4_4, T), + C4B(A8R8_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 8_8, T), + + TBLENT_B_(R8SG8SB8UX8U_NORM, 0, + C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, T), + TBLENT_B_(R5SG5SB6U_NORM, 0, + C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 5_5_6, T), + + /* vertex-only formats: */ + + C4A(R32G32B32A32_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 32_32_32_32, V, 0), + C4A(R32G32B32A32_USCALED, NONE, C0, C1, C2, C3, USCALED, 32_32_32_32, V, 0), + F3A(R32G32B32_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, tV), + F3A(R32G32B32_UNORM, NONE, C0, C1, C2, xx, UNORM, 32_32_32, V), + F3A(R32G32B32_SNORM, NONE, C0, C1, C2, xx, SNORM, 32_32_32, V), + I3A(R32G32B32_SINT, NONE, C0, C1, C2, xx, SINT, 32_32_32, tV), + I3A(R32G32B32_UINT, NONE, C0, C1, C2, xx, UINT, 32_32_32, tV), + 
F3A(R32G32B32_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 32_32_32, V), + F3A(R32G32B32_USCALED, NONE, C0, C1, C2, xx, USCALED, 32_32_32, V), + F2A(R32G32_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 32_32, V), + F2A(R32G32_USCALED, NONE, C0, C1, xx, xx, USCALED, 32_32, V), + F1A(R32_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 32, V), + F1A(R32_USCALED, NONE, C0, xx, xx, xx, USCALED, 32, V), + + C4A(R16G16B16A16_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 16_16_16_16, V, 0), + C4A(R16G16B16A16_USCALED, NONE, C0, C1, C2, C3, USCALED, 16_16_16_16, V, 0), + F3A(R16G16B16_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 16_16_16, V), + F3A(R16G16B16_UNORM, NONE, C0, C1, C2, xx, UNORM, 16_16_16, V), + F3A(R16G16B16_SNORM, NONE, C0, C1, C2, xx, SNORM, 16_16_16, V), + I3A(R16G16B16_SINT, NONE, C0, C1, C2, xx, SINT, 16_16_16, V), + I3A(R16G16B16_UINT, NONE, C0, C1, C2, xx, UINT, 16_16_16, V), + F3A(R16G16B16_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 16_16_16, V), + F3A(R16G16B16_USCALED, NONE, C0, C1, C2, xx, USCALED, 16_16_16, V), + F2A(R16G16_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 16_16, V), + F2A(R16G16_USCALED, NONE, C0, C1, xx, xx, USCALED, 16_16, V), + F1A(R16_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 16, V), + F1A(R16_USCALED, NONE, C0, xx, xx, xx, USCALED, 16, V), + + C4A(R8G8B8A8_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 8_8_8_8, V, 0), + C4A(R8G8B8A8_USCALED, NONE, C0, C1, C2, C3, USCALED, 8_8_8_8, V, 0), + F3A(R8G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, 8_8_8, V), + F3A(R8G8B8_SNORM, NONE, C0, C1, C2, xx, SNORM, 8_8_8, V), + /* three-component integer formats: I3A keeps the blue map (I2A zeroes it) */ + I3A(R8G8B8_SINT, NONE, C0, C1, C2, xx, SINT, 8_8_8, V), + I3A(R8G8B8_UINT, NONE, C0, C1, C2, xx, UINT, 8_8_8, V), + F3A(R8G8B8_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 8_8_8, V), + F3A(R8G8B8_USCALED, NONE, C0, C1, C2, xx, USCALED, 8_8_8, V), + F2A(R8G8_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 8_8, V), + F2A(R8G8_USCALED, NONE, C0, C1, xx, xx, USCALED, 8_8, V), + F1A(R8_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 8, V), + F1A(R8_USCALED, NONE, C0, xx, xx, xx, USCALED, 8, V), + + /* 
FIXED types: not supported natively, converted on VBO push */ + + C4B(R32G32B32A32_FIXED, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V), + F3B(R32G32B32_FIXED, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V), + F2B(R32G32_FIXED, NONE, C0, C1, xx, xx, FLOAT, 32_32, V), + F1B(R32_FIXED, NONE, C0, xx, xx, xx, FLOAT, 32, V), + + C4B(R64G64B64A64_FLOAT, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V), + F3B(R64G64B64_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V), + F2B(R64G64_FLOAT, NONE, C0, C1, xx, xx, FLOAT, 32_32, V), + F1B(R64_FLOAT, NONE, C0, xx, xx, xx, FLOAT, 32, V), +}; + +#if 0 +const uint8_t nv50_rt_format_map[PIPE_FORMAT_COUNT] = +{ + [PIPE_FORMAT_Z16_UNORM] = NV50_ZETA_FORMAT_Z16_UNORM, + [PIPE_FORMAT_Z24X8_UNORM] = NV50_ZETA_FORMAT_Z24_X8_UNORM, + [PIPE_FORMAT_Z24_UNORM_S8_UINT] = NV50_ZETA_FORMAT_Z24_S8_UNORM, + [PIPE_FORMAT_S8_UINT_Z24_UNORM] = NV50_ZETA_FORMAT_S8_Z24_UNORM, + [PIPE_FORMAT_Z32_FLOAT] = NV50_ZETA_FORMAT_Z32_FLOAT, + [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT, + + [PIPE_FORMAT_R1_UNORM] = NV50_SURFACE_FORMAT_BITMAP, + + [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV50_SURFACE_FORMAT_RGBA32_FLOAT, + [PIPE_FORMAT_R32G32B32X32_FLOAT] = NV50_SURFACE_FORMAT_RGBX32_FLOAT, + [PIPE_FORMAT_R32G32B32A32_SINT] = NV50_SURFACE_FORMAT_RGBA32_SINT, + [PIPE_FORMAT_R32G32B32X32_SINT] = NV50_SURFACE_FORMAT_RGBX32_SINT, + [PIPE_FORMAT_R32G32B32A32_UINT] = NV50_SURFACE_FORMAT_RGBA32_UINT, + [PIPE_FORMAT_R32G32B32X32_UINT] = NV50_SURFACE_FORMAT_RGBX32_UINT, + + [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV50_SURFACE_FORMAT_RGBA16_FLOAT, + [PIPE_FORMAT_R16G16B16X16_FLOAT] = NV50_SURFACE_FORMAT_RGBX16_FLOAT, + [PIPE_FORMAT_R16G16B16A16_UNORM] = NV50_SURFACE_FORMAT_RGBA16_UNORM, + [PIPE_FORMAT_R16G16B16A16_SNORM] = NV50_SURFACE_FORMAT_RGBA16_SNORM, + [PIPE_FORMAT_R16G16B16A16_SINT] = NV50_SURFACE_FORMAT_RGBA16_SINT, + [PIPE_FORMAT_R16G16B16A16_UINT] = NV50_SURFACE_FORMAT_RGBA16_UINT, + + [PIPE_FORMAT_B8G8R8A8_UNORM] = NV50_SURFACE_FORMAT_BGRA8_UNORM, + 
[PIPE_FORMAT_R8G8B8A8_UNORM] = NV50_SURFACE_FORMAT_RGBA8_UNORM, + [PIPE_FORMAT_B8G8R8X8_UNORM] = NV50_SURFACE_FORMAT_BGRX8_UNORM, + [PIPE_FORMAT_R8G8B8X8_UNORM] = NV50_SURFACE_FORMAT_RGBX8_UNORM, + [PIPE_FORMAT_B8G8R8A8_SRGB] = NV50_SURFACE_FORMAT_BGRA8_SRGB, + [PIPE_FORMAT_R8G8B8A8_SRGB] = NV50_SURFACE_FORMAT_RGBA8_SRGB, + [PIPE_FORMAT_B8G8R8X8_SRGB] = NV50_SURFACE_FORMAT_BGRX8_SRGB, + [PIPE_FORMAT_R8G8B8X8_SRGB] = NV50_SURFACE_FORMAT_RGBX8_SRGB, + [PIPE_FORMAT_R8G8B8A8_SNORM] = NV50_SURFACE_FORMAT_RGBA8_SNORM, + [PIPE_FORMAT_R8G8B8A8_SINT] = NV50_SURFACE_FORMAT_RGBA8_SINT, + [PIPE_FORMAT_R8G8B8A8_UINT] = NV50_SURFACE_FORMAT_RGBA8_UINT, + + [PIPE_FORMAT_R11G11B10_FLOAT] = NV50_SURFACE_FORMAT_R11G11B10_FLOAT, + + [PIPE_FORMAT_B10G10R10A2_UNORM] = NV50_SURFACE_FORMAT_BGR10_A2_UNORM, + [PIPE_FORMAT_R10G10B10A2_UNORM] = NV50_SURFACE_FORMAT_RGB10_A2_UNORM, + [PIPE_FORMAT_R10G10B10A2_UINT] = NV50_SURFACE_FORMAT_RGB10_A2_UINT, + + [PIPE_FORMAT_B5G6R5_UNORM] = NV50_SURFACE_FORMAT_B5G6R5_UNORM, + + [PIPE_FORMAT_B5G5R5A1_UNORM] = NV50_SURFACE_FORMAT_BGR5_A1_UNORM, + [PIPE_FORMAT_B5G5R5X1_UNORM] = NV50_SURFACE_FORMAT_BGR5_X1_UNORM, + + [PIPE_FORMAT_R32G32_FLOAT] = NV50_SURFACE_FORMAT_RG32_FLOAT, + [PIPE_FORMAT_R32G32_SINT] = NV50_SURFACE_FORMAT_RG32_SINT, + [PIPE_FORMAT_R32G32_UINT] = NV50_SURFACE_FORMAT_RG32_UINT, + + [PIPE_FORMAT_R16G16_FLOAT] = NV50_SURFACE_FORMAT_RG16_FLOAT, + [PIPE_FORMAT_R16G16_UNORM] = NV50_SURFACE_FORMAT_RG16_UNORM, + [PIPE_FORMAT_R16G16_SNORM] = NV50_SURFACE_FORMAT_RG16_SNORM, + [PIPE_FORMAT_R16G16_SINT] = NV50_SURFACE_FORMAT_RG16_SINT, + [PIPE_FORMAT_R16G16_UINT] = NV50_SURFACE_FORMAT_RG16_UINT, + + [PIPE_FORMAT_R8G8_UNORM] = NV50_SURFACE_FORMAT_RG8_UNORM, + [PIPE_FORMAT_R8G8_SNORM] = NV50_SURFACE_FORMAT_RG8_SNORM, + [PIPE_FORMAT_R8G8_SINT] = NV50_SURFACE_FORMAT_RG8_SINT, + [PIPE_FORMAT_R8G8_UINT] = NV50_SURFACE_FORMAT_RG8_UINT, + + [PIPE_FORMAT_R32_FLOAT] = NV50_SURFACE_FORMAT_R32_FLOAT, + [PIPE_FORMAT_R32_SINT] = NV50_SURFACE_FORMAT_R32_SINT, + 
[PIPE_FORMAT_R32_UINT] = NV50_SURFACE_FORMAT_R32_UINT, + + [PIPE_FORMAT_R16_FLOAT] = NV50_SURFACE_FORMAT_R16_FLOAT, + [PIPE_FORMAT_R16_UNORM] = NV50_SURFACE_FORMAT_R16_UNORM, + [PIPE_FORMAT_R16_SNORM] = NV50_SURFACE_FORMAT_R16_SNORM, + [PIPE_FORMAT_R16_SINT] = NV50_SURFACE_FORMAT_R16_SINT, + [PIPE_FORMAT_R16_UINT] = NV50_SURFACE_FORMAT_R16_UINT, + + [PIPE_FORMAT_R8_UNORM] = NV50_SURFACE_FORMAT_R8_UNORM, + [PIPE_FORMAT_R8_SNORM] = NV50_SURFACE_FORMAT_R8_SNORM, + [PIPE_FORMAT_R8_SINT] = NV50_SURFACE_FORMAT_R8_SINT, + [PIPE_FORMAT_R8_UINT] = NV50_SURFACE_FORMAT_R8_UINT, + + [PIPE_FORMAT_A8_UNORM] = NV50_SURFACE_FORMAT_A8_UNORM +}; +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c new file mode 100644 index 00000000000..513d8f96aac --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c @@ -0,0 +1,498 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "nv50/nv50_context.h" +#include "nv50/nv50_resource.h" + +uint32_t +nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz) +{ + uint32_t tile_mode = 0x000; + + if (ny > 64) tile_mode = 0x040; /* height 128 tiles */ + else + if (ny > 32) tile_mode = 0x030; /* height 64 tiles */ + else + if (ny > 16) tile_mode = 0x020; /* height 32 tiles */ + else + if (ny > 8) tile_mode = 0x010; /* height 16 tiles */ + + if (nz == 1) + return tile_mode; + else + if (tile_mode > 0x020) + tile_mode = 0x020; + + if (nz > 16 && tile_mode < 0x020) + return tile_mode | 0x500; /* depth 32 tiles */ + if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */ + if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */ + if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */ + + return tile_mode | 0x100; +} + +static uint32_t +nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz) +{ + return nv50_tex_choose_tile_dims_helper(nx, ny * 2, nz); +} + +static uint32_t +nv50_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed) +{ + const unsigned ms = mt->ms_x + mt->ms_y; + + uint32_t tile_flags; + + if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR)) + return 0; + if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR)) + return 0; + + switch (mt->base.base.format) { + case PIPE_FORMAT_Z16_UNORM: + tile_flags = 0x6c + ms; + break; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + tile_flags = 0x18 + ms; + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + tile_flags = 0x128 + ms; + break; + case PIPE_FORMAT_Z32_FLOAT: + 
tile_flags = 0x40 + ms; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + tile_flags = 0x60 + ms; + break; + default: + switch (util_format_get_blocksizebits(mt->base.base.format)) { + case 128: + assert(ms < 3); + tile_flags = 0x74; + break; + case 64: + switch (ms) { + case 2: tile_flags = 0xfc; break; + case 3: tile_flags = 0xfd; break; + default: + tile_flags = 0x70; + break; + } + break; + case 32: + if (mt->base.base.bind & PIPE_BIND_SCANOUT) { + assert(ms == 0); + tile_flags = 0x7a; + } else { + switch (ms) { + case 2: tile_flags = 0xf8; break; + case 3: tile_flags = 0xf9; break; + default: + tile_flags = 0x70; + break; + } + } + break; + case 16: + case 8: + tile_flags = 0x70; + break; + default: + return 0; + } + if (mt->base.base.bind & PIPE_BIND_CURSOR) + tile_flags = 0; + } + + if (!compressed) + tile_flags &= ~0x180; + + return tile_flags; +} + +void +nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) +{ + struct nv50_miptree *mt = nv50_miptree(pt); + + nouveau_bo_ref(NULL, &mt->base.bo); + + nouveau_fence_ref(NULL, &mt->base.fence); + nouveau_fence_ref(NULL, &mt->base.fence_wr); + + NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_count, -1); + NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_bytes, + -(uint64_t)mt->total_size); + + FREE(mt); +} + +boolean +nv50_miptree_get_handle(struct pipe_screen *pscreen, + struct pipe_resource *pt, + struct winsys_handle *whandle) +{ + struct nv50_miptree *mt = nv50_miptree(pt); + unsigned stride; + + if (!mt || !mt->base.bo) + return FALSE; + + stride = mt->level[0].pitch; + + return nouveau_screen_bo_get_handle(pscreen, + mt->base.bo, + stride, + whandle); +} + +const struct u_resource_vtbl nv50_miptree_vtbl = +{ + nv50_miptree_get_handle, /* get_handle */ + nv50_miptree_destroy, /* resource_destroy */ + nv50_miptree_transfer_map, /* transfer_map */ + u_default_transfer_flush_region, /* transfer_flush_region */ + nv50_miptree_transfer_unmap, /* transfer_unmap */ + 
u_default_transfer_inline_write /* transfer_inline_write */ +}; + +static INLINE boolean +nv50_miptree_init_ms_mode(struct nv50_miptree *mt) +{ + switch (mt->base.base.nr_samples) { + case 8: + mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS8; + mt->ms_x = 2; + mt->ms_y = 1; + break; + case 4: + mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS4; + mt->ms_x = 1; + mt->ms_y = 1; + break; + case 2: + mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS2; + mt->ms_x = 1; + break; + case 1: + case 0: + mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1; + break; + default: + NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples); + return FALSE; + } + return TRUE; +} + +boolean +nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align) +{ + struct pipe_resource *pt = &mt->base.base; + const unsigned blocksize = util_format_get_blocksize(pt->format); + unsigned h = pt->height0; + + if (util_format_is_depth_or_stencil(pt->format)) + return FALSE; + + if ((pt->last_level > 0) || (pt->depth0 > 1) || (pt->array_size > 1)) + return FALSE; + if (mt->ms_x | mt->ms_y) + return FALSE; + + mt->level[0].pitch = align(pt->width0 * blocksize, pitch_align); + + /* Account for very generous prefetch (allocate size as if tiled). */ + h = MAX2(h, 8); + h = util_next_power_of_two(h); + + mt->total_size = mt->level[0].pitch * h; + + return TRUE; +} + +static void +nv50_miptree_init_layout_video(struct nv50_miptree *mt) +{ + const struct pipe_resource *pt = &mt->base.base; + const unsigned blocksize = util_format_get_blocksize(pt->format); + + assert(pt->last_level == 0); + assert(mt->ms_x == 0 && mt->ms_y == 0); + assert(!util_format_is_compressed(pt->format)); + + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + + mt->level[0].tile_mode = 0x20; + mt->level[0].pitch = align(pt->width0 * blocksize, 64); + mt->total_size = align(pt->height0, 16) * mt->level[0].pitch * (mt->layout_3d ? 
pt->depth0 : 1); + + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, NV50_TILE_SIZE(0x20)); + mt->total_size = mt->layer_stride * pt->array_size; + } +} + +static void +nv50_miptree_init_layout_tiled(struct nv50_miptree *mt) +{ + struct pipe_resource *pt = &mt->base.base; + unsigned w, h, d, l; + const unsigned blocksize = util_format_get_blocksize(pt->format); + + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + + w = pt->width0 << mt->ms_x; + h = pt->height0 << mt->ms_y; + + /* For 3D textures, a mipmap is spanned by all the layers, for array + * textures and cube maps, each layer contains its own mipmaps. + */ + d = mt->layout_3d ? pt->depth0 : 1; + + for (l = 0; l <= pt->last_level; ++l) { + struct nv50_miptree_level *lvl = &mt->level[l]; + unsigned tsx, tsy, tsz; + unsigned nbx = util_format_get_nblocksx(pt->format, w); + unsigned nby = util_format_get_nblocksy(pt->format, h); + + lvl->offset = mt->total_size; + + lvl->tile_mode = nv50_tex_choose_tile_dims(nbx, nby, d); + + tsx = NV50_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */ + tsy = NV50_TILE_SIZE_Y(lvl->tile_mode); + tsz = NV50_TILE_SIZE_Z(lvl->tile_mode); + + lvl->pitch = align(nbx * blocksize, tsx); + + mt->total_size += lvl->pitch * align(nby, tsy) * align(d, tsz); + + w = u_minify(w, 1); + h = u_minify(h, 1); + d = u_minify(d, 1); + } + + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, + NV50_TILE_SIZE(mt->level[0].tile_mode)); + mt->total_size = mt->layer_stride * pt->array_size; + } +} + +struct pipe_resource * +nv50_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ) +{ + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); + struct pipe_resource *pt = &mt->base.base; + int ret; + union nouveau_bo_config bo_config; + uint32_t bo_flags; + + if (!mt) + return NULL; + + mt->base.vtbl = &nv50_miptree_vtbl; + *pt = *templ; + 
pipe_reference_init(&pt->reference, 1); + pt->screen = pscreen; + + if (pt->bind & PIPE_BIND_LINEAR) + pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR; + + bo_config.nv50.memtype = nv50_mt_choose_storage_type(mt, TRUE); + + if (!nv50_miptree_init_ms_mode(mt)) { + FREE(mt); + return NULL; + } + + if (unlikely(pt->flags & NV50_RESOURCE_FLAG_VIDEO)) { + nv50_miptree_init_layout_video(mt); + if (pt->flags & NV50_RESOURCE_FLAG_NOALLOC) { + /* BO allocation done by client */ + return pt; + } + } else + if (bo_config.nv50.memtype != 0) { + nv50_miptree_init_layout_tiled(mt); + } else + if (!nv50_miptree_init_layout_linear(mt, 64)) { + FREE(mt); + return NULL; + } + bo_config.nv50.tile_mode = mt->level[0].tile_mode; + + bo_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP; + if (mt->base.base.bind & (PIPE_BIND_CURSOR | PIPE_BIND_DISPLAY_TARGET)) + bo_flags |= NOUVEAU_BO_CONTIG; + + ret = nouveau_bo_new(dev, bo_flags, 4096, mt->total_size, &bo_config, + &mt->base.bo); + if (ret) { + FREE(mt); + return NULL; + } + mt->base.domain = NOUVEAU_BO_VRAM; + mt->base.address = mt->base.bo->offset; + + return pt; +} + +struct pipe_resource * +nv50_miptree_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) +{ + struct nv50_miptree *mt; + unsigned stride; + + /* only supports 2D, non-mipmapped textures for the moment */ + if ((templ->target != PIPE_TEXTURE_2D && + templ->target != PIPE_TEXTURE_RECT) || + templ->last_level != 0 || + templ->depth0 != 1 || + templ->array_size > 1) + return NULL; + + mt = CALLOC_STRUCT(nv50_miptree); + if (!mt) + return NULL; + + mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride); + if (mt->base.bo == NULL) { + FREE(mt); + return NULL; + } + mt->base.domain = NOUVEAU_BO_VRAM; + mt->base.address = mt->base.bo->offset; + + mt->base.base = *templ; + mt->base.vtbl = &nv50_miptree_vtbl; + pipe_reference_init(&mt->base.base.reference, 1); + mt->base.base.screen = pscreen; + mt->level[0].pitch = 
stride; + mt->level[0].offset = 0; + mt->level[0].tile_mode = mt->base.bo->config.nv50.tile_mode; + + /* no need to adjust bo reference count */ + return &mt->base.base; +} + + +/* Offset of zslice @z from start of level @l. */ +INLINE unsigned +nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z) +{ + const struct pipe_resource *pt = &mt->base.base; + + unsigned tds = NV50_TILE_SHIFT_Z(mt->level[l].tile_mode); + unsigned ths = NV50_TILE_SHIFT_Y(mt->level[l].tile_mode); + + unsigned nby = util_format_get_nblocksy(pt->format, + u_minify(pt->height0, l)); + + /* to next 2D tile slice within a 3D tile */ + unsigned stride_2d = NV50_TILE_SIZE_2D(mt->level[l].tile_mode); + + /* to slice in the next (in z direction) 3D tile */ + unsigned stride_3d = (align(nby, (1 << ths)) * mt->level[l].pitch) << tds; + + return (z & ((1 << tds) - 1)) * stride_2d + (z >> tds) * stride_3d; +} + +/* Surface functions. + */ + +struct nv50_surface * +nv50_surface_from_miptree(struct nv50_miptree *mt, + const struct pipe_surface *templ) +{ + struct pipe_surface *ps; + struct nv50_surface *ns = CALLOC_STRUCT(nv50_surface); + if (!ns) + return NULL; + ps = &ns->base; + + pipe_reference_init(&ps->reference, 1); + pipe_resource_reference(&ps->texture, &mt->base.base); + + ps->format = templ->format; + ps->writable = templ->writable; + ps->u.tex.level = templ->u.tex.level; + ps->u.tex.first_layer = templ->u.tex.first_layer; + ps->u.tex.last_layer = templ->u.tex.last_layer; + + ns->width = u_minify(mt->base.base.width0, ps->u.tex.level); + ns->height = u_minify(mt->base.base.height0, ps->u.tex.level); + ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1; + ns->offset = mt->level[templ->u.tex.level].offset; + + /* comment says there are going to be removed, but they're used by the st */ + ps->width = ns->width; + ps->height = ns->height; + + ns->width <<= mt->ms_x; + ns->height <<= mt->ms_y; + + return ns; +} + +struct pipe_surface * 
+nv50_miptree_surface_new(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *templ) +{ + struct nv50_miptree *mt = nv50_miptree(pt); + struct nv50_surface *ns = nv50_surface_from_miptree(mt, templ); + if (!ns) + return NULL; + ns->base.context = pipe; + + if (ns->base.u.tex.first_layer) { + const unsigned l = ns->base.u.tex.level; + const unsigned z = ns->base.u.tex.first_layer; + + if (mt->layout_3d) { + ns->offset += nv50_mt_zslice_offset(mt, l, z); + + /* TODO: switch to depth 1 tiles; but actually this shouldn't happen */ + if (ns->depth > 1 && + (z & (NV50_TILE_SIZE_Z(mt->level[l].tile_mode) - 1))) + NOUVEAU_ERR("Creating unsupported 3D surface !\n"); + } else { + ns->offset += mt->layer_stride * z; + } + } + + return &ns->base; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c new file mode 100644 index 00000000000..73df71c61e2 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -0,0 +1,445 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv50/nv50_program.h" +#include "nv50/nv50_context.h" + +#include "codegen/nv50_ir_driver.h" + +static INLINE unsigned +bitcount4(const uint32_t val) +{ + static const uint8_t cnt[16] + = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; + return cnt[val & 0xf]; +} + +static int +nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) +{ + struct nv50_program *prog = (struct nv50_program *)info->driverPriv; + unsigned i, n, c; + + n = 0; + for (i = 0; i < info->numInputs; ++i) { + prog->in[i].id = i; + prog->in[i].sn = info->in[i].sn; + prog->in[i].si = info->in[i].si; + prog->in[i].hw = n; + prog->in[i].mask = info->in[i].mask; + + prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32); + + for (c = 0; c < 4; ++c) + if (info->in[i].mask & (1 << c)) + info->in[i].slot[c] = n++; + } + prog->in_nr = info->numInputs; + + for (i = 0; i < info->numSysVals; ++i) { + switch (info->sv[i].sn) { + case TGSI_SEMANTIC_INSTANCEID: + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID; + continue; + case TGSI_SEMANTIC_VERTEXID: + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12; + continue; + default: + break; + } + } + + /* + * Corner case: VP has no inputs, but we will still need to submit data to + * draw it. HW will shout at us and won't draw anything if we don't enable + * any input, so let's just pretend it's the first one. 
+ */ + if (prog->vp.attrs[0] == 0 && + prog->vp.attrs[1] == 0 && + prog->vp.attrs[2] == 0) + prog->vp.attrs[0] |= 0xf; + + /* VertexID before InstanceID */ + if (info->io.vertexId < info->numSysVals) + info->sv[info->io.vertexId].slot[0] = n++; + if (info->io.instanceId < info->numSysVals) + info->sv[info->io.instanceId].slot[0] = n++; + + n = 0; + for (i = 0; i < info->numOutputs; ++i) { + switch (info->out[i].sn) { + case TGSI_SEMANTIC_PSIZE: + prog->vp.psiz = i; + break; + case TGSI_SEMANTIC_CLIPDIST: + prog->vp.clpd[info->out[i].si] = n; + break; + case TGSI_SEMANTIC_EDGEFLAG: + prog->vp.edgeflag = i; + break; + case TGSI_SEMANTIC_BCOLOR: + prog->vp.bfc[info->out[i].si] = i; + break; + default: + break; + } + prog->out[i].id = i; + prog->out[i].sn = info->out[i].sn; + prog->out[i].si = info->out[i].si; + prog->out[i].hw = n; + prog->out[i].mask = info->out[i].mask; + + for (c = 0; c < 4; ++c) + if (info->out[i].mask & (1 << c)) + info->out[i].slot[c] = n++; + } + prog->out_nr = info->numOutputs; + prog->max_out = n; + + if (prog->vp.psiz < info->numOutputs) + prog->vp.psiz = prog->out[prog->vp.psiz].hw; + + return 0; +} + +static int +nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info) +{ + struct nv50_program *prog = (struct nv50_program *)info->driverPriv; + unsigned i, n, m, c; + unsigned nvary; + unsigned nflat; + unsigned nintp = 0; + + /* count recorded non-flat inputs */ + for (m = 0, i = 0; i < info->numInputs; ++i) { + switch (info->in[i].sn) { + case TGSI_SEMANTIC_POSITION: + case TGSI_SEMANTIC_FACE: + continue; + default: + m += info->in[i].flat ? 0 : 1; + break; + } + } + /* careful: id may be != i in info->in[prog->in[i].id] */ + + /* Fill prog->in[] so that non-flat inputs are first and + * kick out special inputs that don't use the RESULT_MAP. 
+ */ + for (n = 0, i = 0; i < info->numInputs; ++i) { + if (info->in[i].sn == TGSI_SEMANTIC_POSITION) { + prog->fp.interp |= info->in[i].mask << 24; + for (c = 0; c < 4; ++c) + if (info->in[i].mask & (1 << c)) + info->in[i].slot[c] = nintp++; + } else + if (info->in[i].sn == TGSI_SEMANTIC_FACE) { + info->in[i].slot[0] = 255; + } else { + unsigned j = info->in[i].flat ? m++ : n++; + + if (info->in[i].sn == TGSI_SEMANTIC_COLOR) + prog->vp.bfc[info->in[i].si] = j; + + prog->in[j].id = i; + prog->in[j].mask = info->in[i].mask; + prog->in[j].sn = info->in[i].sn; + prog->in[j].si = info->in[i].si; + prog->in[j].linear = info->in[i].linear; + + prog->in_nr++; + } + } + if (!(prog->fp.interp & (8 << 24))) { + ++nintp; + prog->fp.interp |= 8 << 24; + } + + for (i = 0; i < prog->in_nr; ++i) { + int j = prog->in[i].id; + + prog->in[i].hw = nintp; + for (c = 0; c < 4; ++c) + if (prog->in[i].mask & (1 << c)) + info->in[j].slot[c] = nintp++; + } + /* (n == m) if m never increased, i.e. no flat inputs */ + nflat = (n < m) ? 
(nintp - prog->in[n].hw) : 0; + nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */ + nvary = nintp - nflat; + + prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT; + prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT; + + /* put front/back colors right after HPOS */ + prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT; + for (i = 0; i < 2; ++i) + if (prog->vp.bfc[i] < 0xff) + prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16; + + /* FP outputs */ + + if (info->prop.fp.numColourResults > 1) + prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS; + + for (i = 0; i < info->numOutputs; ++i) { + prog->out[i].id = i; + prog->out[i].sn = info->out[i].sn; + prog->out[i].si = info->out[i].si; + prog->out[i].mask = info->out[i].mask; + + if (i == info->io.fragDepth || i == info->io.sampleMask) + continue; + prog->out[i].hw = info->out[i].si * 4; + + for (c = 0; c < 4; ++c) + info->out[i].slot[c] = prog->out[i].hw + c; + + prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4); + } + + if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) + info->out[info->io.sampleMask].slot[0] = prog->max_out++; + + if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) + info->out[info->io.fragDepth].slot[2] = prog->max_out++; + + if (!prog->max_out) + prog->max_out = 4; + + return 0; +} + +static int +nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info) +{ + switch (info->type) { + case PIPE_SHADER_VERTEX: + return nv50_vertprog_assign_slots(info); + case PIPE_SHADER_GEOMETRY: + return nv50_vertprog_assign_slots(info); + case PIPE_SHADER_FRAGMENT: + return nv50_fragprog_assign_slots(info); + default: + return -1; + } +} + +static struct nv50_stream_output_state * +nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info, + const struct pipe_stream_output_info *pso) +{ + struct nv50_stream_output_state *so; + unsigned b, i, c; + unsigned base[4]; + + so = 
MALLOC_STRUCT(nv50_stream_output_state); + if (!so) + return NULL; + memset(so->map, 0xff, sizeof(so->map)); + + for (b = 0; b < 4; ++b) + so->num_attribs[b] = 0; + for (i = 0; i < pso->num_outputs; ++i) { + unsigned end = pso->output[i].dst_offset + pso->output[i].num_components; + b = pso->output[i].output_buffer; + assert(b < 4); + so->num_attribs[b] = MAX2(so->num_attribs[b], end); + } + + so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED; + + so->stride[0] = pso->stride[0] * 4; + base[0] = 0; + for (b = 1; b < 4; ++b) { + assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]); + so->stride[b] = so->num_attribs[b] * 4; + if (so->num_attribs[b]) + so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT; + base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4); + } + if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) { + assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX); + so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT; + } + + so->map_size = base[3] + so->num_attribs[3]; + + for (i = 0; i < pso->num_outputs; ++i) { + const unsigned s = pso->output[i].start_component; + const unsigned p = pso->output[i].dst_offset; + const unsigned r = pso->output[i].register_index; + b = pso->output[i].output_buffer; + + for (c = 0; c < pso->output[i].num_components; ++c) + so->map[base[b] + p + c] = info->out[r].slot[s + c]; + } + + return so; +} + +boolean +nv50_program_translate(struct nv50_program *prog, uint16_t chipset) +{ + struct nv50_ir_prog_info *info; + int ret; + const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 
0x40 : 0x80; + + info = CALLOC_STRUCT(nv50_ir_prog_info); + if (!info) + return FALSE; + + info->type = prog->type; + info->target = chipset; + info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; + info->bin.source = (void *)prog->pipe.tokens; + + info->io.ucpCBSlot = 15; + info->io.ucpBase = 0; + info->io.genUserClip = prog->vp.clpd_nr; + + info->assignSlots = nv50_program_assign_varying_slots; + + prog->vp.bfc[0] = 0xff; + prog->vp.bfc[1] = 0xff; + prog->vp.edgeflag = 0xff; + prog->vp.clpd[0] = map_undef; + prog->vp.clpd[1] = map_undef; + prog->vp.psiz = map_undef; + prog->gp.primid = 0x80; + + info->driverPriv = prog; + +#ifdef DEBUG + info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); + info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); +#else + info->optLevel = 3; +#endif + + ret = nv50_ir_generate_code(info); + if (ret) { + NOUVEAU_ERR("shader translation failed: %i\n", ret); + goto out; + } + FREE(info->bin.syms); + + prog->code = info->bin.code; + prog->code_size = info->bin.codeSize; + prog->fixups = info->bin.relocData; + prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); + prog->tls_space = info->bin.tlsSpace; + + if (prog->type == PIPE_SHADER_FRAGMENT) { + if (info->prop.fp.writesDepth) { + prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; + prog->fp.flags[1] = 0x11; + } + if (info->prop.fp.usesDiscard) + prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; + } + + if (prog->pipe.stream_output.num_outputs) + prog->so = nv50_program_create_strmout_state(info, + &prog->pipe.stream_output); + +out: + FREE(info); + return !ret; +} + +boolean +nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) +{ + struct nouveau_heap *heap; + int ret; + uint32_t size = align(prog->code_size, 0x40); + + switch (prog->type) { + case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; + case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break; + case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break; 
+ default: + assert(!"invalid program type"); + return FALSE; + } + + ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); + if (ret) { + /* Out of space: evict everything to compactify the code segment, hoping + * the working set is much smaller and drifts slowly. Improve me ! + */ + while (heap->next) { + struct nv50_program *evict = heap->next->priv; + if (evict) + nouveau_heap_free(&evict->mem); + } + debug_printf("WARNING: out of code space, evicting all shaders.\n"); + ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); + if (ret) { + NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size); + return FALSE; + } + } + prog->code_base = prog->mem->start; + + ret = nv50_tls_realloc(nv50->screen, prog->tls_space); + if (ret < 0) + return FALSE; + if (ret > 0) + nv50->state.new_tls_space = TRUE; + + if (prog->fixups) + nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0); + + nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, + (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, + NOUVEAU_BO_VRAM, prog->code_size, prog->code); + + BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1); + PUSH_DATA (nv50->base.pushbuf, 0); + + return TRUE; +} + +void +nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) +{ + const struct pipe_shader_state pipe = p->pipe; + const ubyte type = p->type; + + if (p->mem) + nouveau_heap_free(&p->mem); + + FREE(p->code); + + FREE(p->fixups); + + FREE(p->so); + + memset(p, 0, sizeof(*p)); + + p->pipe = pipe; + p->type = type; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h new file mode 100644 index 00000000000..13b9516a3e4 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -0,0 +1,106 @@ +/* + * Copyright 2010 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to 
deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NV50_PROG_H__ +#define __NV50_PROG_H__ + +struct nv50_context; + +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" + +struct nv50_varying { + uint8_t id; /* tgsi index */ + uint8_t hw; /* hw index, nv50 wants flat FP inputs last */ + + unsigned mask : 4; + unsigned linear : 1; + unsigned pad : 3; + + ubyte sn; /* semantic name */ + ubyte si; /* semantic index */ +}; + +struct nv50_stream_output_state +{ + uint32_t ctrl; + uint16_t stride[4]; + uint8_t num_attribs[4]; + uint8_t map_size; + uint8_t map[128]; +}; + +struct nv50_program { + struct pipe_shader_state pipe; + + ubyte type; + boolean translated; + + uint32_t *code; + unsigned code_size; + unsigned code_base; + uint32_t *immd; + unsigned immd_size; + unsigned parm_size; /* size limit of uniform buffer */ + uint32_t tls_space; /* required local memory per thread */ + + ubyte max_gpr; /* REG_ALLOC_TEMP */ + ubyte max_out; /* REG_ALLOC_RESULT or FP_RESULT_COUNT */ + + ubyte in_nr; + ubyte out_nr; + struct nv50_varying in[16]; + struct nv50_varying out[16]; + + struct { + 
uint32_t attrs[3]; /* VP_ATTR_EN_0,1 and VP_GP_BUILTIN_ATTR_EN */ + ubyte psiz; /* output slot of point size */ + ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */ + ubyte edgeflag; + ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */ + ubyte clpd_nr; + } vp; + + struct { + uint32_t flags[2]; /* 0x19a8, 196c */ + uint32_t interp; /* 0x1988 */ + uint32_t colors; /* 0x1904 */ + } fp; + + struct { + ubyte primid; /* primitive id output register */ + uint8_t vert_count; + uint8_t prim_type; /* point, line strip or tri strip */ + } gp; + + void *fixups; /* relocation records */ + + struct nouveau_heap *mem; + + struct nv50_stream_output_state *so; +}; + +boolean nv50_program_translate(struct nv50_program *, uint16_t chipset); +boolean nv50_program_upload_code(struct nv50_context *, struct nv50_program *); +void nv50_program_destroy(struct nv50_context *, struct nv50_program *); + +#endif /* __NV50_PROG_H__ */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c new file mode 100644 index 00000000000..3e9a4096cf0 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c @@ -0,0 +1,309 @@ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nv50/nv50_context.h" +#include "nv50/nv50_resource.h" + +#include "nv50/nv50_3d.xml.h" + +struct push_context { + struct nouveau_pushbuf *push; + + const void *idxbuf; + + float edgeflag; + int edgeflag_attr; + + uint32_t vertex_words; + uint32_t packet_vertex_limit; + + struct translate *translate; + + boolean primitive_restart; + uint32_t prim; + uint32_t restart_index; + uint32_t instance_id; +}; + +static INLINE unsigned +prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static INLINE unsigned 
+prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static INLINE unsigned +prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static void +emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) +{ + uint8_t *elts = (uint8_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i08(elts, push, ctx->restart_index); + + size = ctx->vertex_words * nr; + + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); + + ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id, + ctx->push->cur); + + ctx->push->cur += size; + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (ctx->push, ctx->restart_index); + } + } +} + +static void +emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) +{ + uint16_t *elts = (uint16_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i16(elts, push, ctx->restart_index); + + size = ctx->vertex_words * nr; + + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); + + ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id, + ctx->push->cur); + + ctx->push->cur += size; + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (ctx->push, ctx->restart_index); + } + } +} + +static void +emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) +{ + uint32_t *elts = (uint32_t *)ctx->idxbuf + 
start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i32(elts, push, ctx->restart_index); + + size = ctx->vertex_words * nr; + + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); + + ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id, + ctx->push->cur); + + ctx->push->cur += size; + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (ctx->push, ctx->restart_index); + } + } +} + +static void +emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) +{ + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size = ctx->vertex_words * push; + + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); + + ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id, + ctx->push->cur); + ctx->push->cur += size; + count -= push; + start += push; + } +} + + +#define NV50_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nv50_prim_gl(unsigned prim) +{ + switch (prim) { + NV50_PRIM_GL_CASE(POINTS); + NV50_PRIM_GL_CASE(LINES); + NV50_PRIM_GL_CASE(LINE_LOOP); + NV50_PRIM_GL_CASE(LINE_STRIP); + NV50_PRIM_GL_CASE(TRIANGLES); + NV50_PRIM_GL_CASE(TRIANGLE_STRIP); + NV50_PRIM_GL_CASE(TRIANGLE_FAN); + NV50_PRIM_GL_CASE(QUADS); + NV50_PRIM_GL_CASE(QUAD_STRIP); + NV50_PRIM_GL_CASE(POLYGON); + NV50_PRIM_GL_CASE(LINES_ADJACENCY); + NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NV50_PRIM_GL_CASE(PATCHES); */ + default: + return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + break; + } +} + +void +nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) +{ + struct push_context ctx; + unsigned i, index_size; + unsigned inst_count = 
info->instance_count; + unsigned vert_count = info->count; + boolean apply_bias = info->indexed && info->index_bias; + + ctx.push = nv50->base.pushbuf; + ctx.translate = nv50->vertex->translate; + ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit; + ctx.vertex_words = nv50->vertex->vertex_size; + + for (i = 0; i < nv50->num_vtxbufs; ++i) { + const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; + const uint8_t *data; + + if (unlikely(vb->buffer)) + data = nouveau_resource_map_offset(&nv50->base, + nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD); + else + data = vb->user_buffer; + + if (apply_bias && likely(!(nv50->vertex->instance_bufs & (1 << i)))) + data += (ptrdiff_t)info->index_bias * vb->stride; + + ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); + } + + if (info->indexed) { + if (nv50->idxbuf.buffer) { + ctx.idxbuf = nouveau_resource_map_offset(&nv50->base, + nv04_resource(nv50->idxbuf.buffer), nv50->idxbuf.offset, + NOUVEAU_BO_RD); + } else { + ctx.idxbuf = nv50->idxbuf.user_buffer; + } + if (!ctx.idxbuf) + return; + index_size = nv50->idxbuf.index_size; + ctx.primitive_restart = info->primitive_restart; + ctx.restart_index = info->restart_index; + } else { + if (unlikely(info->count_from_stream_output)) { + struct pipe_context *pipe = &nv50->base.pipe; + struct nv50_so_target *targ; + targ = nv50_so_target(info->count_from_stream_output); + if (!targ->pq) { + NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n"); + return; + } + pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count); + vert_count /= targ->stride; + } + ctx.idxbuf = NULL; + index_size = 0; + ctx.primitive_restart = FALSE; + ctx.restart_index = 0; + } + + ctx.instance_id = info->start_instance; + ctx.prim = nv50_prim_gl(info->mode); + + if (info->primitive_restart) { + BEGIN_NV04(ctx.push, NV50_3D(PRIM_RESTART_ENABLE), 2); + PUSH_DATA (ctx.push, 1); + PUSH_DATA (ctx.push, info->restart_index); + } else + if 
(nv50->state.prim_restart) { + BEGIN_NV04(ctx.push, NV50_3D(PRIM_RESTART_ENABLE), 1); + PUSH_DATA (ctx.push, 0); + } + nv50->state.prim_restart = info->primitive_restart; + + while (inst_count--) { + BEGIN_NV04(ctx.push, NV50_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (ctx.push, ctx.prim); + switch (index_size) { + case 0: + emit_vertices_seq(&ctx, info->start, vert_count); + break; + case 1: + emit_vertices_i08(&ctx, info->start, vert_count); + break; + case 2: + emit_vertices_i16(&ctx, info->start, vert_count); + break; + case 4: + emit_vertices_i32(&ctx, info->start, vert_count); + break; + default: + assert(0); + break; + } + BEGIN_NV04(ctx.push, NV50_3D(VERTEX_END_GL), 1); + PUSH_DATA (ctx.push, 0); + + ctx.instance_id++; + ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c new file mode 100644 index 00000000000..6f25a0822c4 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -0,0 +1,399 @@ +/* + * Copyright 2011 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christoph Bumiller + */ + +#define NV50_PUSH_EXPLICIT_SPACE_CHECKING + +#include "nv50/nv50_context.h" +#include "nv_object.xml.h" + +/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts + * (since we use only a single GPU channel per screen) will not work properly. + * + * The first is not that big of an issue because OpenGL does not allow nested + * queries anyway. + */ + +struct nv50_query { + uint32_t *data; + uint16_t type; + uint16_t index; + uint32_t sequence; + struct nouveau_bo *bo; + uint32_t base; + uint32_t offset; /* base + i * 16 */ + boolean ready; + boolean flushed; + boolean is64bit; + struct nouveau_mm_allocation *mm; +}; + +#define NV50_QUERY_ALLOC_SPACE 128 + +static INLINE struct nv50_query * +nv50_query(struct pipe_query *pipe) +{ + return (struct nv50_query *)pipe; +} + +static boolean +nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) +{ + struct nv50_screen *screen = nv50->screen; + int ret; + + if (q->bo) { + nouveau_bo_ref(NULL, &q->bo); + if (q->mm) { + if (q->ready) + nouveau_mm_free(q->mm); + else + nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, + q->mm); + } + } + if (size) { + q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base); + if (!q->bo) + return FALSE; + q->offset = q->base; + + ret = nouveau_bo_map(q->bo, 0, screen->base.client); + if (ret) { + nv50_query_allocate(nv50, q, 0); + return FALSE; + } + q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base); + } + return TRUE; +} + +static void +nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); + 
FREE(nv50_query(pq)); +} + +static struct pipe_query * +nv50_query_create(struct pipe_context *pipe, unsigned type) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_query *q; + + q = CALLOC_STRUCT(nv50_query); + if (!q) + return NULL; + + if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) { + FREE(q); + return NULL; + } + + q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || + type == PIPE_QUERY_PRIMITIVES_EMITTED || + type == PIPE_QUERY_SO_STATISTICS); + q->type = type; + + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + q->offset -= 16; + q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */ + } + + return (struct pipe_query *)q; +} + +static void +nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q, + unsigned offset, uint32_t get) +{ + offset += q->offset; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4); + PUSH_DATAh(push, q->bo->offset + offset); + PUSH_DATA (push, q->bo->offset + offset); + PUSH_DATA (push, q->sequence); + PUSH_DATA (push, get); +} + +static void +nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_query *q = nv50_query(pq); + + /* For occlusion queries we have to change the storage, because a previous + * query might set the initial render conition to FALSE even *after* we re- + * initialized it to TRUE. + */ + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + q->offset += 16; + q->data += 16 / sizeof(*q->data); + if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE) + nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE); + + /* XXX: can we do this with the GPU, and sync with respect to a previous + * query ? 
+ */ + q->data[1] = 1; /* initial render condition = TRUE */ + } + if (!q->is64bit) + q->data[0] = q->sequence++; /* the previously used one */ + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + PUSH_SPACE(push, 4); + BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); + PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 1); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nv50_query_get(push, q, 0x10, 0x06805002); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nv50_query_get(push, q, 0x10, 0x05805002); + break; + case PIPE_QUERY_SO_STATISTICS: + nv50_query_get(push, q, 0x20, 0x05805002); + nv50_query_get(push, q, 0x30, 0x06805002); + break; + case PIPE_QUERY_TIME_ELAPSED: + nv50_query_get(push, q, 0x10, 0x00005002); + break; + default: + break; + } + q->ready = FALSE; +} + +static void +nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_query *q = nv50_query(pq); + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + nv50_query_get(push, q, 0, 0x0100f002); + PUSH_SPACE(push, 2); + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 0); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nv50_query_get(push, q, 0, 0x06805002); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nv50_query_get(push, q, 0, 0x05805002); + break; + case PIPE_QUERY_SO_STATISTICS: + nv50_query_get(push, q, 0x00, 0x05805002); + nv50_query_get(push, q, 0x10, 0x06805002); + break; + case PIPE_QUERY_TIMESTAMP: + q->sequence++; + /* fall through */ + case PIPE_QUERY_TIME_ELAPSED: + nv50_query_get(push, q, 0, 0x00005002); + break; + case PIPE_QUERY_GPU_FINISHED: + q->sequence++; + nv50_query_get(push, q, 0, 0x1000f010); + break; + case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5)); + break; + case 
PIPE_QUERY_TIMESTAMP_DISJOINT: + break; + default: + assert(0); + break; + } + q->ready = q->flushed = FALSE; +} + +static INLINE boolean +nv50_query_ready(struct nv50_query *q) +{ + return q->ready || (!q->is64bit && (q->data[0] == q->sequence)); +} + +static boolean +nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, union pipe_query_result *result) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_query *q = nv50_query(pq); + uint64_t *res64 = (uint64_t *)result; + uint32_t *res32 = (uint32_t *)result; + boolean *res8 = (boolean *)result; + uint64_t *data64 = (uint64_t *)q->data; + + if (!q->ready) /* update ? */ + q->ready = nv50_query_ready(q); + if (!q->ready) { + if (!wait) { + /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */ + if (!q->flushed) { + q->flushed = TRUE; + PUSH_KICK(nv50->base.pushbuf); + } + return FALSE; + } + if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) + return FALSE; + } + q->ready = TRUE; + + switch (q->type) { + case PIPE_QUERY_GPU_FINISHED: + res8[0] = TRUE; + break; + case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ + res64[0] = q->data[1]; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ + case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ + res64[0] = data64[0] - data64[2]; + break; + case PIPE_QUERY_SO_STATISTICS: + res64[0] = data64[0] - data64[4]; + res64[1] = data64[2] - data64[6]; + break; + case PIPE_QUERY_TIMESTAMP: + res64[0] = data64[1]; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + res64[0] = 1000000000; + res8[8] = FALSE; + break; + case PIPE_QUERY_TIME_ELAPSED: + res64[0] = data64[1] - data64[3]; + break; + case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + res32[0] = q->data[1]; + break; + default: + return FALSE; + } + + return TRUE; +} + +void +nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq) +{ + struct nv50_query *q = nv50_query(pq); + unsigned 
offset = q->offset; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); + PUSH_DATAh(push, q->bo->offset + offset); + PUSH_DATA (push, q->bo->offset + offset); + PUSH_DATA (push, q->sequence); + PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); +} + +static void +nv50_render_condition(struct pipe_context *pipe, + struct pipe_query *pq, + boolean condition, uint mode) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_query *q; + + nv50->cond_query = pq; + nv50->cond_cond = condition; + nv50->cond_mode = mode; + + PUSH_SPACE(push, 6); + + if (!pq) { + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + return; + } + q = nv50_query(pq); + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + } + + BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3); + PUSH_DATAh(push, q->bo->offset + q->offset); + PUSH_DATA (push, q->bo->offset + q->offset); + PUSH_DATA (push, NV50_3D_COND_MODE_RES_NON_ZERO); +} + +void +nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, + struct pipe_query *pq, unsigned result_offset) +{ + struct nv50_query *q = nv50_query(pq); + + /* XXX: does this exist ? 
*/ +#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8)) + + nouveau_pushbuf_space(push, 0, 0, 1); + nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 | + NV50_IB_ENTRY_1_NO_PREFETCH); +} + +void +nva0_so_target_save_offset(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg, + unsigned index, boolean serialize) +{ + struct nv50_so_target *targ = nv50_so_target(ptarg); + + if (serialize) { + struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf; + PUSH_SPACE(push, 2); + BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + } + + nv50_query(targ->pq)->index = index; + nv50_query_end(pipe, targ->pq); +} + +void +nv50_init_query_functions(struct nv50_context *nv50) +{ + struct pipe_context *pipe = &nv50->base.pipe; + + pipe->create_query = nv50_query_create; + pipe->destroy_query = nv50_query_destroy; + pipe->begin_query = nv50_query_begin; + pipe->end_query = nv50_query_end; + pipe->get_query_result = nv50_query_result; + pipe->render_condition = nv50_render_condition; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.c b/src/gallium/drivers/nouveau/nv50/nv50_resource.c new file mode 100644 index 00000000000..7fbb0a92bf6 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.c @@ -0,0 +1,104 @@ + +#include "pipe/p_context.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "nouveau_screen.h" + +#include "nv50/nv50_resource.h" + +static struct pipe_resource * +nv50_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + switch (templ->target) { + case PIPE_BUFFER: + return nouveau_buffer_create(screen, templ); + default: + return nv50_miptree_create(screen, templ); + } +} + +static struct pipe_resource * +nv50_resource_from_handle(struct pipe_screen * screen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) +{ + if (templ->target == PIPE_BUFFER) + return NULL; + else + return nv50_miptree_from_handle(screen, 
templ, whandle); +} + +struct pipe_surface * +nv50_surface_from_buffer(struct pipe_context *pipe, + struct pipe_resource *pbuf, + const struct pipe_surface *templ) +{ + struct nv50_surface *sf = CALLOC_STRUCT(nv50_surface); + if (!sf) + return NULL; + + pipe_reference_init(&sf->base.reference, 1); + pipe_resource_reference(&sf->base.texture, pbuf); + + sf->base.format = templ->format; + sf->base.writable = templ->writable; + sf->base.u.buf.first_element = templ->u.buf.first_element; + sf->base.u.buf.last_element = templ->u.buf.last_element; + + sf->offset = + templ->u.buf.first_element * util_format_get_blocksize(sf->base.format); + + sf->offset &= ~0x7f; /* FIXME: RT_ADDRESS requires 128 byte alignment */ + + sf->width = templ->u.buf.last_element - templ->u.buf.first_element + 1; + sf->height = 1; + sf->depth = 1; + + sf->base.width = sf->width; + sf->base.height = sf->height; + + sf->base.context = pipe; + return &sf->base; +} + +static struct pipe_surface * +nv50_surface_create(struct pipe_context *pipe, + struct pipe_resource *pres, + const struct pipe_surface *templ) +{ + if (unlikely(pres->target == PIPE_BUFFER)) + return nv50_surface_from_buffer(pipe, pres, templ); + return nv50_miptree_surface_new(pipe, pres, templ); +} + +void +nv50_surface_destroy(struct pipe_context *pipe, struct pipe_surface *ps) +{ + struct nv50_surface *s = nv50_surface(ps); + + pipe_resource_reference(&ps->texture, NULL); + + FREE(s); +} + +void +nv50_init_resource_functions(struct pipe_context *pcontext) +{ + pcontext->transfer_map = u_transfer_map_vtbl; + pcontext->transfer_flush_region = u_transfer_flush_region_vtbl; + pcontext->transfer_unmap = u_transfer_unmap_vtbl; + pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; + pcontext->create_surface = nv50_surface_create; + pcontext->surface_destroy = nv50_surface_destroy; +} + +void +nv50_screen_init_resource_functions(struct pipe_screen *pscreen) +{ + pscreen->resource_create = nv50_resource_create; + 
pscreen->resource_from_handle = nv50_resource_from_handle; + pscreen->resource_get_handle = u_resource_get_handle_vtbl; + pscreen->resource_destroy = u_resource_destroy_vtbl; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h new file mode 100644 index 00000000000..c06daa31c5d --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h @@ -0,0 +1,153 @@ + +#ifndef __NV50_RESOURCE_H__ +#define __NV50_RESOURCE_H__ + +#include "util/u_transfer.h" +#include "util/u_double_list.h" + +#include "nouveau_winsys.h" +#include "nouveau_buffer.h" + +#ifndef __NVC0_RESOURCE_H__ /* make sure we don't use these in nvc0: */ + +void +nv50_init_resource_functions(struct pipe_context *pcontext); + +void +nv50_screen_init_resource_functions(struct pipe_screen *pscreen); + +#define NV50_RESOURCE_FLAG_VIDEO (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 0) +#define NV50_RESOURCE_FLAG_NOALLOC (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 1) + +#define NV50_TILE_SHIFT_X(m) 6 +#define NV50_TILE_SHIFT_Y(m) ((((m) >> 4) & 0xf) + 2) +#define NV50_TILE_SHIFT_Z(m) ((((m) >> 8) & 0xf) + 0) + +#define NV50_TILE_SIZE_X(m) 64 +#define NV50_TILE_SIZE_Y(m) ( 4 << (((m) >> 4) & 0xf)) +#define NV50_TILE_SIZE_Z(m) ( 1 << (((m) >> 8) & 0xf)) + +#define NV50_TILE_SIZE_2D(m) (NV50_TILE_SIZE_X(m) << NV50_TILE_SHIFT_Y(m)) + +#define NV50_TILE_SIZE(m) (NV50_TILE_SIZE_2D(m) << NV50_TILE_SHIFT_Z(m)) + +#endif /* __NVC0_RESOURCE_H__ */ + +uint32_t +nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz); + +struct nv50_miptree_level { + uint32_t offset; + uint32_t pitch; + uint32_t tile_mode; +}; + +#define NV50_MAX_TEXTURE_LEVELS 16 + +struct nv50_miptree { + struct nv04_resource base; + struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS]; + uint32_t total_size; + uint32_t layer_stride; + boolean layout_3d; /* TRUE if layer count varies with mip level */ + uint8_t ms_x; /* log2 of number of samples in x/y dimension */ + uint8_t ms_y; + 
uint8_t ms_mode; +}; + +static INLINE struct nv50_miptree * +nv50_miptree(struct pipe_resource *pt) +{ + return (struct nv50_miptree *)pt; +} + + +#define NV50_TEXVIEW_SCALED_COORDS (1 << 0) +#define NV50_TEXVIEW_FILTER_MSAA8 (1 << 1) +#define NV50_TEXVIEW_ACCESS_RESOLVE (1 << 2) + + +/* Internal functions: + */ +boolean +nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align); + +struct pipe_resource * +nv50_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmp); + +void +nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt); + +struct pipe_resource * +nv50_miptree_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *template, + struct winsys_handle *whandle); + +boolean +nv50_miptree_get_handle(struct pipe_screen *pscreen, + struct pipe_resource *pt, + struct winsys_handle *whandle); + +struct nv50_surface { + struct pipe_surface base; + uint32_t offset; + uint32_t width; + uint16_t height; + uint16_t depth; +}; + +static INLINE struct nv50_surface * +nv50_surface(struct pipe_surface *ps) +{ + return (struct nv50_surface *)ps; +} + +static INLINE enum pipe_format +nv50_zs_to_s_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: return PIPE_FORMAT_X24S8_UINT; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: return PIPE_FORMAT_S8X24_UINT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return PIPE_FORMAT_X32_S8X24_UINT; + default: + return format; + } +} + +#ifndef __NVC0_RESOURCE_H__ + +unsigned +nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z); + +struct pipe_surface * +nv50_miptree_surface_new(struct pipe_context *, + struct pipe_resource *, + const struct pipe_surface *templ); + +void * +nv50_miptree_transfer_map(struct pipe_context *pctx, + struct pipe_resource *res, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer); +void +nv50_miptree_transfer_unmap(struct pipe_context 
*pcontext, + struct pipe_transfer *ptx); + +#endif /* __NVC0_RESOURCE_H__ */ + +struct nv50_surface * +nv50_surface_from_miptree(struct nv50_miptree *mt, + const struct pipe_surface *templ); + +struct pipe_surface * +nv50_surface_from_buffer(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *templ); + +void +nv50_surface_destroy(struct pipe_context *, struct pipe_surface *); + +#endif /* __NV50_RESOURCE_H__ */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c new file mode 100644 index 00000000000..f454ec77656 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -0,0 +1,845 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "util/u_format.h" +#include "util/u_format_s3tc.h" +#include "pipe/p_screen.h" + +#include "nv50/nv50_context.h" +#include "nv50/nv50_screen.h" + +#include "nouveau_vp3_video.h" + +#include "nv_object.xml.h" +#include <errno.h> + +#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS +# define NOUVEAU_GETPARAM_GRAPH_UNITS 13 +#endif + +/* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */ +#define LOCAL_WARPS_ALLOC 32 +/* affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP */ +#define STACK_WARPS_ALLOC 32 + +#define THREADS_IN_WARP 32 + +#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float)) + +static boolean +nv50_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bindings) +{ + if (sample_count > 8) + return FALSE; + if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */ + return FALSE; + if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128) + return FALSE; + + if (!util_format_is_supported(format, bindings)) + return FALSE; + + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS) + return FALSE; + break; + default: + break; + } + + /* transfers & shared are always supported */ + bindings &= ~(PIPE_BIND_TRANSFER_READ | + PIPE_BIND_TRANSFER_WRITE | + PIPE_BIND_SHARED); + + return (nv50_format_table[format].usage & bindings) == bindings; +} + +static int +nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; + + switch (param) { + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 64; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 14; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 12; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 14; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 512; + case PIPE_CAP_MIN_TEXEL_OFFSET: + return -8; + case PIPE_CAP_MAX_TEXEL_OFFSET: + return 7; + case 
PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_TEXTURE_SHADOW_MAP: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_SCALED_RESOLVE: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + return 1; + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return 65536; + case PIPE_CAP_SEAMLESS_CUBE_MAP: + return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS; + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + return 0; + case PIPE_CAP_CUBE_MAP_ARRAY: + return 0; + /* + return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS; + */ + case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_SM3: + return 1; + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 140; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 1; + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_VERTEX_COLOR_CLAMPED: + return 1; + case PIPE_CAP_QUERY_TIMESTAMP: + case PIPE_CAP_QUERY_TIME_ELAPSED: + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return 4; + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + return 64; + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + return (class_3d >= NVA0_3D_CLASS) ? 
1 : 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_INDEP_BLEND_ENABLE: + return 1; + case PIPE_CAP_INDEP_BLEND_FUNC: + return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 0; + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_TEXTURE_BARRIER: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_START_INSTANCE: + return 1; + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + return 0; /* state trackers will know better */ + case PIPE_CAP_USER_CONSTANT_BUFFERS: + case PIPE_CAP_USER_INDEX_BUFFERS: + case PIPE_CAP_USER_VERTEX_BUFFERS: + return 1; + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 256; + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return 1; /* 256 for binding as RT, but that's not possible in GL */ + case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + return NOUVEAU_MIN_BUFFER_MAP_ALIGN; + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_TGSI_TEXCOORD: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return 0; + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 1; + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + return 0; + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50; + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static int +nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, + enum 
pipe_shader_cap param) +{ + switch (shader) { + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_GEOMETRY: + case PIPE_SHADER_FRAGMENT: + break; + default: + return 0; + } + + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 4; + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_VERTEX) + return 32; + return 0x300 / 16; + case PIPE_SHADER_CAP_MAX_CONSTS: + return 65536 / 16; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return NV50_MAX_PIPE_CONSTBUFS; + case PIPE_SHADER_CAP_MAX_ADDRS: + return 1; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + return shader != PIPE_SHADER_FRAGMENT; + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; + case PIPE_SHADER_CAP_MAX_TEMPS: + return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 1; + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + return 0; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; /* please inline, or provide function declarations */ + case PIPE_SHADER_CAP_INTEGERS: + return 1; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + return 32; + default: + NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); + return 0; + } +} + +static float +nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 10.0f; + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 64.0f; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 4.0f; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0.0f; + } +} + +static void 
+nv50_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + + if (screen->base.fence.current) { + nouveau_fence_wait(screen->base.fence.current); + nouveau_fence_ref (NULL, &screen->base.fence.current); + } + if (screen->base.pushbuf) + screen->base.pushbuf->user_priv = NULL; + + if (screen->blitter) + nv50_blitter_destroy(screen); + + nouveau_bo_ref(NULL, &screen->code); + nouveau_bo_ref(NULL, &screen->tls_bo); + nouveau_bo_ref(NULL, &screen->stack_bo); + nouveau_bo_ref(NULL, &screen->txc); + nouveau_bo_ref(NULL, &screen->uniforms); + nouveau_bo_ref(NULL, &screen->fence.bo); + + nouveau_heap_destroy(&screen->vp_code_heap); + nouveau_heap_destroy(&screen->gp_code_heap); + nouveau_heap_destroy(&screen->fp_code_heap); + + FREE(screen->tic.entries); + + nouveau_object_del(&screen->tesla); + nouveau_object_del(&screen->eng2d); + nouveau_object_del(&screen->m2mf); + nouveau_object_del(&screen->sync); + + nouveau_screen_fini(&screen->base); + + FREE(screen); +} + +static void +nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct nouveau_pushbuf *push = screen->base.pushbuf; + + /* we need to do it after possible flush in MARK_RING */ + *sequence = ++screen->base.fence.sequence; + + PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4)); + PUSH_DATAh(push, screen->fence.bo->offset); + PUSH_DATA (push, screen->fence.bo->offset); + PUSH_DATA (push, *sequence); + PUSH_DATA (push, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 | + NV50_3D_QUERY_GET_UNK4 | + NV50_3D_QUERY_GET_UNIT_CROP | + NV50_3D_QUERY_GET_TYPE_QUERY | + NV50_3D_QUERY_GET_QUERY_SELECT_ZERO | + NV50_3D_QUERY_GET_SHORT); +} + +static u32 +nv50_screen_fence_update(struct pipe_screen *pscreen) +{ + return nv50_screen(pscreen)->fence.map[0]; +} + +static void +nv50_screen_init_hwctx(struct nv50_screen *screen) +{ + struct nouveau_pushbuf *push = screen->base.pushbuf; + struct nv04_fifo 
*fifo; + unsigned i; + + fifo = (struct nv04_fifo *)screen->base.channel->data; + + BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->m2mf->handle); + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3); + PUSH_DATA (push, screen->sync->handle); + PUSH_DATA (push, fifo->vram); + PUSH_DATA (push, fifo->vram); + + BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->eng2d->handle); + BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4); + PUSH_DATA (push, screen->sync->handle); + PUSH_DATA (push, fifo->vram); + PUSH_DATA (push, fifo->vram); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_2D(OPERATION), 1); + PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY); + BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, SUBC_2D(0x0888), 1); + PUSH_DATA (push, 1); + + BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->tesla->handle); + + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + + BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1); + PUSH_DATA (push, screen->sync->handle); + BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11); + for (i = 0; i < 11; ++i) + PUSH_DATA(push, fifo->vram); + BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN); + for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i) + PUSH_DATA(push, fifo->vram); + + BEGIN_NV04(push, NV50_3D(REG_MODE), 1); + PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED); + BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1); + PUSH_DATA (push, 0xf); + + if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) { + BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1); + PUSH_DATA (push, 0x18); + } + + BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); + PUSH_DATA (push, 1); + + BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, 
NV50_3D(MULTISAMPLE_MODE), 1); + PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1); + BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1); + PUSH_DATA (push, 1); + + if (screen->tesla->oclass >= NVA0_3D_CLASS) { + BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1); + PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); + } + + BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1); + PUSH_DATA (push, 0x3f); + + BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2)); + PUSH_DATA (push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2)); + + BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2)); + PUSH_DATA (push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2)); + + BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2)); + PUSH_DATA (push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2)); + + BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->tls_bo->offset); + PUSH_DATA (push, screen->tls_bo->offset); + PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8)); + + BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->stack_bo->offset); + PUSH_DATA (push, screen->stack_bo->offset); + PUSH_DATA (push, 4); + + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->uniforms->offset + (0 << 16)); + PUSH_DATA (push, screen->uniforms->offset + (0 << 16)); + PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000); + + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->uniforms->offset + (1 
<< 16)); + PUSH_DATA (push, screen->uniforms->offset + (1 << 16)); + PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000); + + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->uniforms->offset + (2 << 16)); + PUSH_DATA (push, screen->uniforms->offset + (2 << 16)); + PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000); + + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->uniforms->offset + (3 << 16)); + PUSH_DATA (push, screen->uniforms->offset + (3 << 16)); + PUSH_DATA (push, (NV50_CB_AUX << 16) | 0x0200); + + BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3); + PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01); + PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21); + PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31); + + /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */ + BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); + PUSH_DATA (push, ((1 << 9) << 6) | NV50_CB_AUX); + BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + (1 << 9)); + PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + (1 << 9)); + + /* max TIC (bits 4:8) & TSC bindings, per program type */ + for (i = 0; i < 3; ++i) { + BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1); + PUSH_DATA (push, 0x54); + } + + BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset); + PUSH_DATA (push, screen->txc->offset); + PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); + + BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset + 65536); + PUSH_DATA (push, screen->txc->offset + 65536); + PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); + + BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1); + 
PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY); + BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2); + for (i = 0; i < 8 * 2; ++i) + PUSH_DATA(push, 0); + BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(0)), 2); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 1.0f); + + BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1); +#ifdef NV50_SCISSORS_CLIPPING + PUSH_DATA (push, 0x0000); +#else + PUSH_DATA (push, 0x1080); +#endif + + BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1); + PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT); + + /* We use scissors instead of exact view volume clipping, + * so they're always enabled. + */ + BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 3); + PUSH_DATA (push, 1); + PUSH_DATA (push, 8192 << 16); + PUSH_DATA (push, 8192 << 16); + + BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1); + PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL); + BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1); + PUSH_DATA (push, 0x11111111); + BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1); + PUSH_DATA (push, 1); + + PUSH_KICK (push); +} + +static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space, + uint64_t *tls_size) +{ + struct nouveau_device *dev = screen->base.device; + int ret; + + screen->cur_tls_space = util_next_power_of_two(tls_space / ONE_TEMP_SIZE) * + ONE_TEMP_SIZE; + if (nouveau_mesa_debug) + debug_printf("allocating space for %u temps\n", + util_next_power_of_two(tls_space / ONE_TEMP_SIZE)); + *tls_size = screen->cur_tls_space * util_next_power_of_two(screen->TPs) * + screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, + *tls_size, NULL, &screen->tls_bo); + if (ret) { + NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret); + return ret; + } + + return 0; +} + 
+int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space) +{ + struct nouveau_pushbuf *push = screen->base.pushbuf; + int ret; + uint64_t tls_size; + + if (tls_space < screen->cur_tls_space) + return 0; + if (tls_space > screen->max_tls_space) { + /* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC / + * LOCAL_WARPS_NO_CLAMP) */ + NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n", + (unsigned)(tls_space / ONE_TEMP_SIZE), + (unsigned)(screen->max_tls_space / ONE_TEMP_SIZE)); + return -ENOMEM; + } + + nouveau_bo_ref(NULL, &screen->tls_bo); + ret = nv50_tls_alloc(screen, tls_space, &tls_size); + if (ret) + return ret; + + BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->tls_bo->offset); + PUSH_DATA (push, screen->tls_bo->offset); + PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8)); + + return 1; +} + +struct pipe_screen * +nv50_screen_create(struct nouveau_device *dev) +{ + struct nv50_screen *screen; + struct pipe_screen *pscreen; + struct nouveau_object *chan; + uint64_t value; + uint32_t tesla_class; + unsigned stack_size; + int ret; + + screen = CALLOC_STRUCT(nv50_screen); + if (!screen) + return NULL; + pscreen = &screen->base.base; + + ret = nouveau_screen_init(&screen->base, dev); + if (ret) { + NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret); + goto fail; + } + + /* TODO: Prevent FIFO prefetch before transfer of index buffers and + * admit them to VRAM. 
+ */ + screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER | + PIPE_BIND_VERTEX_BUFFER; + screen->base.sysmem_bindings |= + PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER; + + screen->base.pushbuf->user_priv = screen; + screen->base.pushbuf->rsvd_kick = 5; + + chan = screen->base.channel; + + pscreen->destroy = nv50_screen_destroy; + pscreen->context_create = nv50_create; + pscreen->is_format_supported = nv50_screen_is_format_supported; + pscreen->get_param = nv50_screen_get_param; + pscreen->get_shader_param = nv50_screen_get_shader_param; + pscreen->get_paramf = nv50_screen_get_paramf; + + nv50_screen_init_resource_functions(pscreen); + + if (screen->base.device->chipset < 0x84 || + debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) { + /* PMPEG */ + nouveau_screen_init_vdec(&screen->base); + } else if (screen->base.device->chipset < 0x98 || + screen->base.device->chipset == 0xa0) { + /* VP2 */ + screen->base.base.get_video_param = nv84_screen_get_video_param; + screen->base.base.is_video_format_supported = nv84_screen_video_supported; + } else { + /* VP3/4 */ + screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param; + screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported; + } + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, + NULL, &screen->fence.bo); + if (ret) { + NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret); + goto fail; + } + + nouveau_bo_map(screen->fence.bo, 0, NULL); + screen->fence.map = screen->fence.bo->map; + screen->base.fence.emit = nv50_screen_fence_emit; + screen->base.fence.update = nv50_screen_fence_update; + + ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS, + &(struct nv04_notify){ .length = 32 }, + sizeof(struct nv04_notify), &screen->sync); + if (ret) { + NOUVEAU_ERR("Failed to allocate notifier: %d\n", ret); + goto fail; + } + + ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS, + NULL, 0, &screen->m2mf); + if (ret) { + 
NOUVEAU_ERR("Failed to allocate PGRAPH context for M2MF: %d\n", ret); + goto fail; + } + + ret = nouveau_object_new(chan, 0xbeef502d, NV50_2D_CLASS, + NULL, 0, &screen->eng2d); + if (ret) { + NOUVEAU_ERR("Failed to allocate PGRAPH context for 2D: %d\n", ret); + goto fail; + } + + switch (dev->chipset & 0xf0) { + case 0x50: + tesla_class = NV50_3D_CLASS; + break; + case 0x80: + case 0x90: + tesla_class = NV84_3D_CLASS; + break; + case 0xa0: + switch (dev->chipset) { + case 0xa0: + case 0xaa: + case 0xac: + tesla_class = NVA0_3D_CLASS; + break; + case 0xaf: + tesla_class = NVAF_3D_CLASS; + break; + default: + tesla_class = NVA3_3D_CLASS; + break; + } + break; + default: + NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", dev->chipset); + goto fail; + } + screen->base.class_3d = tesla_class; + + ret = nouveau_object_new(chan, 0xbeef5097, tesla_class, + NULL, 0, &screen->tesla); + if (ret) { + NOUVEAU_ERR("Failed to allocate PGRAPH context for 3D: %d\n", ret); + goto fail; + } + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, + 3 << NV50_CODE_BO_SIZE_LOG2, NULL, &screen->code); + if (ret) { + NOUVEAU_ERR("Failed to allocate code bo: %d\n", ret); + goto fail; + } + + nouveau_heap_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); + nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); + nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); + + nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); + + screen->TPs = util_bitcount(value & 0xffff); + screen->MPsInTP = util_bitcount((value >> 24) & 0xf); + + stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP * + STACK_WARPS_ALLOC * 64 * 8; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL, + &screen->stack_bo); + if (ret) { + NOUVEAU_ERR("Failed to allocate stack bo: %d\n", ret); + goto fail; + } + + uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) * + screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP * + 
ONE_TEMP_SIZE; + screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE; + screen->max_tls_space /= 2; /* half of vram */ + + /* hw can address max 64 KiB */ + screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10); + + uint64_t tls_size; + unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE; + ret = nv50_tls_alloc(screen, tls_space, &tls_size); + if (ret) + goto fail; + + if (nouveau_mesa_debug) + debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n", + screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, NULL, + &screen->uniforms); + if (ret) { + NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret); + goto fail; + } + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, NULL, + &screen->txc); + if (ret) { + NOUVEAU_ERR("Failed to allocate TIC/TSC bo: %d\n", ret); + goto fail; + } + + screen->tic.entries = CALLOC(4096, sizeof(void *)); + screen->tsc.entries = screen->tic.entries + 2048; + + if (!nv50_blitter_create(screen)) + goto fail; + + nv50_screen_init_hwctx(screen); + + nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE); + + return pscreen; + +fail: + nv50_screen_destroy(pscreen); + return NULL; +} + +int +nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry) +{ + int i = screen->tic.next; + + while (screen->tic.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1); + + screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1); + + if (screen->tic.entries[i]) + nv50_tic_entry(screen->tic.entries[i])->id = -1; + + screen->tic.entries[i] = entry; + return i; +} + +int +nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry) +{ + int i = screen->tsc.next; + + while (screen->tsc.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1); + + screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1); + + if (screen->tsc.entries[i]) + 
nv50_tsc_entry(screen->tsc.entries[i])->id = -1; + + screen->tsc.entries[i] = entry; + return i; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h new file mode 100644 index 00000000000..091a3921a4b --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -0,0 +1,153 @@ +#ifndef __NV50_SCREEN_H__ +#define __NV50_SCREEN_H__ + +#include "nouveau_screen.h" +#include "nouveau_fence.h" +#include "nouveau_mm.h" +#include "nouveau_heap.h" + +#include "nv50/nv50_winsys.h" +#include "nv50/nv50_stateobj.h" + +#define NV50_TIC_MAX_ENTRIES 2048 +#define NV50_TSC_MAX_ENTRIES 2048 + +/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */ +#define NV50_MAX_PIPE_CONSTBUFS 14 + +struct nv50_context; + +#define NV50_CODE_BO_SIZE_LOG2 19 + +#define NV50_SCREEN_RESIDENT_BO_COUNT 5 + +struct nv50_blitter; + +struct nv50_screen { + struct nouveau_screen base; + + struct nv50_context *cur_ctx; + + struct nouveau_bo *code; + struct nouveau_bo *uniforms; + struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ + struct nouveau_bo *stack_bo; + struct nouveau_bo *tls_bo; + + unsigned TPs; + unsigned MPsInTP; + unsigned max_tls_space; + unsigned cur_tls_space; + + struct nouveau_heap *vp_code_heap; + struct nouveau_heap *gp_code_heap; + struct nouveau_heap *fp_code_heap; + + struct nv50_blitter *blitter; + + struct { + void **entries; + int next; + uint32_t lock[NV50_TIC_MAX_ENTRIES / 32]; + } tic; + + struct { + void **entries; + int next; + uint32_t lock[NV50_TSC_MAX_ENTRIES / 32]; + } tsc; + + struct { + uint32_t *map; + struct nouveau_bo *bo; + } fence; + + struct nouveau_object *sync; + + struct nouveau_object *tesla; + struct nouveau_object *eng2d; + struct nouveau_object *m2mf; +}; + +static INLINE struct nv50_screen * +nv50_screen(struct pipe_screen *screen) +{ + return (struct nv50_screen *)screen; +} + +boolean nv50_blitter_create(struct nv50_screen *); +void 
nv50_blitter_destroy(struct nv50_screen *); + +int nv50_screen_tic_alloc(struct nv50_screen *, void *); +int nv50_screen_tsc_alloc(struct nv50_screen *, void *); + +static INLINE void +nv50_resource_fence(struct nv04_resource *res, uint32_t flags) +{ + struct nv50_screen *screen = nv50_screen(res->base.screen); + + if (res->mm) { + nouveau_fence_ref(screen->base.fence.current, &res->fence); + if (flags & NOUVEAU_BO_WR) + nouveau_fence_ref(screen->base.fence.current, &res->fence_wr); + } +} + +static INLINE void +nv50_resource_validate(struct nv04_resource *res, uint32_t flags) +{ + if (likely(res->bo)) { + if (flags & NOUVEAU_BO_WR) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING | + NOUVEAU_BUFFER_STATUS_DIRTY; + if (flags & NOUVEAU_BO_RD) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nv50_resource_fence(res, flags); + } +} + +struct nv50_format { + uint32_t rt; + uint32_t tic; + uint32_t vtx; + uint32_t usage; +}; + +extern const struct nv50_format nv50_format_table[]; + +static INLINE void +nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic) +{ + if (tic->id >= 0) + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); +} + +static INLINE void +nv50_screen_tsc_unlock(struct nv50_screen *screen, struct nv50_tsc_entry *tsc) +{ + if (tsc->id >= 0) + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); +} + +static INLINE void +nv50_screen_tic_free(struct nv50_screen *screen, struct nv50_tic_entry *tic) +{ + if (tic->id >= 0) { + screen->tic.entries[tic->id] = NULL; + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); + } +} + +static INLINE void +nv50_screen_tsc_free(struct nv50_screen *screen, struct nv50_tsc_entry *tsc) +{ + if (tsc->id >= 0) { + screen->tsc.entries[tsc->id] = NULL; + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); + } +} + +extern int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c 
b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c new file mode 100644 index 00000000000..9144fc48d95 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -0,0 +1,623 @@ +/* + * Copyright 2008 Ben Skeggs + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" + +#include "nv50/nv50_context.h" + +void +nv50_constbufs_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + unsigned s; + + for (s = 0; s < 3; ++s) { + unsigned p; + + if (s == PIPE_SHADER_FRAGMENT) + p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT; + else + if (s == PIPE_SHADER_GEOMETRY) + p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY; + else + p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX; + + while (nv50->constbuf_dirty[s]) { + const int i = ffs(nv50->constbuf_dirty[s]) - 1; + nv50->constbuf_dirty[s] &= ~(1 << i); + + if (nv50->constbuf[s][i].user) { + const unsigned b = NV50_CB_PVP + s; + unsigned start = 0; + unsigned words = nv50->constbuf[s][0].size / 4; + if (i) { + NOUVEAU_ERR("user constbufs only supported in slot 0\n"); + continue; + } + if (!nv50->state.uniform_buffer_bound[s]) { + nv50->state.uniform_buffer_bound[s] = TRUE; + BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); + PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); + } + while (words) { + unsigned nr; + + if (!PUSH_SPACE(push, 16)) + break; + nr = PUSH_AVAIL(push); + assert(nr >= 16); + nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN); + + BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); + PUSH_DATA (push, (start << 8) | b); + BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr); + PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr); + + start += nr; + words -= nr; + } + } else { + struct nv04_resource *res = + nv04_resource(nv50->constbuf[s][i].u.buf); + if (res) { + /* TODO: allocate persistent bindings */ + const unsigned b = s * 16 + i; + + assert(nouveau_resource_mapped_by_gpu(&res->base)); + + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset); + PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset); + PUSH_DATA (push, (b << 16) | + (nv50->constbuf[s][i].size & 0xffff)); + 
BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); + PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); + + BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD); + } else { + BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); + PUSH_DATA (push, (i << 8) | p | 0); + } + if (i == 0) + nv50->state.uniform_buffer_bound[s] = FALSE; + } + } + } +} + +static boolean +nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog) +{ + if (!prog->translated) { + prog->translated = nv50_program_translate( + prog, nv50->screen->base.device->chipset); + if (!prog->translated) + return FALSE; + } else + if (prog->mem) + return TRUE; + + return nv50_program_upload_code(nv50, prog); +} + +static INLINE void +nv50_program_update_context_state(struct nv50_context *nv50, + struct nv50_program *prog, int stage) +{ + const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR; + + if (prog && prog->tls_space) { + if (nv50->state.new_tls_space) + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS); + if (!nv50->state.tls_required || nv50->state.new_tls_space) + BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo); + nv50->state.new_tls_space = FALSE; + nv50->state.tls_required |= 1 << stage; + } else { + if (nv50->state.tls_required == (1 << stage)) + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS); + nv50->state.tls_required &= ~(1 << stage); + } +} + +void +nv50_vertprog_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_program *vp = nv50->vertprog; + + if (!nv50_program_validate(nv50, vp)) + return; + nv50_program_update_context_state(nv50, vp, 0); + + BEGIN_NV04(push, NV50_3D(VP_ATTR_EN(0)), 2); + PUSH_DATA (push, vp->vp.attrs[0]); + PUSH_DATA (push, vp->vp.attrs[1]); + BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_RESULT), 1); + PUSH_DATA (push, vp->max_out); + BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_TEMP), 1); + PUSH_DATA (push, vp->max_gpr); + BEGIN_NV04(push, NV50_3D(VP_START_ID), 1); + PUSH_DATA (push, vp->code_base); +} + 
+void +nv50_fragprog_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_program *fp = nv50->fragprog; + + if (!nv50_program_validate(nv50, fp)) + return; + nv50_program_update_context_state(nv50, fp, 1); + + BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1); + PUSH_DATA (push, fp->max_gpr); + BEGIN_NV04(push, NV50_3D(FP_RESULT_COUNT), 1); + PUSH_DATA (push, fp->max_out); + BEGIN_NV04(push, NV50_3D(FP_CONTROL), 1); + PUSH_DATA (push, fp->fp.flags[0]); + BEGIN_NV04(push, NV50_3D(FP_CTRL_UNK196C), 1); + PUSH_DATA (push, fp->fp.flags[1]); + BEGIN_NV04(push, NV50_3D(FP_START_ID), 1); + PUSH_DATA (push, fp->code_base); +} + +void +nv50_gmtyprog_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_program *gp = nv50->gmtyprog; + + if (gp) { + BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_TEMP), 1); + PUSH_DATA (push, gp->max_gpr); + BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_RESULT), 1); + PUSH_DATA (push, gp->max_out); + BEGIN_NV04(push, NV50_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1); + PUSH_DATA (push, gp->gp.prim_type); + BEGIN_NV04(push, NV50_3D(GP_VERTEX_OUTPUT_COUNT), 1); + PUSH_DATA (push, gp->gp.vert_count); + BEGIN_NV04(push, NV50_3D(GP_START_ID), 1); + PUSH_DATA (push, gp->code_base); + + nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */ + } + nv50_program_update_context_state(nv50, gp, 2); + + /* GP_ENABLE is updated in linkage validation */ +} + +static void +nv50_sprite_coords_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + uint32_t pntc[8], mode; + struct nv50_program *fp = nv50->fragprog; + unsigned i, c; + unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff; + + if (!nv50->rast->pipe.point_quad_rasterization) { + if (nv50->state.point_sprite) { + BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8); + for (i = 0; i < 8; ++i) + PUSH_DATA(push, 0); + + nv50->state.point_sprite = FALSE; + } + return; 
+ } else { + nv50->state.point_sprite = TRUE; + } + + memset(pntc, 0, sizeof(pntc)); + + for (i = 0; i < fp->in_nr; i++) { + unsigned n = util_bitcount(fp->in[i].mask); + + if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) { + m += n; + continue; + } + if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) { + m += n; + continue; + } + + for (c = 0; c < 4; ++c) { + if (fp->in[i].mask & (1 << c)) { + pntc[m / 8] |= (c + 1) << ((m % 8) * 4); + ++m; + } + } + } + + if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) + mode = 0x00; + else + mode = 0x10; + + BEGIN_NV04(push, NV50_3D(POINT_SPRITE_CTRL), 1); + PUSH_DATA (push, mode); + + BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8); + PUSH_DATAp(push, pntc, 8); +} + +/* Validate state derived from shaders and the rasterizer cso. */ +void +nv50_validate_derived_rs(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + uint32_t color, psize; + + nv50_sprite_coords_validate(nv50); + + if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) { + nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard; + BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1); + PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard); + } + + if (nv50->dirty & NV50_NEW_FRAGPROG) + return; + psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK; + color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN; + + if (nv50->rast->pipe.clamp_vertex_color) + color |= NV50_3D_SEMANTIC_COLOR_CLMP_EN; + + if (color != nv50->state.semantic_color) { + nv50->state.semantic_color = color; + BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 1); + PUSH_DATA (push, color); + } + + if (nv50->rast->pipe.point_size_per_vertex) + psize |= NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK; + + if (psize != nv50->state.semantic_psize) { + nv50->state.semantic_psize = psize; + BEGIN_NV04(push, NV50_3D(SEMANTIC_PTSZ), 1); + PUSH_DATA (push, psize); + } +} + +static int 
+nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4],
+              struct nv50_varying *in, struct nv50_varying *out)
+{
+   /* Append one map slot per component enabled in in->mask, starting at
+    * slot 'mid', and return the next free slot.
+    *
+    * A slot receives the producer's output register (out->hw advanced past
+    * the components the producer writes) when the producer writes that
+    * component.  Otherwise the caller's fill value is kept, with bit 0 set
+    * for the fourth component.
+    * NOTE(review): presumably that selects a constant 1.0 for a missing W;
+    * confirm against the hardware docs.
+    *
+    * For inputs flagged linear, the slot's bit is set in 'lin'.
+    */
+   int c;
+   uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
+
+   for (c = 0; c < 4; ++c) {
+      if (mf & 1) {
+         if (in->linear)
+            lin[mid / 32] |= 1 << (mid % 32);
+         if (mv & 1)
+            map[mid] = oid;
+         else
+         if (c == 3)
+            map[mid] |= 1;
+         ++mid;
+      }
+
+      oid += mv & 1;
+      mf >>= 1;
+      mv >>= 1;
+   }
+
+   return mid;
+}
+
+/* Link the last vertex-processing stage (geometry program if bound, else
+ * vertex program) to the fragment program.
+ *
+ * Builds the result map (producer output register per interpolant slot),
+ * the noperspective bitmap and, if the producer has stream output state,
+ * the matching STRMOUT_MAP.  Emits them together with SEMANTIC_COLOR..PTSZ,
+ * FP_INTERPOLANT_CTRL and GP_ENABLE, and caches interp/colors/psiz in
+ * nv50->state.
+ */
+void
+nv50_fp_linkage_validate(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog;
+   struct nv50_program *fp = nv50->fragprog;
+   struct nv50_varying dummy;
+   int i, n, c, m;
+   uint32_t primid = 0;
+   uint32_t psiz = 0x000;
+   uint32_t interp = fp->fp.interp;
+   uint32_t colors = fp->fp.colors;
+   uint32_t lin[4];
+   uint8_t map[64];
+   uint8_t so_map[64];
+
+   /* No program changed: relinking is only needed if light_twoside no
+    * longer matches what the cached front/back colour ids encode
+    * (ids equal <=> two-sided lighting off).
+    */
+   if (!(nv50->dirty & (NV50_NEW_VERTPROG |
+                        NV50_NEW_FRAGPROG |
+                        NV50_NEW_GMTYPROG))) {
+      uint8_t bfc, ffc;
+      ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);
+      bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK)
+         >> 8;
+      if (nv50->rast->pipe.light_twoside == ((ffc == bfc) ? 0 : 1))
+         return;
+   }
+
+   memset(lin, 0x00, sizeof(lin));
+
+   /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx
+    * or is it the first byte ?
+    */
+   memset(map, nv50->gmtyprog ? 0x80 : 0x40, sizeof(map));
+
+   dummy.mask = 0xf; /* map all components of HPOS */
+   dummy.linear = 0;
+   m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
+
+   /* one slot per clip distance component */
+   for (c = 0; c < vp->vp.clpd_nr; ++c)
+      map[m++] = vp->vp.clpd[c / 4] + (c % 4);
+
+   colors |= m << 8; /* adjust BFC0 id */
+
+   /* from here on 'dummy' stands for "producer writes nothing" */
+   dummy.mask = 0x0;
+
+   /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */
+   if (nv50->rast->pipe.light_twoside) {
+      for (i = 0; i < 2; ++i) {
+         n = vp->vp.bfc[i];
+         if (fp->vp.bfc[i] >= fp->in_nr)
+            continue;
+         m = nv50_vec4_map(map, m, lin, &fp->in[fp->vp.bfc[i]],
+                           (n < vp->out_nr) ? &vp->out[n] : &dummy);
+      }
+   }
+   colors += m - 4; /* adjust FFC0 id */
+   interp |= m << 8; /* set map id where 'normal' FP inputs start */
+
+   /* match every FP input to the producer output with the same semantic
+    * name/index; unmatched inputs are mapped through 'dummy'
+    */
+   for (i = 0; i < fp->in_nr; ++i) {
+      for (n = 0; n < vp->out_nr; ++n)
+         if (vp->out[n].sn == fp->in[i].sn &&
+             vp->out[n].si == fp->in[i].si)
+            break;
+      m = nv50_vec4_map(map, m, lin,
+                        &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
+   }
+
+   /* PrimitiveID either is replaced by the system value, or
+    * written by the geometry shader into an output register
+    */
+   if (fp->gp.primid < 0x80) {
+      primid = m;
+      map[m++] = vp->gp.primid;
+   }
+
+   if (nv50->rast->pipe.point_size_per_vertex) {
+      psiz = (m << 4) | 1;
+      map[m++] = vp->vp.psiz;
+   }
+
+   if (nv50->rast->pipe.clamp_vertex_color)
+      colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
+
+   if (unlikely(vp->so)) {
+      /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP
+       * gets written.
+       *
+       * TODO:
+       * Inverting vp->so->map (output -> offset) would probably speed this up.
+       */
+      memset(so_map, 0, sizeof(so_map));
+      for (i = 0; i < vp->so->map_size; ++i) {
+         if (vp->so->map[i] == 0xff)
+            continue;
+         /* reuse a result-map slot that already carries this output and
+          * has no stream-out offset yet; otherwise append a new slot
+          */
+         for (c = 0; c < m; ++c)
+            if (map[c] == vp->so->map[i] && !so_map[c])
+               break;
+         if (c == m) {
+            c = m;
+            map[m++] = vp->so->map[i];
+         }
+         so_map[c] = 0x80 | i;
+      }
+      /* clear the tail of the last 4-byte word that gets pushed */
+      for (c = m; c & 3; ++c)
+         so_map[c] = 0;
+   }
+
+   /* the byte maps are pushed as n 32-bit words */
+   n = (m + 3) / 4;
+   assert(m <= 64);
+
+   if (unlikely(nv50->gmtyprog)) {
+      BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP_SIZE), 1);
+      PUSH_DATA (push, m);
+      BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP(0)), n);
+      PUSH_DATAp(push, map, n);
+   } else {
+      BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
+      PUSH_DATA (push, vp->vp.attrs[2]);
+
+      BEGIN_NV04(push, NV50_3D(SEMANTIC_PRIM_ID), 1);
+      PUSH_DATA (push, primid);
+
+      BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
+      PUSH_DATA (push, m);
+      BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
+      PUSH_DATAp(push, map, n);
+   }
+
+   BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 4);
+   PUSH_DATA (push, colors);
+   PUSH_DATA (push, (vp->vp.clpd_nr << 8) | 4);
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, psiz);
+
+   BEGIN_NV04(push, NV50_3D(FP_INTERPOLANT_CTRL), 1);
+   PUSH_DATA (push, interp);
+
+   /* cached for nv50_sprite_coords_validate / nv50_validate_derived_rs */
+   nv50->state.interpolant_ctrl = interp;
+
+   nv50->state.semantic_color = colors;
+   nv50->state.semantic_psize = psiz;
+
+   BEGIN_NV04(push, NV50_3D(NOPERSPECTIVE_BITMAP(0)), 4);
+   PUSH_DATAp(push, lin, 4);
+
+   BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1);
+   PUSH_DATA (push, nv50->gmtyprog ? 1 : 0);
+
+   if (vp->so) {
+      BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n);
+      PUSH_DATAp(push, so_map, n);
+   }
+}
+
+/* Append to 'map', starting at slot m, one entry per component read by the
+ * geometry program: the vertex program's output register if it writes the
+ * component, else 0x40 (0x41 for the fourth component).  Returns the next
+ * free slot.
+ */
+static int
+nv50_vp_gp_mapping(uint8_t *map, int m,
+                   struct nv50_program *vp, struct nv50_program *gp)
+{
+   int i, j, c;
+
+   for (i = 0; i < gp->in_nr; ++i) {
+      uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;
+
+      /* find the VP output with the same semantic name/index; if none is
+       * found mv stays 0 and every component gets the fill value
+       */
+      for (j = 0; j < vp->out_nr; ++j) {
+         if (vp->out[j].sn == gp->in[i].sn &&
+             vp->out[j].si == gp->in[i].si) {
+            mv = vp->out[j].mask;
+            oid = vp->out[j].hw;
+            break;
+         }
+      }
+
+      for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
+         if (mg & mv & 1)
+            map[m++] = oid;
+         else
+         if (mg & 1)
+            map[m++] = (c == 3) ? 0x41 : 0x40;
+         oid += mv & 1;
+      }
+   }
+   return m;
+}
+
+/* Link the vertex program to the geometry program: emits the combined
+ * builtin attribute enables and the VP result map.  Does nothing when no
+ * geometry program is bound.
+ */
+void
+nv50_gp_linkage_validate(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_program *vp = nv50->vertprog;
+   struct nv50_program *gp = nv50->gmtyprog;
+   int m = 0;
+   int n;
+   uint8_t map[64];
+
+   if (!gp)
+      return;
+   memset(map, 0, sizeof(map));
+
+   m = nv50_vp_gp_mapping(map, m, vp, gp);
+
+   /* the byte map is pushed as n 32-bit words */
+   n = (m + 3) / 4;
+
+   BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
+   PUSH_DATA (push, vp->vp.attrs[2] | gp->vp.attrs[2]);
+
+   BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
+   PUSH_DATA (push, m);
+   BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
+   PUSH_DATAp(push, map, n);
+}
+
+/* (Re)program stream output.  Stream output is always disabled first.
+ * If the active program has no stream output state or no targets are
+ * bound, it stays disabled.  Otherwise buffer control, per-target
+ * address/attribute count (plus size and offset on NVA0+) and, on older
+ * classes, a primitive limit are emitted before re-enabling it.
+ */
+void
+nv50_stream_output_validate(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_stream_output_state *so;
+   uint32_t ctrl;
+   unsigned i;
+   unsigned prims = ~0;
+
+   so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so;
+
+   BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
+   PUSH_DATA (push, 0);
+   if (!so || !nv50->num_so_targets) {
+      if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
+         BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
+         PUSH_DATA (push, 0);
+      }
+      BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
+      PUSH_DATA (push, 1);
+      return;
+   }
+
+   /* previous TFB needs to complete */
+   if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
+      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+      PUSH_DATA (push, 0);
+   }
+
+   ctrl = so->ctrl;
+   if (nv50->screen->base.class_3d >= NVA0_3D_CLASS)
+      ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET;
+
+   BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
+   PUSH_DATA (push, ctrl);
+
+   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);
+
+   for (i = 0; i < nv50->num_so_targets; ++i) {
+      struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
+      struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
+
+      /* NVA0+ takes a 4th word (buffer size) and a separate offset */
+      const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;
+
+      if (n == 4 && !targ->clean)
+         nv84_query_fifo_wait(push, targ->pq);
+      BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
+      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
+      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
+      PUSH_DATA (push, so->num_attribs[i]);
+      if (n == 4) {
+         PUSH_DATA(push, targ->pipe.buffer_size);
+
+         /* resume at the offset held by the target's query unless the
+          * target is marked clean, in which case start at 0
+          */
+         BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
+         if (!targ->clean) {
+            assert(targ->pq);
+            nv50_query_pushbuf_submit(push, targ->pq, 0x4);
+         } else {
+            PUSH_DATA(push, 0);
+            targ->clean = FALSE;
+         }
+      } else {
+         /* NOTE(review): divides by stride * prim_size; assumes both are
+          * non-zero for every bound target - confirm with the callers.
+          */
+         const unsigned limit = targ->pipe.buffer_size /
+            (so->stride[i] * nv50->state.prim_size);
+         prims = MIN2(prims, limit);
+      }
+      BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
+   }
+   /* prims is still ~0 if no target took the primitive-limit path */
+   if (prims != ~0) {
+      BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
+      PUSH_DATA (push, prims);
+   }
+   BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
+   PUSH_DATA (push, 1);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
new file mode 100644
index 00000000000..7dceb51c19e
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -0,0 +1,1110 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_helpers.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+#include "util/u_format_srgb.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv50/nv50_stateobj.h"
+#include "nv50/nv50_context.h"
+
+#include "nv50/nv50_3d.xml.h"
+#include "nv50/nv50_texture.xml.h"
+
+#include "nouveau_gldefs.h"
+
+/* Caveats:
+ *  ! pipe_sampler_state.normalized_coords is ignored - rectangle textures will
+ *     use non-normalized coordinates, everything else won't
+ *    (The relevant bit is in the TIC entry and not the TSC entry.)
+ *
+ *  ! pipe_sampler_state.seamless_cube_map is ignored - seamless filtering is
+ *     always activated on NVA0 +
+ *    (Give me the global bit, otherwise it's not worth the CPU work.)
+ *
+ *  ! pipe_sampler_state.border_color is not swizzled according to the texture
+ *     swizzle in pipe_sampler_view
+ *    (This will be ugly with indirect independent texture/sampler access,
+ *     we'd have to emulate the logic in the shader. GL doesn't have that,
+ *     D3D doesn't have swizzle, if we knew what we were implementing we'd be
+ *     good.)
+ *
+ *  ! pipe_rasterizer_state.line_last_pixel is ignored - it is never drawn
+ *
+ *  ! pipe_rasterizer_state.flatshade_first also applies to QUADS
+ *    (There's a GL query for that, forcing an exception is just ridiculous.)
+ *
+ *  ! pipe_rasterizer_state.half_pixel_center is ignored - pixel centers
+ *     are always at half integer coordinates and the top-left rule applies
+ *    (There does not seem to be a hardware switch for this.)
+ *
+ *  ! pipe_rasterizer_state.sprite_coord_enable is masked with 0xff on NVC0
+ *    (The hardware only has 8 slots meant for TexCoord and we have to assign
+ *     in advance to maintain elegant separate shader objects.)
+ */
+
+/* Spread a PIPE_MASK_RGBA write mask into the hardware layout, which uses
+ * one nibble per channel: R -> bit 0, G -> bit 4, B -> bit 8, A -> bit 12.
+ */
+static INLINE uint32_t
+nv50_colormask(unsigned mask)
+{
+   const uint32_t r = (mask & PIPE_MASK_R) ? 0x0001 : 0;
+   const uint32_t g = (mask & PIPE_MASK_G) ? 0x0010 : 0;
+   const uint32_t b = (mask & PIPE_MASK_B) ? 0x0100 : 0;
+   const uint32_t a = (mask & PIPE_MASK_A) ? 0x1000 : 0;
+
+   return r | g | b | a;
+}
+
+#define NV50_BLEND_FACTOR_CASE(a, b) \
+   case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b
+
+/* Translate a PIPE_BLENDFACTOR_* value to NV50_3D_BLEND_FACTOR_*.
+ * Unknown factors translate to ZERO.
+ */
+static INLINE uint32_t
+nv50_blend_fac(unsigned factor)
+{
+   switch (factor) {
+   /* constants */
+   NV50_BLEND_FACTOR_CASE(ZERO, ZERO);
+   NV50_BLEND_FACTOR_CASE(ONE, ONE);
+   /* source, and one minus source */
+   NV50_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR);
+   NV50_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR);
+   NV50_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA);
+   NV50_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA);
+   NV50_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE);
+   /* destination, and one minus destination */
+   NV50_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR);
+   NV50_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR);
+   NV50_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA);
+   NV50_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA);
+   /* blend colour, and one minus blend colour */
+   NV50_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR);
+   NV50_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR);
+   NV50_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA);
+   NV50_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA);
+   /* second source (dual-source blending) */
+   NV50_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR);
+   NV50_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR);
+   NV50_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA);
+   NV50_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA);
+   default:
+      return NV50_3D_BLEND_FACTOR_ZERO;
+   }
+}
+
+static
void *
+nv50_blend_state_create(struct pipe_context *pipe,
+                        const struct pipe_blend_state *cso)
+{
+   /* Pre-bake the blend CSO into a command stream stored in so->state.
+    * Returns NULL if the state object cannot be allocated.
+    */
+   struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj);
+   int i;
+   boolean emit_common_func = cso->rt[0].blend_enable;
+   uint32_t ms;
+
+   /* every SB_* below writes through 'so' */
+   if (!so)
+      return NULL;
+
+   if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
+      SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1);
+      SB_DATA (so, cso->independent_blend_enable);
+   }
+
+   so->pipe = *cso;
+
+   SB_BEGIN_3D(so, COLOR_MASK_COMMON, 1);
+   SB_DATA (so, !cso->independent_blend_enable);
+
+   SB_BEGIN_3D(so, BLEND_ENABLE_COMMON, 1);
+   SB_DATA (so, !cso->independent_blend_enable);
+
+   if (cso->independent_blend_enable) {
+      SB_BEGIN_3D(so, BLEND_ENABLE(0), 8);
+      for (i = 0; i < 8; ++i) {
+         SB_DATA(so, cso->rt[i].blend_enable);
+         if (cso->rt[i].blend_enable)
+            emit_common_func = TRUE;
+      }
+
+      /* NVA3+ takes per-RT equations/factors, so the common function is
+       * not emitted there
+       */
+      if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
+         emit_common_func = FALSE;
+
+         for (i = 0; i < 8; ++i) {
+            if (!cso->rt[i].blend_enable)
+               continue;
+            SB_BEGIN_3D_(so, NVA3_3D_IBLEND_EQUATION_RGB(i), 6);
+            SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func));
+            SB_DATA (so, nv50_blend_fac(cso->rt[i].rgb_src_factor));
+            SB_DATA (so, nv50_blend_fac(cso->rt[i].rgb_dst_factor));
+            SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func));
+            SB_DATA (so, nv50_blend_fac(cso->rt[i].alpha_src_factor));
+            SB_DATA (so, nv50_blend_fac(cso->rt[i].alpha_dst_factor));
+         }
+      }
+   } else {
+      SB_BEGIN_3D(so, BLEND_ENABLE(0), 1);
+      SB_DATA (so, cso->rt[0].blend_enable);
+   }
+
+   if (emit_common_func) {
+      SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5);
+      SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
+      SB_DATA (so, nv50_blend_fac(cso->rt[0].rgb_src_factor));
+      SB_DATA (so, nv50_blend_fac(cso->rt[0].rgb_dst_factor));
+      SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
+      SB_DATA (so, nv50_blend_fac(cso->rt[0].alpha_src_factor));
+      SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1);
+      SB_DATA (so, nv50_blend_fac(cso->rt[0].alpha_dst_factor));
+   }
+
+   if (cso->logicop_enable) {
+      SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2);
+      SB_DATA (so, 1);
+      SB_DATA (so, nvgl_logicop_func(cso->logicop_func));
+   } else {
+      SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 1);
+      SB_DATA (so, 0);
+   }
+
+   if (cso->independent_blend_enable) {
+      SB_BEGIN_3D(so, COLOR_MASK(0), 8);
+      for (i = 0; i < 8; ++i)
+         SB_DATA(so, nv50_colormask(cso->rt[i].colormask));
+   } else {
+      SB_BEGIN_3D(so, COLOR_MASK(0), 1);
+      SB_DATA (so, nv50_colormask(cso->rt[0].colormask));
+   }
+
+   ms = 0;
+   if (cso->alpha_to_coverage)
+      ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
+   if (cso->alpha_to_one)
+      ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
+
+   SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1);
+   SB_DATA (so, ms);
+
+   /* the baked stream must fit the fixed-size state array */
+   assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+   return so;
+}
+
+/* Make hwcso the current blend state and flag it for validation. */
+static void
+nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+
+   nv50->blend = hwcso;
+   nv50->dirty |= NV50_NEW_BLEND;
+}
+
+/* Free a blend state object created by nv50_blend_state_create(). */
+static void
+nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+   FREE(hwcso);
+}
+
+/* Pre-bake the rasterizer CSO into a command stream stored in so->state.
+ * Returns NULL if the state object cannot be allocated.
+ *
+ * NOTE: ignoring line_last_pixel, using FALSE (set on screen init)
+ */
+static void *
+nv50_rasterizer_state_create(struct pipe_context *pipe,
+                             const struct pipe_rasterizer_state *cso)
+{
+   struct nv50_rasterizer_stateobj *so;
+   uint32_t reg;
+
+   so = CALLOC_STRUCT(nv50_rasterizer_stateobj);
+   if (!so)
+      return NULL;
+   so->pipe = *cso;
+
+#ifndef NV50_SCISSORS_CLIPPING
+   SB_BEGIN_3D(so, SCISSOR_ENABLE(0), 1);
+   SB_DATA (so, cso->scissor);
+#endif
+
+   SB_BEGIN_3D(so, SHADE_MODEL, 1);
+   SB_DATA (so, cso->flatshade ? NV50_3D_SHADE_MODEL_FLAT :
+            NV50_3D_SHADE_MODEL_SMOOTH);
+   SB_BEGIN_3D(so, PROVOKING_VERTEX_LAST, 1);
+   SB_DATA (so, !cso->flatshade_first);
+   SB_BEGIN_3D(so, VERTEX_TWO_SIDE_ENABLE, 1);
+   SB_DATA (so, cso->light_twoside);
+
+   SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1);
+   SB_DATA (so, cso->clamp_fragment_color ? 0x11111111 : 0x00000000);
+
+   SB_BEGIN_3D(so, MULTISAMPLE_ENABLE, 1);
+   SB_DATA (so, cso->multisample);
+
+   SB_BEGIN_3D(so, LINE_WIDTH, 1);
+   SB_DATA (so, fui(cso->line_width));
+   SB_BEGIN_3D(so, LINE_SMOOTH_ENABLE, 1);
+   SB_DATA (so, cso->line_smooth);
+
+   SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1);
+   if (cso->line_stipple_enable) {
+      SB_DATA (so, 1);
+      SB_BEGIN_3D(so, LINE_STIPPLE, 1);
+      SB_DATA (so, (cso->line_stipple_pattern << 8) |
+               cso->line_stipple_factor);
+   } else {
+      SB_DATA (so, 0);
+   }
+
+   /* a fixed point size is only baked when it is not per-vertex */
+   if (!cso->point_size_per_vertex) {
+      SB_BEGIN_3D(so, POINT_SIZE, 1);
+      SB_DATA (so, fui(cso->point_size));
+   }
+   SB_BEGIN_3D(so, POINT_SPRITE_ENABLE, 1);
+   SB_DATA (so, cso->point_quad_rasterization);
+   SB_BEGIN_3D(so, POINT_SMOOTH_ENABLE, 1);
+   SB_DATA (so, cso->point_smooth);
+
+   SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 3);
+   SB_DATA (so, nvgl_polygon_mode(cso->fill_front));
+   SB_DATA (so, nvgl_polygon_mode(cso->fill_back));
+   SB_DATA (so, cso->poly_smooth);
+
+   SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3);
+   SB_DATA (so, cso->cull_face != PIPE_FACE_NONE);
+   SB_DATA (so, cso->front_ccw ? NV50_3D_FRONT_FACE_CCW :
+            NV50_3D_FRONT_FACE_CW);
+   switch (cso->cull_face) {
+   case PIPE_FACE_FRONT_AND_BACK:
+      SB_DATA(so, NV50_3D_CULL_FACE_FRONT_AND_BACK);
+      break;
+   case PIPE_FACE_FRONT:
+      SB_DATA(so, NV50_3D_CULL_FACE_FRONT);
+      break;
+   case PIPE_FACE_BACK:
+   default:
+      SB_DATA(so, NV50_3D_CULL_FACE_BACK);
+      break;
+   }
+
+   SB_BEGIN_3D(so, POLYGON_STIPPLE_ENABLE, 1);
+   SB_DATA (so, cso->poly_stipple_enable);
+   SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3);
+   SB_DATA (so, cso->offset_point);
+   SB_DATA (so, cso->offset_line);
+   SB_DATA (so, cso->offset_tri);
+
+   if (cso->offset_point || cso->offset_line || cso->offset_tri) {
+      SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
+      SB_DATA (so, fui(cso->offset_scale));
+      SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
+      SB_DATA (so, fui(cso->offset_units * 2.0f));
+      SB_BEGIN_3D(so, POLYGON_OFFSET_CLAMP, 1);
+      SB_DATA (so, fui(cso->offset_clamp));
+   }
+
+   /* depth_clip off => clamp depth at both the near and far plane */
+   if (cso->depth_clip) {
+      reg = 0;
+   } else {
+      reg =
+         NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR |
+         NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR |
+         NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1;
+   }
+#ifndef NV50_SCISSORS_CLIPPING
+   reg |=
+      NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 |
+      NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1;
+#endif
+   SB_BEGIN_3D(so, VIEW_VOLUME_CLIP_CTRL, 1);
+   SB_DATA (so, reg);
+
+   /* the baked stream must fit the fixed-size state array */
+   assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+   return (void *)so;
+}
+
+/* Make hwcso the current rasterizer state and flag it for validation. */
+static void
+nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+
+   nv50->rast = hwcso;
+   nv50->dirty |= NV50_NEW_RASTERIZER;
+}
+
+/* Free a rasterizer state object. */
+static void
+nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+   FREE(hwcso);
+}
+
+/* Pre-bake the depth/stencil/alpha CSO into a command stream stored in
+ * so->state.  Returns NULL if the state object cannot be allocated.
+ */
+static void *
+nv50_zsa_state_create(struct pipe_context *pipe,
+                      const struct pipe_depth_stencil_alpha_state *cso)
+{
+   struct nv50_zsa_stateobj *so = CALLOC_STRUCT(nv50_zsa_stateobj);
+
+   /* every SB_* below writes through 'so' */
+   if (!so)
+      return NULL;
+
+   so->pipe = *cso;
+
+   SB_BEGIN_3D(so, DEPTH_WRITE_ENABLE, 1);
+   SB_DATA (so, cso->depth.writemask);
+   SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1);
+   if (cso->depth.enabled) {
+      SB_DATA (so, 1);
+      SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1);
+      SB_DATA (so, nvgl_comparison_op(cso->depth.func));
+   } else {
+      SB_DATA (so, 0);
+   }
+
+   /* front-face stencil: enable + ops + func, then masks */
+   if (cso->stencil[0].enabled) {
+      SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
+      SB_DATA (so, 1);
+      SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+      SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+      SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+      SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func));
+      SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2);
+      SB_DATA (so, cso->stencil[0].writemask);
+      SB_DATA (so, cso->stencil[0].valuemask);
+   } else {
+      SB_BEGIN_3D(so, STENCIL_ENABLE, 1);
+      SB_DATA (so, 0);
+   }
+
+   /* back-face stencil is only valid together with the front face */
+   if (cso->stencil[1].enabled) {
+      assert(cso->stencil[0].enabled);
+      SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
+      SB_DATA (so, 1);
+      SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+      SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+      SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+      SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func));
+      SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
+      SB_DATA (so, cso->stencil[1].writemask);
+      SB_DATA (so, cso->stencil[1].valuemask);
+   } else {
+      SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 1);
+      SB_DATA (so, 0);
+   }
+
+   SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1);
+   if (cso->alpha.enabled) {
+      SB_DATA (so, 1);
+      SB_BEGIN_3D(so, ALPHA_TEST_REF, 2);
+      SB_DATA (so, fui(cso->alpha.ref_value));
+      SB_DATA (so, nvgl_comparison_op(cso->alpha.func));
+   } else {
+      SB_DATA (so, 0);
+   }
+
+   /* the baked stream must fit the fixed-size state array */
+   assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+   return (void *)so;
+}
+
+/* Make hwcso the current depth/stencil/alpha state and flag it. */
+static void
+nv50_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+
+   nv50->zsa = hwcso;
+   nv50->dirty |= NV50_NEW_ZSA;
+}
+
+/* Free a depth/stencil/alpha state object. */
+static void
+nv50_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+   FREE(hwcso);
+}
+
+/* ====================== SAMPLERS AND
TEXTURES ================================
+ */
+
+#define NV50_TSC_WRAP_CASE(n) \
+    case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
+
+/* Translate a PIPE_TEX_WRAP_* mode to NV50_TSC_WRAP_*.  Unknown modes are
+ * reported and treated as REPEAT.
+ */
+static INLINE unsigned
+nv50_tsc_wrap_mode(unsigned wrap)
+{
+   switch (wrap) {
+   NV50_TSC_WRAP_CASE(REPEAT);
+   NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
+   NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
+   NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
+   NV50_TSC_WRAP_CASE(CLAMP);
+   NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
+   NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
+   NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
+   default:
+      NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+      return NV50_TSC_WRAP_REPEAT;
+   }
+}
+
+/* Build the 8-word TSC (sampler) entry for a pipe_sampler_state.
+ * The entry starts with id -1.  Returns NULL if the entry cannot be
+ * allocated.
+ */
+void *
+nv50_sampler_state_create(struct pipe_context *pipe,
+                          const struct pipe_sampler_state *cso)
+{
+   struct nv50_tsc_entry *so = MALLOC_STRUCT(nv50_tsc_entry);
+   float f[2];
+
+   if (!so)
+      return NULL;
+
+   so->id = -1;
+
+   so->tsc[0] = (0x00026000 |
+                 (nv50_tsc_wrap_mode(cso->wrap_s) << 0) |
+                 (nv50_tsc_wrap_mode(cso->wrap_t) << 3) |
+                 (nv50_tsc_wrap_mode(cso->wrap_r) << 6));
+
+   /* tsc[1] is assigned (not or-ed) here: the memory is not zeroed */
+   switch (cso->mag_img_filter) {
+   case PIPE_TEX_FILTER_LINEAR:
+      so->tsc[1] = NV50_TSC_1_MAGF_LINEAR;
+      break;
+   case PIPE_TEX_FILTER_NEAREST:
+   default:
+      so->tsc[1] = NV50_TSC_1_MAGF_NEAREST;
+      break;
+   }
+
+   switch (cso->min_img_filter) {
+   case PIPE_TEX_FILTER_LINEAR:
+      so->tsc[1] |= NV50_TSC_1_MINF_LINEAR;
+      break;
+   case PIPE_TEX_FILTER_NEAREST:
+   default:
+      so->tsc[1] |= NV50_TSC_1_MINF_NEAREST;
+      break;
+   }
+
+   switch (cso->min_mip_filter) {
+   case PIPE_TEX_MIPFILTER_LINEAR:
+      so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR;
+      break;
+   case PIPE_TEX_MIPFILTER_NEAREST:
+      so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST;
+      break;
+   case PIPE_TEX_MIPFILTER_NONE:
+   default:
+      so->tsc[1] |= NV50_TSC_1_MIPF_NONE;
+      break;
+   }
+
+   if (nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) {
+      if (cso->seamless_cube_map)
+         so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS;
+      if (!cso->normalized_coords)
+         so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS;
+   }
+
+   /* anisotropy level in tsc[0] bits 20+: 7 for >= 16, 6 for >= 12,
+    * otherwise max_anisotropy / 2
+    */
+   if (cso->max_anisotropy >= 16)
+      so->tsc[0] |= (7 << 20);
+   else
+   if (cso->max_anisotropy >= 12)
+      so->tsc[0] |= (6 << 20);
+   else {
+      so->tsc[0] |= (cso->max_anisotropy >> 1) << 20;
+
+      if (cso->max_anisotropy >= 4)
+         so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35;
+      else
+      if (cso->max_anisotropy >= 2)
+         so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15;
+   }
+
+   if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+      /* NOTE: must be deactivated for non-shadow textures */
+      so->tsc[0] |= (1 << 9);
+      so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
+   }
+
+   /* LOD values are stored scaled by 256: bias in 13 bits, min/max in
+    * 12 bits each
+    */
+   f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f);
+   so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12;
+
+   f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f);
+   f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f);
+   so->tsc[2] =
+      (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff);
+
+   /* sRGB-encoded 8-bit border colour (R, G, B) ... */
+   so->tsc[2] |=
+      util_format_linear_float_to_srgb_8unorm(cso->border_color.f[0]) << 24;
+   so->tsc[3] =
+      util_format_linear_float_to_srgb_8unorm(cso->border_color.f[1]) << 12;
+   so->tsc[3] |=
+      util_format_linear_float_to_srgb_8unorm(cso->border_color.f[2]) << 20;
+
+   /* ... and the float border colour (R, G, B, A) */
+   so->tsc[4] = fui(cso->border_color.f[0]);
+   so->tsc[5] = fui(cso->border_color.f[1]);
+   so->tsc[6] = fui(cso->border_color.f[2]);
+   so->tsc[7] = fui(cso->border_color.f[3]);
+
+   return (void *)so;
+}
+
+/* Delete a sampler: clear every bound slot (all three stages, up to
+ * num_samplers[s]) that still points at it, release its TSC slot and free
+ * the memory.
+ */
+static void
+nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+   unsigned s, i;
+
+   for (s = 0; s < 3; ++s)
+      for (i = 0; i < nv50_context(pipe)->num_samplers[s]; ++i)
+         if (nv50_context(pipe)->samplers[s][i] == hwcso)
+            nv50_context(pipe)->samplers[s][i] = NULL;
+
+   nv50_screen_tsc_free(nv50_context(pipe)->screen, nv50_tsc_entry(hwcso));
+
+   FREE(hwcso);
+}
+
+/* Bind 'nr' samplers to stage 's' (0 = vertex, 1 = geometry, 2 = fragment,
+ * as used by the wrappers below).  Every replaced or dropped sampler is
+ * unlocked in the screen's TSC table.
+ */
+static INLINE void
+nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
+                               unsigned nr, void **hwcso)
+{
+   unsigned i;
+
+   for (i = 0; i < nr; ++i) {
+      struct nv50_tsc_entry *old = nv50->samplers[s][i];
+
+      nv50->samplers[s][i] = nv50_tsc_entry(hwcso[i]);
+      if (old)
+         nv50_screen_tsc_unlock(nv50->screen, old);
+   }
+   for (; i < nv50->num_samplers[s]; ++i) {
+      if (nv50->samplers[s][i]) {
+         nv50_screen_tsc_unlock(nv50->screen, nv50->samplers[s][i]);
+         /* These slots fall outside num_samplers[s] after this call, so
+          * nv50_sampler_state_delete() will no longer clear them.  Clear
+          * them now, or a later bind would read the stale pointer as
+          * 'old' and unlock it a second time (possibly after the entry
+          * was freed).
+          */
+         nv50->samplers[s][i] = NULL;
+      }
+   }
+
+   nv50->num_samplers[s] = nr;
+
+   nv50->dirty |= NV50_NEW_SAMPLERS;
+}
+
+/* Bind vertex stage samplers (stage index 0). */
+static void
+nv50_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+   nv50_stage_sampler_states_bind(nv50_context(pipe), 0, nr, s);
+}
+
+/* Bind fragment stage samplers (stage index 2). */
+static void
+nv50_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+   nv50_stage_sampler_states_bind(nv50_context(pipe), 2, nr, s);
+}
+
+/* Bind geometry stage samplers (stage index 1). */
+static void
+nv50_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+   nv50_stage_sampler_states_bind(nv50_context(pipe), 1, nr, s);
+}
+
+/* Destroy a sampler view: drop its texture reference, release its TIC
+ * slot and free the entry.
+ *
+ * NOTE: only called when not referenced anywhere, won't be bound
+ */
+static void
+nv50_sampler_view_destroy(struct pipe_context *pipe,
+                          struct pipe_sampler_view *view)
+{
+   pipe_resource_reference(&view->texture, NULL);
+
+   nv50_screen_tic_free(nv50_context(pipe)->screen, nv50_tic_entry(view));
+
+   FREE(nv50_tic_entry(view));
+}
+
+/* Bind 'nr' sampler views to stage 's'.  Previously bound views are
+ * unlocked in the screen's TIC table; views at or past 'nr' are
+ * unreferenced.  The texture bufctx bin is reset so it is rebuilt on the
+ * next validation.
+ */
+static INLINE void
+nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
+                             unsigned nr,
+                             struct pipe_sampler_view **views)
+{
+   unsigned i;
+
+   for (i = 0; i < nr; ++i) {
+      struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]);
+      if (old)
+         nv50_screen_tic_unlock(nv50->screen, old);
+
+      pipe_sampler_view_reference(&nv50->textures[s][i], views[i]);
+   }
+
+   for (i = nr; i < nv50->num_textures[s]; ++i) {
+      struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]);
+      if (!old)
+         continue;
+      nv50_screen_tic_unlock(nv50->screen, old);
+
+      pipe_sampler_view_reference(&nv50->textures[s][i], NULL);
+   }
+
+   nv50->num_textures[s] = nr;
+
+   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+
+   nv50->dirty |= NV50_NEW_TEXTURES;
+}
+
+/* Set vertex stage sampler views (stage index 0). */
+static void
+nv50_vp_set_sampler_views(struct pipe_context *pipe,
+                          unsigned nr,
+                          struct pipe_sampler_view **views)
+{
+   nv50_stage_set_sampler_views(nv50_context(pipe), 0, nr, views);
+}
+
+static void
+nv50_fp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nv50_stage_set_sampler_views(nv50_context(pipe), 2, nr, views); +} + +static void +nv50_gp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nv50_stage_set_sampler_views(nv50_context(pipe), 1, nr, views); +} + +/* ============================= SHADERS ======================================= + */ + +static void * +nv50_sp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso, unsigned type) +{ + struct nv50_program *prog; + + prog = CALLOC_STRUCT(nv50_program); + if (!prog) + return NULL; + + prog->type = type; + prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + if (cso->stream_output.num_outputs) + prog->pipe.stream_output = cso->stream_output; + + return (void *)prog; +} + +static void +nv50_sp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_program *prog = (struct nv50_program *)hwcso; + + nv50_program_destroy(nv50_context(pipe), prog); + + FREE((void *)prog->pipe.tokens); + FREE(prog); +} + +static void * +nv50_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nv50_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX); +} + +static void +nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->vertprog = hwcso; + nv50->dirty |= NV50_NEW_VERTPROG; +} + +static void * +nv50_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nv50_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT); +} + +static void +nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->fragprog = hwcso; + nv50->dirty |= NV50_NEW_FRAGPROG; +} + +static void * +nv50_gp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nv50_sp_state_create(pipe, 
cso, PIPE_SHADER_GEOMETRY); +} + +static void +nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->gmtyprog = hwcso; + nv50->dirty |= NV50_NEW_GMTYPROG; +} + +static void +nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + struct pipe_constant_buffer *cb) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct pipe_resource *res = cb ? cb->buffer : NULL; + const unsigned s = nv50_context_shader_stage(shader); + const unsigned i = index; + + if (shader == PIPE_SHADER_COMPUTE) + return; + + if (nv50->constbuf[s][i].user) + nv50->constbuf[s][i].u.buf = NULL; + else + if (nv50->constbuf[s][i].u.buf) + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i)); + + pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res); + + nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE; + if (nv50->constbuf[s][i].user) { + nv50->constbuf[s][i].u.data = cb->user_buffer; + nv50->constbuf[s][i].size = cb->buffer_size; + nv50->constbuf_valid[s] |= 1 << i; + } else + if (res) { + nv50->constbuf[s][i].offset = cb->buffer_offset; + nv50->constbuf[s][i].size = align(cb->buffer_size, 0x100); + nv50->constbuf_valid[s] |= 1 << i; + } else { + nv50->constbuf_valid[s] &= ~(1 << i); + } + nv50->constbuf_dirty[s] |= 1 << i; + + nv50->dirty |= NV50_NEW_CONSTBUF; +} + +/* ============================================================================= + */ + +static void +nv50_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->blend_colour = *bcol; + nv50->dirty |= NV50_NEW_BLEND_COLOUR; +} + +static void +nv50_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *sr) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->stencil_ref = *sr; + nv50->dirty |= NV50_NEW_STENCIL_REF; +} + +static void +nv50_set_clip_state(struct pipe_context *pipe, + const struct 
pipe_clip_state *clip) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + memcpy(nv50->clip.ucp, clip->ucp, sizeof(clip->ucp)); + + nv50->dirty |= NV50_NEW_CLIP; +} + +static void +nv50_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->sample_mask = sample_mask; + nv50->dirty |= NV50_NEW_SAMPLE_MASK; +} + + +static void +nv50_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv50_context *nv50 = nv50_context(pipe); + unsigned i; + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + + for (i = 0; i < fb->nr_cbufs; ++i) + pipe_surface_reference(&nv50->framebuffer.cbufs[i], fb->cbufs[i]); + for (; i < nv50->framebuffer.nr_cbufs; ++i) + pipe_surface_reference(&nv50->framebuffer.cbufs[i], NULL); + + nv50->framebuffer.nr_cbufs = fb->nr_cbufs; + + nv50->framebuffer.width = fb->width; + nv50->framebuffer.height = fb->height; + + pipe_surface_reference(&nv50->framebuffer.zsbuf, fb->zsbuf); + + nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +static void +nv50_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->stipple = *stipple; + nv50->dirty |= NV50_NEW_STIPPLE; +} + +static void +nv50_set_scissor_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissor) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->scissor = *scissor; + nv50->dirty |= NV50_NEW_SCISSOR; +} + +static void +nv50_set_viewport_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *vpt) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->viewport = *vpt; + nv50->dirty |= NV50_NEW_VIEWPORT; +} + +static void +nv50_set_vertex_buffers(struct pipe_context *pipe, + unsigned start_slot, unsigned count, + const struct 
pipe_vertex_buffer *vb) +{ /* Store `count` vertex buffers starting at `start_slot` and keep the vbo_user / vbo_constant slot masks in sync; with vb == NULL the masks for that range are cleared. */ + struct nv50_context *nv50 = nv50_context(pipe); + unsigned i; + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX); /* done up front so the vb == NULL early return below also drops the old vertex bindings */ + + nv50->dirty |= NV50_NEW_ARRAYS; /* every call, including an unbind, must trigger array revalidation */ + + util_set_vertex_buffers_count(nv50->vtxbuf, &nv50->num_vtxbufs, vb, + start_slot, count); + + if (!vb) { + nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot); + nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot); + return; + } + + for (i = 0; i < count; ++i) { + unsigned dst_index = start_slot + i; + + if (!vb[i].buffer && vb[i].user_buffer) { /* user-pointer vertex data */ + nv50->vbo_user |= 1 << dst_index; + if (!vb[i].stride) /* zero stride is tracked in vbo_constant */ + nv50->vbo_constant |= 1 << dst_index; + else + nv50->vbo_constant &= ~(1 << dst_index); + } else { + nv50->vbo_user &= ~(1 << dst_index); + nv50->vbo_constant &= ~(1 << dst_index); + } + } +} + +static void +nv50_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + if (nv50->idxbuf.buffer) + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX); + + if (ib) { + pipe_resource_reference(&nv50->idxbuf.buffer, ib->buffer); + nv50->idxbuf.index_size = ib->index_size; + if (ib->buffer) { + nv50->idxbuf.offset = ib->offset; + BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(ib->buffer), RD); + } else { + nv50->idxbuf.user_buffer = ib->user_buffer; + } + } else { + pipe_resource_reference(&nv50->idxbuf.buffer, NULL); + } +} + +static void +nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->vertex = hwcso; + nv50->dirty |= NV50_NEW_VERTEX; +} + +static struct pipe_stream_output_target * +nv50_so_target_create(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size) +{ + struct nv50_so_target *targ = MALLOC_STRUCT(nv50_so_target); + if (!targ) + return NULL; + + if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) { + targ->pq = pipe->create_query(pipe, + 
NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET); + if (!targ->pq) { + FREE(targ); + return NULL; + } + } else { + targ->pq = NULL; + } + targ->clean = TRUE; + + targ->pipe.buffer_size = size; + targ->pipe.buffer_offset = offset; + targ->pipe.context = pipe; + targ->pipe.buffer = NULL; + pipe_resource_reference(&targ->pipe.buffer, res); + pipe_reference_init(&targ->pipe.reference, 1); + + return &targ->pipe; +} + +static void +nv50_so_target_destroy(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg) +{ + struct nv50_so_target *targ = nv50_so_target(ptarg); + if (targ->pq) + pipe->destroy_query(pipe, targ->pq); + pipe_resource_reference(&targ->pipe.buffer, NULL); + FREE(targ); +} + +static void +nv50_set_stream_output_targets(struct pipe_context *pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_mask) +{ + struct nv50_context *nv50 = nv50_context(pipe); + unsigned i; + boolean serialize = TRUE; + const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS; + + assert(num_targets <= 4); + + for (i = 0; i < num_targets; ++i) { + const boolean changed = nv50->so_target[i] != targets[i]; + if (!changed && (append_mask & (1 << i))) + continue; + nv50->so_targets_dirty |= 1 << i; + + if (can_resume && changed && nv50->so_target[i]) { + nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize); + serialize = FALSE; + } + + if (targets[i] && !(append_mask & (1 << i))) + nv50_so_target(targets[i])->clean = TRUE; + + pipe_so_target_reference(&nv50->so_target[i], targets[i]); + } + for (; i < nv50->num_so_targets; ++i) { + if (can_resume && nv50->so_target[i]) { + nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize); + serialize = FALSE; + } + pipe_so_target_reference(&nv50->so_target[i], NULL); + nv50->so_targets_dirty |= 1 << i; + } + nv50->num_so_targets = num_targets; + + if (nv50->so_targets_dirty) + nv50->dirty |= NV50_NEW_STRMOUT; +} + +void +nv50_init_state_functions(struct 
nv50_context *nv50) +{ + struct pipe_context *pipe = &nv50->base.pipe; + + pipe->create_blend_state = nv50_blend_state_create; + pipe->bind_blend_state = nv50_blend_state_bind; + pipe->delete_blend_state = nv50_blend_state_delete; + + pipe->create_rasterizer_state = nv50_rasterizer_state_create; + pipe->bind_rasterizer_state = nv50_rasterizer_state_bind; + pipe->delete_rasterizer_state = nv50_rasterizer_state_delete; + + pipe->create_depth_stencil_alpha_state = nv50_zsa_state_create; + pipe->bind_depth_stencil_alpha_state = nv50_zsa_state_bind; + pipe->delete_depth_stencil_alpha_state = nv50_zsa_state_delete; + + pipe->create_sampler_state = nv50_sampler_state_create; + pipe->delete_sampler_state = nv50_sampler_state_delete; + pipe->bind_vertex_sampler_states = nv50_vp_sampler_states_bind; + pipe->bind_fragment_sampler_states = nv50_fp_sampler_states_bind; + pipe->bind_geometry_sampler_states = nv50_gp_sampler_states_bind; + + pipe->create_sampler_view = nv50_create_sampler_view; + pipe->sampler_view_destroy = nv50_sampler_view_destroy; + pipe->set_vertex_sampler_views = nv50_vp_set_sampler_views; + pipe->set_fragment_sampler_views = nv50_fp_set_sampler_views; + pipe->set_geometry_sampler_views = nv50_gp_set_sampler_views; + + pipe->create_vs_state = nv50_vp_state_create; + pipe->create_fs_state = nv50_fp_state_create; + pipe->create_gs_state = nv50_gp_state_create; + pipe->bind_vs_state = nv50_vp_state_bind; + pipe->bind_fs_state = nv50_fp_state_bind; + pipe->bind_gs_state = nv50_gp_state_bind; + pipe->delete_vs_state = nv50_sp_state_delete; + pipe->delete_fs_state = nv50_sp_state_delete; + pipe->delete_gs_state = nv50_sp_state_delete; + + pipe->set_blend_color = nv50_set_blend_color; + pipe->set_stencil_ref = nv50_set_stencil_ref; + pipe->set_clip_state = nv50_set_clip_state; + pipe->set_sample_mask = nv50_set_sample_mask; + pipe->set_constant_buffer = nv50_set_constant_buffer; + pipe->set_framebuffer_state = nv50_set_framebuffer_state; + 
pipe->set_polygon_stipple = nv50_set_polygon_stipple; + pipe->set_scissor_states = nv50_set_scissor_states; + pipe->set_viewport_states = nv50_set_viewport_states; + + pipe->create_vertex_elements_state = nv50_vertex_state_create; + pipe->delete_vertex_elements_state = nv50_vertex_state_delete; + pipe->bind_vertex_elements_state = nv50_vertex_state_bind; + + pipe->set_vertex_buffers = nv50_set_vertex_buffers; + pipe->set_index_buffer = nv50_set_index_buffer; + + pipe->create_stream_output_target = nv50_so_target_create; + pipe->stream_output_target_destroy = nv50_so_target_destroy; + pipe->set_stream_output_targets = nv50_set_stream_output_targets; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c new file mode 100644 index 00000000000..866829ca22d --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -0,0 +1,414 @@ + +#include "nv50/nv50_context.h" +#include "os/os_time.h" + +static void +nv50_validate_fb(struct nv50_context *nv50) +{ /* Emit the colour render-target and depth/stencil (ZETA) setup for nv50->framebuffer, and update each bound miptree's GPU read/write status. */ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + unsigned i; + unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1; + uint32_t array_size = 0xffff, array_mode = 0; + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + + BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); + PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs); + BEGIN_NV04(push, NV50_3D(SCREEN_SCISSOR_HORIZ), 2); + PUSH_DATA (push, fb->width << 16); + PUSH_DATA (push, fb->height << 16); + + for (i = 0; i < fb->nr_cbufs; ++i) { + struct nv50_miptree *mt = nv50_miptree(fb->cbufs[i]->texture); + struct nv50_surface *sf = nv50_surface(fb->cbufs[i]); + struct nouveau_bo *bo = mt->base.bo; + + array_size = MIN2(array_size, sf->depth); + if (mt->layout_3d) + array_mode = NV50_3D_RT_ARRAY_MODE_MODE_3D; /* 1 << 16 */ + + /* can't mix 3D with ARRAY or have RTs of different depth/array_size */ + assert(mt->layout_3d || 
!array_mode || array_size == 1); + + BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 5); + PUSH_DATAh(push, bo->offset + sf->offset); + PUSH_DATA (push, bo->offset + sf->offset); + PUSH_DATA (push, nv50_format_table[sf->base.format].rt); + if (likely(nouveau_bo_memtype(bo))) { + PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, mt->layer_stride >> 2); + BEGIN_NV04(push, NV50_3D(RT_HORIZ(i)), 2); + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1); + PUSH_DATA (push, array_mode | array_size); + } else { + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(RT_HORIZ(i)), 2); + PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | mt->level[0].pitch); + PUSH_DATA (push, sf->height); + BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1); + PUSH_DATA (push, 0); + + assert(!fb->zsbuf); + assert(!mt->ms_mode); + } + + ms_mode = mt->ms_mode; + + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) + nv50->state.rt_serialize = TRUE; + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; /* clear only the READING bit; the WRITING bit set above must survive */ + + /* only register for writing, otherwise we'd always serialize here */ + BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR); + } + + if (fb->zsbuf) { + struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture); + struct nv50_surface *sf = nv50_surface(fb->zsbuf); + struct nouveau_bo *bo = mt->base.bo; + int unk = mt->base.base.target == PIPE_TEXTURE_3D || sf->depth == 1; + + BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5); + PUSH_DATAh(push, bo->offset + sf->offset); + PUSH_DATA (push, bo->offset + sf->offset); + PUSH_DATA (push, nv50_format_table[fb->zsbuf->format].rt); + PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, mt->layer_stride >> 2); + BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(ZETA_HORIZ), 3); + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + 
PUSH_DATA (push, (unk << 16) | sf->depth); + + ms_mode = mt->ms_mode; + + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) + nv50->state.rt_serialize = TRUE; + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; /* clear only the READING bit; the WRITING bit set above must survive */ + + BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR); + } else { + BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 0); + } + + BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1); + PUSH_DATA (push, ms_mode); + + BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, fb->width << 16); + PUSH_DATA (push, fb->height << 16); +} + +static void +nv50_validate_blend_colour(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + + BEGIN_NV04(push, NV50_3D(BLEND_COLOR(0)), 4); + PUSH_DATAf(push, nv50->blend_colour.color[0]); + PUSH_DATAf(push, nv50->blend_colour.color[1]); + PUSH_DATAf(push, nv50->blend_colour.color[2]); + PUSH_DATAf(push, nv50->blend_colour.color[3]); +} + +static void +nv50_validate_stencil_ref(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + + BEGIN_NV04(push, NV50_3D(STENCIL_FRONT_FUNC_REF), 1); + PUSH_DATA (push, nv50->stencil_ref.ref_value[0]); + BEGIN_NV04(push, NV50_3D(STENCIL_BACK_FUNC_REF), 1); + PUSH_DATA (push, nv50->stencil_ref.ref_value[1]); +} + +static void +nv50_validate_stipple(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + unsigned i; + + BEGIN_NV04(push, NV50_3D(POLYGON_STIPPLE_PATTERN(0)), 32); + for (i = 0; i < 32; ++i) + PUSH_DATA(push, util_bswap32(nv50->stipple.stipple[i])); +} + +static void +nv50_validate_scissor(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct pipe_scissor_state *s = &nv50->scissor; +#ifdef NV50_SCISSORS_CLIPPING + struct pipe_viewport_state *vp = &nv50->viewport; + int minx, maxx, miny, maxy; + + if (!(nv50->dirty & + (NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | 
NV50_NEW_FRAMEBUFFER)) && + nv50->state.scissor == nv50->rast->pipe.scissor) + return; + nv50->state.scissor = nv50->rast->pipe.scissor; + + if (nv50->state.scissor) { + minx = s->minx; + maxx = s->maxx; + miny = s->miny; + maxy = s->maxy; + } else { + minx = 0; + maxx = nv50->framebuffer.width; + miny = 0; + maxy = nv50->framebuffer.height; + } + + minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0]))); + maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0]))); + miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1]))); + maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1]))); + + BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2); + PUSH_DATA (push, (maxx << 16) | minx); + PUSH_DATA (push, (maxy << 16) | miny); +#else + BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2); + PUSH_DATA (push, (s->maxx << 16) | s->minx); + PUSH_DATA (push, (s->maxy << 16) | s->miny); +#endif +} + +static void +nv50_validate_viewport(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + float zmin, zmax; + + BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSLATE_X(0)), 3); + PUSH_DATAf(push, nv50->viewport.translate[0]); + PUSH_DATAf(push, nv50->viewport.translate[1]); + PUSH_DATAf(push, nv50->viewport.translate[2]); + BEGIN_NV04(push, NV50_3D(VIEWPORT_SCALE_X(0)), 3); + PUSH_DATAf(push, nv50->viewport.scale[0]); + PUSH_DATAf(push, nv50->viewport.scale[1]); + PUSH_DATAf(push, nv50->viewport.scale[2]); + + zmin = nv50->viewport.translate[2] - fabsf(nv50->viewport.scale[2]); + zmax = nv50->viewport.translate[2] + fabsf(nv50->viewport.scale[2]); + +#ifdef NV50_SCISSORS_CLIPPING + BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(0)), 2); + PUSH_DATAf(push, zmin); + PUSH_DATAf(push, zmax); +#endif +} + +static INLINE void +nv50_check_program_ucps(struct nv50_context *nv50, + struct nv50_program *vp, uint8_t mask) +{ + const unsigned n = util_logbase2(mask) + 1; + + if (vp->vp.clpd_nr >= n) + return; + nv50_program_destroy(nv50, vp); + + 
vp->vp.clpd_nr = n; + if (likely(vp == nv50->vertprog)) { + nv50->dirty |= NV50_NEW_VERTPROG; + nv50_vertprog_validate(nv50); + } else { + nv50->dirty |= NV50_NEW_GMTYPROG; + nv50_gmtyprog_validate(nv50); + } + nv50_fp_linkage_validate(nv50); +} + +static void +nv50_validate_clip(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_program *vp; + uint8_t clip_enable; + + if (nv50->dirty & NV50_NEW_CLIP) { + BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); + PUSH_DATA (push, (0 << 8) | NV50_CB_AUX); + BEGIN_NI04(push, NV50_3D(CB_DATA(0)), PIPE_MAX_CLIP_PLANES * 4); + PUSH_DATAp(push, &nv50->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4); + } + + vp = nv50->gmtyprog; + if (likely(!vp)) + vp = nv50->vertprog; + + clip_enable = nv50->rast->pipe.clip_plane_enable; + + BEGIN_NV04(push, NV50_3D(CLIP_DISTANCE_ENABLE), 1); + PUSH_DATA (push, clip_enable); + + if (clip_enable) + nv50_check_program_ucps(nv50, vp, clip_enable); +} + +static void +nv50_validate_blend(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + + PUSH_SPACE(push, nv50->blend->size); + PUSH_DATAp(push, nv50->blend->state, nv50->blend->size); +} + +static void +nv50_validate_zsa(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + + PUSH_SPACE(push, nv50->zsa->size); + PUSH_DATAp(push, nv50->zsa->state, nv50->zsa->size); +} + +static void +nv50_validate_rasterizer(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + + PUSH_SPACE(push, nv50->rast->size); + PUSH_DATAp(push, nv50->rast->state, nv50->rast->size); +} + +static void +nv50_validate_sample_mask(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + + unsigned mask[4] = + { + nv50->sample_mask & 0xffff, + nv50->sample_mask & 0xffff, + nv50->sample_mask & 0xffff, + nv50->sample_mask & 0xffff + }; + + BEGIN_NV04(push, NV50_3D(MSAA_MASK(0)), 4); + PUSH_DATA (push, mask[0]); + PUSH_DATA (push, 
mask[1]); + PUSH_DATA (push, mask[2]); + PUSH_DATA (push, mask[3]); +} + +static void +nv50_switch_pipe_context(struct nv50_context *ctx_to) +{ + struct nv50_context *ctx_from = ctx_to->screen->cur_ctx; + + if (ctx_from) + ctx_to->state = ctx_from->state; + + ctx_to->dirty = ~0; + + if (!ctx_to->vertex) + ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS); + + if (!ctx_to->vertprog) + ctx_to->dirty &= ~NV50_NEW_VERTPROG; + if (!ctx_to->fragprog) + ctx_to->dirty &= ~NV50_NEW_FRAGPROG; + + if (!ctx_to->blend) + ctx_to->dirty &= ~NV50_NEW_BLEND; + if (!ctx_to->rast) +#ifdef NV50_SCISSORS_CLIPPING + ctx_to->dirty &= ~(NV50_NEW_RASTERIZER | NV50_NEW_SCISSOR); +#else + ctx_to->dirty &= ~NV50_NEW_RASTERIZER; +#endif + if (!ctx_to->zsa) + ctx_to->dirty &= ~NV50_NEW_ZSA; + + ctx_to->screen->cur_ctx = ctx_to; +} + +static struct state_validate { + void (*func)(struct nv50_context *); + uint32_t states; +} validate_list[] = { + { nv50_validate_fb, NV50_NEW_FRAMEBUFFER }, + { nv50_validate_blend, NV50_NEW_BLEND }, + { nv50_validate_zsa, NV50_NEW_ZSA }, + { nv50_validate_sample_mask, NV50_NEW_SAMPLE_MASK }, + { nv50_validate_rasterizer, NV50_NEW_RASTERIZER }, + { nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR }, + { nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF }, + { nv50_validate_stipple, NV50_NEW_STIPPLE }, +#ifdef NV50_SCISSORS_CLIPPING + { nv50_validate_scissor, NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | + NV50_NEW_RASTERIZER | + NV50_NEW_FRAMEBUFFER }, +#else + { nv50_validate_scissor, NV50_NEW_SCISSOR }, +#endif + { nv50_validate_viewport, NV50_NEW_VIEWPORT }, + { nv50_vertprog_validate, NV50_NEW_VERTPROG }, + { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG }, + { nv50_fragprog_validate, NV50_NEW_FRAGPROG }, + { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | + NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER }, + { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG }, + { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | + 
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, + { nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, + { nv50_constbufs_validate, NV50_NEW_CONSTBUF }, + { nv50_validate_textures, NV50_NEW_TEXTURES }, + { nv50_validate_samplers, NV50_NEW_SAMPLERS }, + { nv50_stream_output_validate, NV50_NEW_STRMOUT | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, + { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS } +}; +#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) + +boolean +nv50_state_validate(struct nv50_context *nv50, uint32_t mask, unsigned words) +{ + uint32_t state_mask; + int ret; + unsigned i; + + if (nv50->screen->cur_ctx != nv50) + nv50_switch_pipe_context(nv50); + + state_mask = nv50->dirty & mask; + + if (state_mask) { + for (i = 0; i < validate_list_len; ++i) { + struct state_validate *validate = &validate_list[i]; + + if (state_mask & validate->states) + validate->func(nv50); + } + nv50->dirty &= ~state_mask; + + if (nv50->state.rt_serialize) { + nv50->state.rt_serialize = FALSE; + BEGIN_NV04(nv50->base.pushbuf, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (nv50->base.pushbuf, 0); + } + + nv50_bufctx_fence(nv50->bufctx_3d, FALSE); + } + nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d); + ret = nouveau_pushbuf_validate(nv50->base.pushbuf); + + if (unlikely(nv50->state.flushed)) { + nv50->state.flushed = FALSE; + nv50_bufctx_fence(nv50->bufctx_3d, TRUE); + } + return !ret; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h new file mode 100644 index 00000000000..238951733cf --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h @@ -0,0 +1,78 @@ + +#ifndef __NV50_STATEOBJ_H__ +#define __NV50_STATEOBJ_H__ + +#include "pipe/p_state.h" + +#define NV50_SCISSORS_CLIPPING + +#define SB_BEGIN_3D(so, m, s) \ + (so)->state[(so)->size++] = NV50_FIFO_PKHDR(NV50_3D(m), s) + +#define SB_BEGIN_3D_(so, m, s) 
\ + (so)->state[(so)->size++] = NV50_FIFO_PKHDR(SUBC_3D(m), s) + +#define SB_DATA(so, u) (so)->state[(so)->size++] = (u) + +#include "nv50/nv50_stateobj_tex.h" + +struct nv50_blend_stateobj { + struct pipe_blend_state pipe; + int size; + uint32_t state[84]; // TODO: allocate less if !independent_blend_enable +}; + +struct nv50_rasterizer_stateobj { + struct pipe_rasterizer_state pipe; + int size; + uint32_t state[48]; +}; + +struct nv50_zsa_stateobj { + struct pipe_depth_stencil_alpha_state pipe; + int size; + uint32_t state[29]; +}; + +struct nv50_constbuf { + union { + struct pipe_resource *buf; + const uint8_t *data; + } u; + uint32_t size; /* max 65536 */ + uint32_t offset; + boolean user; /* should only be TRUE if u.data is valid and non-NULL */ +}; + +struct nv50_vertex_element { + struct pipe_vertex_element pipe; + uint32_t state; +}; + +struct nv50_vertex_stateobj { + uint32_t min_instance_div[PIPE_MAX_ATTRIBS]; + uint16_t vb_access_size[PIPE_MAX_ATTRIBS]; + struct translate *translate; + unsigned num_elements; + uint32_t instance_elts; + uint32_t instance_bufs; + boolean need_conversion; + unsigned vertex_size; + unsigned packet_vertex_limit; + struct nv50_vertex_element element[0]; +}; + +struct nv50_so_target { + struct pipe_stream_output_target pipe; + struct pipe_query *pq; + unsigned stride; + boolean clean; +}; + +static INLINE struct nv50_so_target * +nv50_so_target(struct pipe_stream_output_target *ptarg) +{ + return (struct nv50_so_target *)ptarg; +} + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h new file mode 100644 index 00000000000..99548cbdb42 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h @@ -0,0 +1,34 @@ + +#ifndef __NV50_STATEOBJ_TEX_H__ +#define __NV50_STATEOBJ_TEX_H__ + +#include "pipe/p_state.h" + +struct nv50_tsc_entry { + int id; + uint32_t tsc[8]; +}; + +static INLINE struct nv50_tsc_entry * +nv50_tsc_entry(void *hwcso) +{ + 
return (struct nv50_tsc_entry *)hwcso; +} + +struct nv50_tic_entry { + struct pipe_sampler_view pipe; + int id; + uint32_t tic[8]; +}; + +static INLINE struct nv50_tic_entry * +nv50_tic_entry(struct pipe_sampler_view *view) +{ + return (struct nv50_tic_entry *)view; +} + +extern void * +nv50_sampler_state_create(struct pipe_context *, + const struct pipe_sampler_state *); + +#endif /* __NV50_STATEOBJ_TEX_H__ */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c new file mode 100644 index 00000000000..dcc1fce41c5 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -0,0 +1,1353 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <stdint.h> + +#include "pipe/p_defines.h" + +#include "util/u_inlines.h" +#include "util/u_pack_color.h" +#include "util/u_format.h" +#include "util/u_surface.h" + +#include "tgsi/tgsi_ureg.h" + +#include "os/os_thread.h" + +#include "nv50/nv50_context.h" +#include "nv50/nv50_resource.h" + +#include "nv50/nv50_defs.xml.h" +#include "nv50/nv50_texture.xml.h" + +/* these are used in nv50_blit.h */ +#define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL +#define NV50_ENG2D_NOCONVERT_FORMATS 0x0008402000000000ULL +#define NV50_ENG2D_LUMINANCE_FORMATS 0x0008402000000000ULL +#define NV50_ENG2D_INTENSITY_FORMATS 0x0000000000000000ULL +#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000608000ULL + +#define NOUVEAU_DRIVER 0x50 +#include "nv50/nv50_blit.h" + +static INLINE uint8_t +nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal) +{ + uint8_t id = nv50_format_table[format].rt; + + /* Hardware values for color formats range from 0xc0 to 0xff, + * but the 2D engine doesn't support all of them. + */ + if ((id >= 0xc0) && (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)))) + return id; + assert(dst_src_equal); + + switch (util_format_get_blocksize(format)) { + case 1: + return NV50_SURFACE_FORMAT_R8_UNORM; + case 2: + return NV50_SURFACE_FORMAT_R16_UNORM; + case 4: + return NV50_SURFACE_FORMAT_BGRA8_UNORM; + default: + return 0; + } +} + +static int +nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst, + struct nv50_miptree *mt, unsigned level, unsigned layer, + enum pipe_format pformat, boolean dst_src_pformat_equal) +{ + struct nouveau_bo *bo = mt->base.bo; + uint32_t width, height, depth; + uint32_t format; + uint32_t mthd = dst ? 
NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; + uint32_t offset = mt->level[level].offset; + + format = nv50_2d_format(pformat, dst, dst_src_pformat_equal); + if (!format) { + NOUVEAU_ERR("invalid/unsupported surface format: %s\n", + util_format_name(pformat)); + return 1; + } + + width = u_minify(mt->base.base.width0, level) << mt->ms_x; + height = u_minify(mt->base.base.height0, level) << mt->ms_y; + depth = u_minify(mt->base.base.depth0, level); + + offset = mt->level[level].offset; + if (!mt->layout_3d) { + offset += mt->layer_stride * layer; + depth = 1; + layer = 0; + } else + if (!dst) { + offset += nv50_mt_zslice_offset(mt, level, layer); + layer = 0; + } + + if (!nouveau_bo_memtype(bo)) { + BEGIN_NV04(push, SUBC_2D(mthd), 2); + PUSH_DATA (push, format); + PUSH_DATA (push, 1); + BEGIN_NV04(push, SUBC_2D(mthd + 0x14), 5); + PUSH_DATA (push, mt->level[level].pitch); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + PUSH_DATAh(push, bo->offset + offset); + PUSH_DATA (push, bo->offset + offset); + } else { + BEGIN_NV04(push, SUBC_2D(mthd), 5); + PUSH_DATA (push, format); + PUSH_DATA (push, 0); + PUSH_DATA (push, mt->level[level].tile_mode); + PUSH_DATA (push, depth); + PUSH_DATA (push, layer); + BEGIN_NV04(push, SUBC_2D(mthd + 0x18), 4); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + PUSH_DATAh(push, bo->offset + offset); + PUSH_DATA (push, bo->offset + offset); + } + +#if 0 + if (dst) { + BEGIN_NV04(push, SUBC_2D(NV50_2D_CLIP_X), 4); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + } +#endif + return 0; +} + +static int +nv50_2d_texture_do_copy(struct nouveau_pushbuf *push, + struct nv50_miptree *dst, unsigned dst_level, + unsigned dx, unsigned dy, unsigned dz, + struct nv50_miptree *src, unsigned src_level, + unsigned sx, unsigned sy, unsigned sz, + unsigned w, unsigned h) +{ + const enum pipe_format dfmt = dst->base.base.format; + const enum pipe_format sfmt = src->base.base.format; + int ret; 
+ boolean eqfmt = dfmt == sfmt; + + if (!PUSH_SPACE(push, 2 * 16 + 32)) + return PIPE_ERROR; + + ret = nv50_2d_texture_set(push, 1, dst, dst_level, dz, dfmt, eqfmt); + if (ret) + return ret; + + ret = nv50_2d_texture_set(push, 0, src, src_level, sz, sfmt, eqfmt); + if (ret) + return ret; + + BEGIN_NV04(push, NV50_2D(BLIT_CONTROL), 1); + PUSH_DATA (push, NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE); + BEGIN_NV04(push, NV50_2D(BLIT_DST_X), 4); + PUSH_DATA (push, dx << dst->ms_x); + PUSH_DATA (push, dy << dst->ms_y); + PUSH_DATA (push, w << dst->ms_x); + PUSH_DATA (push, h << dst->ms_y); + BEGIN_NV04(push, NV50_2D(BLIT_DU_DX_FRACT), 4); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_2D(BLIT_SRC_X_FRACT), 4); + PUSH_DATA (push, 0); + PUSH_DATA (push, sx << src->ms_x); + PUSH_DATA (push, 0); + PUSH_DATA (push, sy << src->ms_y); + + return 0; +} + +static void +nv50_resource_copy_region(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) +{ + struct nv50_context *nv50 = nv50_context(pipe); + int ret; + boolean m2mf; + unsigned dst_layer = dstz, src_layer = src_box->z; + + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + nouveau_copy_buffer(&nv50->base, + nv04_resource(dst), dstx, + nv04_resource(src), src_box->x, src_box->width); + return; + } + + /* 0 and 1 are equal, only supporting 0/1, 2, 4 and 8 */ + assert((src->nr_samples | 1) == (dst->nr_samples | 1)); + + m2mf = (src->format == dst->format) || + (util_format_get_blocksizebits(src->format) == + util_format_get_blocksizebits(dst->format)); + + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + + if (m2mf) { + struct nv50_m2mf_rect drect, srect; + unsigned i; + unsigned nx = util_format_get_nblocksx(src->format, src_box->width); + unsigned ny = 
util_format_get_nblocksy(src->format, src_box->height); + + nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz); + nv50_m2mf_rect_setup(&srect, src, src_level, + src_box->x, src_box->y, src_box->z); + + for (i = 0; i < src_box->depth; ++i) { + nv50_m2mf_transfer_rect(nv50, &drect, &srect, nx, ny); + + if (nv50_miptree(dst)->layout_3d) + drect.z++; + else + drect.base += nv50_miptree(dst)->layer_stride; + + if (nv50_miptree(src)->layout_3d) + srect.z++; + else + srect.base += nv50_miptree(src)->layer_stride; + } + return; + } + + assert((src->format == dst->format) || + (nv50_2d_src_format_faithful(src->format) && + nv50_2d_dst_format_faithful(dst->format))); + + BCTX_REFN(nv50->bufctx, 2D, nv04_resource(src), RD); + BCTX_REFN(nv50->bufctx, 2D, nv04_resource(dst), WR); + nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx); + nouveau_pushbuf_validate(nv50->base.pushbuf); + + for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) { + ret = nv50_2d_texture_do_copy(nv50->base.pushbuf, + nv50_miptree(dst), dst_level, + dstx, dsty, dst_layer, + nv50_miptree(src), src_level, + src_box->x, src_box->y, src_layer, + src_box->width, src_box->height); + if (ret) + break; + } + nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D); +} + +static void +nv50_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const union pipe_color_union *color, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nv50_surface *sf = nv50_surface(dst); + struct nouveau_bo *bo = mt->base.bo; + unsigned z; + + BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4); + PUSH_DATAf(push, color->f[0]); + PUSH_DATAf(push, color->f[1]); + PUSH_DATAf(push, color->f[2]); + PUSH_DATAf(push, color->f[3]); + + if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0)) + return; + + PUSH_REFN(push, 
bo, mt->base.domain | NOUVEAU_BO_WR); + + BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5); + PUSH_DATAh(push, bo->offset + sf->offset); + PUSH_DATA (push, bo->offset + sf->offset); + PUSH_DATA (push, nv50_format_table[dst->format].rt); + PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2); + if (nouveau_bo_memtype(bo)) + PUSH_DATA(push, sf->width); + else + PUSH_DATA(push, NV50_3D_RT_HORIZ_LINEAR | mt->level[0].pitch); + PUSH_DATA (push, sf->height); + BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1); + PUSH_DATA (push, 1); + + if (!nouveau_bo_memtype(bo)) { + BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 0); + } + + /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ + + BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, (width << 16) | dstx); + PUSH_DATA (push, (height << 16) | dsty); + + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth); + for (z = 0; z < sf->depth; ++z) { + PUSH_DATA (push, 0x3c | + (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT)); + } + + nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +static void +nv50_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nv50_surface *sf = nv50_surface(dst); + struct nouveau_bo *bo = mt->base.bo; + uint32_t mode = 0; + unsigned z; + + assert(nouveau_bo_memtype(bo)); /* ZETA cannot be linear */ + + if (clear_flags & PIPE_CLEAR_DEPTH) { + BEGIN_NV04(push, NV50_3D(CLEAR_DEPTH), 1); + PUSH_DATAf(push, depth); + mode |= NV50_3D_CLEAR_BUFFERS_Z; + } + + if (clear_flags & PIPE_CLEAR_STENCIL) { + BEGIN_NV04(push, 
NV50_3D(CLEAR_STENCIL), 1); + PUSH_DATA (push, stencil & 0xff); + mode |= NV50_3D_CLEAR_BUFFERS_S; + } + + if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0)) + return; + + PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR); + + BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5); + PUSH_DATAh(push, bo->offset + sf->offset); + PUSH_DATA (push, bo->offset + sf->offset); + PUSH_DATA (push, nv50_format_table[dst->format].rt); + PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(ZETA_HORIZ), 3); + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + PUSH_DATA (push, (1 << 16) | 1); + + BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, (width << 16) | dstx); + PUSH_DATA (push, (height << 16) | dsty); + + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth); + for (z = 0; z < sf->depth; ++z) { + PUSH_DATA (push, mode | + (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT)); + } + + nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +void +nv50_clear(struct pipe_context *pipe, unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + unsigned i; + uint32_t mode = 0; + + /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ + if (!nv50_state_validate(nv50, NV50_NEW_FRAMEBUFFER, 9 + (fb->nr_cbufs * 2))) + return; + + if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { + BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4); + PUSH_DATAf(push, color->f[0]); + PUSH_DATAf(push, color->f[1]); + PUSH_DATAf(push, color->f[2]); + PUSH_DATAf(push, color->f[3]); + mode = + NV50_3D_CLEAR_BUFFERS_R | NV50_3D_CLEAR_BUFFERS_G | + NV50_3D_CLEAR_BUFFERS_B | NV50_3D_CLEAR_BUFFERS_A; + } + + if (buffers & PIPE_CLEAR_DEPTH) { + BEGIN_NV04(push, 
NV50_3D(CLEAR_DEPTH), 1); + PUSH_DATA (push, fui(depth)); + mode |= NV50_3D_CLEAR_BUFFERS_Z; + } + + if (buffers & PIPE_CLEAR_STENCIL) { + BEGIN_NV04(push, NV50_3D(CLEAR_STENCIL), 1); + PUSH_DATA (push, stencil & 0xff); + mode |= NV50_3D_CLEAR_BUFFERS_S; + } + + BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1); + PUSH_DATA (push, mode); + + for (i = 1; i < fb->nr_cbufs; i++) { + BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1); + PUSH_DATA (push, (i << 6) | 0x3c); + } +} + + +/* =============================== BLIT CODE =================================== + */ + +struct nv50_blitter +{ + struct nv50_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES]; + struct nv50_program vp; + + struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */ + + pipe_mutex mutex; +}; + +struct nv50_blitctx +{ + struct nv50_context *nv50; + struct nv50_program *fp; + uint8_t mode; + uint16_t color_mask; + uint8_t filter; + enum pipe_texture_target target; + struct { + struct pipe_framebuffer_state fb; + struct nv50_rasterizer_stateobj *rast; + struct nv50_program *vp; + struct nv50_program *gp; + struct nv50_program *fp; + unsigned num_textures[3]; + unsigned num_samplers[3]; + struct pipe_sampler_view *texture[2]; + struct nv50_tsc_entry *sampler[2]; + uint32_t dirty; + } saved; + struct nv50_rasterizer_stateobj rast; +}; + +static void +nv50_blitter_make_vp(struct nv50_blitter *blit) +{ + static const uint32_t code[] = + { + 0x10000001, 0x0423c788, /* mov b32 o[0x00] s[0x00] */ /* HPOS.x */ + 0x10000205, 0x0423c788, /* mov b32 o[0x04] s[0x04] */ /* HPOS.y */ + 0x10000409, 0x0423c788, /* mov b32 o[0x08] s[0x08] */ /* TEXC.x */ + 0x1000060d, 0x0423c788, /* mov b32 o[0x0c] s[0x0c] */ /* TEXC.y */ + 0x10000811, 0x0423c789, /* mov b32 o[0x10] s[0x10] */ /* TEXC.z */ + }; + + blit->vp.type = PIPE_SHADER_VERTEX; + blit->vp.translated = TRUE; + blit->vp.code = (uint32_t *)code; /* const_cast */ + blit->vp.code_size = sizeof(code); + blit->vp.max_gpr = 4; + blit->vp.max_out = 5; + blit->vp.out_nr 
= 2; + blit->vp.out[0].mask = 0x3; + blit->vp.out[0].sn = TGSI_SEMANTIC_POSITION; + blit->vp.out[1].hw = 2; + blit->vp.out[1].mask = 0x7; + blit->vp.out[1].sn = TGSI_SEMANTIC_GENERIC; + blit->vp.out[1].si = 0; + blit->vp.vp.attrs[0] = 0x73; + blit->vp.vp.psiz = 0x40; + blit->vp.vp.edgeflag = 0x40; +} + +void * +nv50_blitter_make_fp(struct pipe_context *pipe, + unsigned mode, + enum pipe_texture_target ptarg) +{ + struct ureg_program *ureg; + struct ureg_src tc; + struct ureg_dst out; + struct ureg_dst data; + + const unsigned target = nv50_blit_get_tgsi_texture_target(ptarg); + + boolean tex_rgbaz = FALSE; + boolean tex_s = FALSE; + boolean cvt_un8 = FALSE; + + if (mode != NV50_BLIT_MODE_PASS && + mode != NV50_BLIT_MODE_Z24X8 && + mode != NV50_BLIT_MODE_X8Z24) + tex_s = TRUE; + + if (mode != NV50_BLIT_MODE_X24S8 && + mode != NV50_BLIT_MODE_S8X24 && + mode != NV50_BLIT_MODE_XS) + tex_rgbaz = TRUE; + + if (mode != NV50_BLIT_MODE_PASS && + mode != NV50_BLIT_MODE_ZS && + mode != NV50_BLIT_MODE_XS) + cvt_un8 = TRUE; + + ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!ureg) + return NULL; + + out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); + tc = ureg_DECL_fs_input( + ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_LINEAR); + + data = ureg_DECL_temporary(ureg); + + if (tex_s) { + ureg_TEX(ureg, ureg_writemask(data, TGSI_WRITEMASK_X), + target, tc, ureg_DECL_sampler(ureg, 1)); + ureg_MOV(ureg, ureg_writemask(data, TGSI_WRITEMASK_Y), + ureg_scalar(ureg_src(data), TGSI_SWIZZLE_X)); + } + if (tex_rgbaz) { + const unsigned mask = (mode == NV50_BLIT_MODE_PASS) ? 
+ TGSI_WRITEMASK_XYZW : TGSI_WRITEMASK_X; + ureg_TEX(ureg, ureg_writemask(data, mask), + target, tc, ureg_DECL_sampler(ureg, 0)); + } + + if (cvt_un8) { + struct ureg_src mask; + struct ureg_src scale; + struct ureg_dst outz; + struct ureg_dst outs; + struct ureg_dst zdst3 = ureg_writemask(data, TGSI_WRITEMASK_XYZ); + struct ureg_dst zdst = ureg_writemask(data, TGSI_WRITEMASK_X); + struct ureg_dst sdst = ureg_writemask(data, TGSI_WRITEMASK_Y); + struct ureg_src zsrc3 = ureg_src(data); + struct ureg_src zsrc = ureg_scalar(zsrc3, TGSI_SWIZZLE_X); + struct ureg_src ssrc = ureg_scalar(zsrc3, TGSI_SWIZZLE_Y); + struct ureg_src zshuf; + + mask = ureg_imm3u(ureg, 0x0000ff, 0x00ff00, 0xff0000); + scale = ureg_imm4f(ureg, + 1.0f / 0x0000ff, 1.0f / 0x00ff00, 1.0f / 0xff0000, + (1 << 24) - 1); + + if (mode == NV50_BLIT_MODE_Z24S8 || + mode == NV50_BLIT_MODE_X24S8 || + mode == NV50_BLIT_MODE_Z24X8) { + outz = ureg_writemask(out, TGSI_WRITEMASK_XYZ); + outs = ureg_writemask(out, TGSI_WRITEMASK_W); + zshuf = ureg_src(data); + } else { + outz = ureg_writemask(out, TGSI_WRITEMASK_YZW); + outs = ureg_writemask(out, TGSI_WRITEMASK_X); + zshuf = ureg_swizzle(zsrc3, TGSI_SWIZZLE_W, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z); + } + + if (tex_s) { + ureg_I2F(ureg, sdst, ssrc); + ureg_MUL(ureg, outs, ssrc, ureg_scalar(scale, TGSI_SWIZZLE_X)); + } + + if (tex_rgbaz) { + ureg_MUL(ureg, zdst, zsrc, ureg_scalar(scale, TGSI_SWIZZLE_W)); + ureg_F2I(ureg, zdst, zsrc); + ureg_AND(ureg, zdst3, zsrc, mask); + ureg_I2F(ureg, zdst3, zsrc3); + ureg_MUL(ureg, zdst3, zsrc3, scale); + ureg_MOV(ureg, outz, zshuf); + } + } else { + unsigned mask = TGSI_WRITEMASK_XYZW; + + if (mode != NV50_BLIT_MODE_PASS) { + mask &= ~TGSI_WRITEMASK_ZW; + if (!tex_s) + mask = TGSI_WRITEMASK_X; + if (!tex_rgbaz) + mask = TGSI_WRITEMASK_Y; + } + ureg_MOV(ureg, ureg_writemask(out, mask), ureg_src(data)); + } + ureg_END(ureg); + + return ureg_create_shader_and_destroy(ureg, pipe); +} + +static void 
+nv50_blitter_make_sampler(struct nv50_blitter *blit) +{ + /* Fills blit->sampler[0] (nearest) and blit->sampler[1] (bilinear); both use the same tsc[0] word and differ only in the tsc[1] filter bits. */ + /* clamp to edge, min/max lod = 0, nearest filtering */ + + blit->sampler[0].id = -1; + + blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED | + (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) | + (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) | + (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT); + blit->sampler[0].tsc[1] = + NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE; + + /* clamp to edge, min/max lod = 0, bilinear filtering */ + + blit->sampler[1].id = -1; + + blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0]; + blit->sampler[1].tsc[1] = + NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE; +} + /* Choose the NV50_BLIT_MODE_* value for a blit from the destination resource format and the value of info->mask & PIPE_MASK_ZS. For the three depth/stencil format groups the inner switch distinguishes PIPE_MASK_ZS, PIPE_MASK_Z and every other value (the inner default). Destination formats not listed here, and the PIPE_MASK_Z case of the Z32_FLOAT formats, yield NV50_BLIT_MODE_PASS. */ +unsigned +nv50_blit_select_mode(const struct pipe_blit_info *info) +{ + const unsigned mask = info->mask; + + switch (info->dst.resource->format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_Z24X8_UNORM: + switch (mask & PIPE_MASK_ZS) { + case PIPE_MASK_ZS: return NV50_BLIT_MODE_Z24S8; + case PIPE_MASK_Z: return NV50_BLIT_MODE_Z24X8; + default: + return NV50_BLIT_MODE_X24S8; + } + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + switch (mask & PIPE_MASK_ZS) { + case PIPE_MASK_ZS: return NV50_BLIT_MODE_S8Z24; + case PIPE_MASK_Z: return NV50_BLIT_MODE_X8Z24; + default: + return NV50_BLIT_MODE_S8X24; + } + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + switch (mask & PIPE_MASK_ZS) { + case PIPE_MASK_ZS: return NV50_BLIT_MODE_ZS; + case PIPE_MASK_Z: return NV50_BLIT_MODE_PASS; + default: + return NV50_BLIT_MODE_XS; + } + default: + return NV50_BLIT_MODE_PASS; + } +} + /* Look up the blit fragment program for the source texture type and ctx->mode in the screen-wide blitter and store it in ctx->fp. */ +static void +nv50_blit_select_fp(struct nv50_blitctx *ctx, const struct pipe_blit_info *info) +{ + struct nv50_blitter *blitter = ctx->nv50->screen->blitter; + + const enum pipe_texture_target ptarg = + nv50_blit_reinterpret_pipe_texture_target(info->src.resource->target); + + const unsigned targ = nv50_blit_texture_type(ptarg); + const
unsigned mode = ctx->mode; + /* Build the program on first use. The slot is tested once without the mutex and again with it held, so nv50_blitter_make_fp() is only called while the slot is still empty under the lock. NOTE(review): the first test and the final read of fp[targ][mode] happen outside the mutex - confirm that is safe for concurrent contexts. */ + if (!blitter->fp[targ][mode]) { + pipe_mutex_lock(blitter->mutex); + if (!blitter->fp[targ][mode]) + blitter->fp[targ][mode] = + nv50_blitter_make_fp(&ctx->nv50->base.pipe, mode, ptarg); + pipe_mutex_unlock(blitter->mutex); + } + ctx->fp = blitter->fp[targ][mode]; +} + /* Make a new surface of res (mip level `level`) the only colour buffer of nv50->framebuffer, clear zsbuf, and take the framebuffer size from that surface. Depth/stencil formats are replaced by nv50_blit_zeta_to_colour_format(format). Passing layer == -1 (compared against the unsigned parameter) selects layers 0 .. depth0 - 1 for PIPE_TEXTURE_3D and 0 .. array_size - 1 otherwise. NOTE(review): templ is never fully initialised - only format and u.tex.level/first_layer/last_layer are written before it is handed to nv50_miptree_surface_new(); confirm no other field is read there. The returned surface is dereferenced without a NULL check. */ +static void +nv50_blit_set_dst(struct nv50_blitctx *ctx, + struct pipe_resource *res, unsigned level, unsigned layer, + enum pipe_format format) +{ + struct nv50_context *nv50 = ctx->nv50; + struct pipe_context *pipe = &nv50->base.pipe; + struct pipe_surface templ; + + if (util_format_is_depth_or_stencil(format)) + templ.format = nv50_blit_zeta_to_colour_format(format); + else + templ.format = format; + + templ.u.tex.level = level; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + + if (layer == -1) { + templ.u.tex.first_layer = 0; + templ.u.tex.last_layer = + (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1; + } + + nv50->framebuffer.cbufs[0] = nv50_miptree_surface_new(pipe, res, &templ); + nv50->framebuffer.nr_cbufs = 1; + nv50->framebuffer.zsbuf = NULL; + nv50->framebuffer.width = nv50->framebuffer.cbufs[0]->width; + nv50->framebuffer.height = nv50->framebuffer.cbufs[0]->height; +} + /* Create the texture view(s) of res that the blit fragment program samples and install them in nv50->textures[2][0..1]. */ +static void +nv50_blit_set_src(struct nv50_blitctx *blit, + struct pipe_resource *res, unsigned level, unsigned layer, + enum pipe_format format, const uint8_t filter) +{ + struct nv50_context *nv50 = blit->nv50; + struct pipe_context *pipe = &nv50->base.pipe; + struct pipe_sampler_view templ; + uint32_t flags; + enum pipe_texture_target target; + + target = nv50_blit_reinterpret_pipe_texture_target(res->target); + + templ.format = format; + templ.u.tex.first_level = templ.u.tex.last_level = level; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + templ.swizzle_r = PIPE_SWIZZLE_RED; + templ.swizzle_g = PIPE_SWIZZLE_GREEN; + templ.swizzle_b = PIPE_SWIZZLE_BLUE; + templ.swizzle_a = PIPE_SWIZZLE_ALPHA; + + if (layer == -1) { +
templ.u.tex.first_layer = 0; + templ.u.tex.last_layer = + (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1; + } + + flags = res->last_level ? 0 : NV50_TEXVIEW_SCALED_COORDS; + flags |= NV50_TEXVIEW_ACCESS_RESOLVE; + if (filter && res->nr_samples == 8) + flags |= NV50_TEXVIEW_FILTER_MSAA8; + + nv50->textures[2][0] = nv50_create_texture_view( + pipe, res, &templ, flags, target); + nv50->textures[2][1] = NULL; + + nv50->num_textures[0] = nv50->num_textures[1] = 0; + nv50->num_textures[2] = 1; + + templ.format = nv50_zs_to_s_format(format); + if (templ.format != res->format) { + nv50->textures[2][1] = nv50_create_texture_view( + pipe, res, &templ, flags, target); + nv50->num_textures[2] = 2; + } +} + +static void +nv50_blitctx_prepare_state(struct nv50_blitctx *blit) +{ + struct nouveau_pushbuf *push = blit->nv50->base.pushbuf; + + if (blit->nv50->cond_query) { + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + } + + /* blend state */ + BEGIN_NV04(push, NV50_3D(COLOR_MASK(0)), 1); + PUSH_DATA (push, blit->color_mask); + BEGIN_NV04(push, NV50_3D(BLEND_ENABLE(0)), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(LOGIC_OP_ENABLE), 1); + PUSH_DATA (push, 0); + + /* rasterizer state */ +#ifndef NV50_SCISSORS_CLIPPING + BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 1); + PUSH_DATA (push, 1); +#endif + BEGIN_NV04(push, NV50_3D(VERTEX_TWO_SIDE_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(MSAA_MASK(0)), 4); + PUSH_DATA (push, 0xffff); + PUSH_DATA (push, 0xffff); + PUSH_DATA (push, 0xffff); + PUSH_DATA (push, 0xffff); + BEGIN_NV04(push, NV50_3D(POLYGON_MODE_FRONT), 3); + PUSH_DATA (push, NV50_3D_POLYGON_MODE_FRONT_FILL); + PUSH_DATA (push, NV50_3D_POLYGON_MODE_BACK_FILL); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(CULL_FACE_ENABLE), 1); + 
PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(POLYGON_STIPPLE_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(POLYGON_OFFSET_FILL_ENABLE), 1); + PUSH_DATA (push, 0); + + /* zsa state */ + BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1); + PUSH_DATA (push, 0); +} + /* Save into ctx->saved the context state that a 3D blit overrides, then install the blit state. Saved: framebuffer width, height, nr_cbufs, cbufs[0] and zsbuf (only the first colour buffer pointer is kept); the rasterizer; the vertex, geometry and fragment programs; the texture and sampler counts of all three stages; textures and samplers 0 and 1 of stage index 2 (presumably the fragment stage - confirm); and the dirty mask. Installed: ctx->rast, the blitter vertex program, no geometry program, ctx->fp, and the blitter sampler chosen by ctx->filter in both sampler slots. The FB and TEXTURES bufctx bins are reset and the dirty mask is replaced by the framebuffer, program, texture and sampler bits. */ +static void +nv50_blitctx_pre_blit(struct nv50_blitctx *ctx) +{ + struct nv50_context *nv50 = ctx->nv50; + struct nv50_blitter *blitter = nv50->screen->blitter; + int s; + + ctx->saved.fb.width = nv50->framebuffer.width; + ctx->saved.fb.height = nv50->framebuffer.height; + ctx->saved.fb.nr_cbufs = nv50->framebuffer.nr_cbufs; + ctx->saved.fb.cbufs[0] = nv50->framebuffer.cbufs[0]; + ctx->saved.fb.zsbuf = nv50->framebuffer.zsbuf; + + ctx->saved.rast = nv50->rast; + + ctx->saved.vp = nv50->vertprog; + ctx->saved.gp = nv50->gmtyprog; + ctx->saved.fp = nv50->fragprog; + + nv50->rast = &ctx->rast; + + nv50->vertprog = &blitter->vp; + nv50->gmtyprog = NULL; + nv50->fragprog = ctx->fp; + + for (s = 0; s < 3; ++s) { + ctx->saved.num_textures[s] = nv50->num_textures[s]; + ctx->saved.num_samplers[s] = nv50->num_samplers[s]; + } + ctx->saved.texture[0] = nv50->textures[2][0]; + ctx->saved.texture[1] = nv50->textures[2][1]; + ctx->saved.sampler[0] = nv50->samplers[2][0]; + ctx->saved.sampler[1] = nv50->samplers[2][1]; + + nv50->samplers[2][0] = &blitter->sampler[ctx->filter]; + nv50->samplers[2][1] = &blitter->sampler[ctx->filter]; + + nv50->num_samplers[0] = nv50->num_samplers[1] = 0; + nv50->num_samplers[2] = 2; + + ctx->saved.dirty = nv50->dirty; + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + + nv50->dirty = + NV50_NEW_FRAMEBUFFER | + NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG | + NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS; +} + +static void +nv50_blitctx_post_blit(struct
nv50_blitctx *blit) +{ + struct nv50_context *nv50 = blit->nv50; + int s; + + pipe_surface_reference(&nv50->framebuffer.cbufs[0], NULL); + + nv50->framebuffer.width = blit->saved.fb.width; + nv50->framebuffer.height = blit->saved.fb.height; + nv50->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs; + nv50->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0]; + nv50->framebuffer.zsbuf = blit->saved.fb.zsbuf; + + nv50->rast = blit->saved.rast; + + nv50->vertprog = blit->saved.vp; + nv50->gmtyprog = blit->saved.gp; + nv50->fragprog = blit->saved.fp; + + pipe_sampler_view_reference(&nv50->textures[2][0], NULL); + pipe_sampler_view_reference(&nv50->textures[2][1], NULL); + + for (s = 0; s < 3; ++s) { + nv50->num_textures[s] = blit->saved.num_textures[s]; + nv50->num_samplers[s] = blit->saved.num_samplers[s]; + } + nv50->textures[2][0] = blit->saved.texture[0]; + nv50->textures[2][1] = blit->saved.texture[1]; + nv50->samplers[2][0] = blit->saved.sampler[0]; + nv50->samplers[2][1] = blit->saved.sampler[1]; + + if (nv50->cond_query) + nv50->base.pipe.render_condition(&nv50->base.pipe, nv50->cond_query, + nv50->cond_cond, nv50->cond_mode); + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + + nv50->dirty = blit->saved.dirty | + (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK | + NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND | + NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG); +} + + +static void +nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info) +{ + struct nv50_blitctx *blit = nv50->blit; + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct pipe_resource *src = info->src.resource; + struct pipe_resource *dst = info->dst.resource; + int32_t minx, maxx, miny, maxy; + int32_t i; + float x0, x1, y0, y1, z; + float dz; + float x_range, y_range; + + blit->mode = nv50_blit_select_mode(info); + blit->color_mask = 
nv50_blit_derive_color_mask(info); + blit->filter = nv50_blit_get_filter(info); + + nv50_blit_select_fp(blit, info); + nv50_blitctx_pre_blit(blit); + + nv50_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format); + nv50_blit_set_src(blit, src, info->src.level, -1, info->src.format, + blit->filter); + + nv50_blitctx_prepare_state(blit); + + nv50_state_validate(nv50, ~0, 36); + + x_range = (float)info->src.box.width / (float)info->dst.box.width; + y_range = (float)info->src.box.height / (float)info->dst.box.height; + + x0 = (float)info->src.box.x - x_range * (float)info->dst.box.x; + y0 = (float)info->src.box.y - y_range * (float)info->dst.box.y; + + x1 = x0 + 16384.0f * x_range; + y1 = y0 + 16384.0f * y_range; + + x0 *= (float)(1 << nv50_miptree(src)->ms_x); + x1 *= (float)(1 << nv50_miptree(src)->ms_x); + y0 *= (float)(1 << nv50_miptree(src)->ms_y); + y1 *= (float)(1 << nv50_miptree(src)->ms_y); + + if (src->last_level > 0) { + /* If there are mip maps, GPU always assumes normalized coordinates. */ + const unsigned l = info->src.level; + const float fh = u_minify(src->width0 << nv50_miptree(src)->ms_x, l); + const float fv = u_minify(src->height0 << nv50_miptree(src)->ms_y, l); + x0 /= fh; + x1 /= fh; + y0 /= fv; + y1 /= fv; + } + + /* XXX: multiply by 6 for cube arrays ? */ + dz = (float)info->src.box.depth / (float)info->dst.box.depth; + z = (float)info->src.box.z; + if (nv50_miptree(src)->layout_3d) + z += 0.5f * dz; + + BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1); + PUSH_DATA (push, 0x1); + + /* Draw a large triangle in screen coordinates covering the whole + * render target, with scissors defining the destination region. + * The vertex is supplied with non-normalized texture coordinates + * arranged in a way to yield the desired offset and scale. 
+ */ + + minx = info->dst.box.x; + maxx = info->dst.box.x + info->dst.box.width; + miny = info->dst.box.y; + maxy = info->dst.box.y + info->dst.box.height; + if (info->scissor_enable) { + minx = MAX2(minx, info->scissor.minx); + maxx = MIN2(maxx, info->scissor.maxx); + miny = MAX2(miny, info->scissor.miny); + maxy = MIN2(maxy, info->scissor.maxy); + } + BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2); + PUSH_DATA (push, (maxx << 16) | minx); + PUSH_DATA (push, (maxy << 16) | miny); + + for (i = 0; i < info->dst.box.depth; ++i, z += dz) { + if (info->dst.box.z + i) { + BEGIN_NV04(push, NV50_3D(LAYER), 1); + PUSH_DATA (push, info->dst.box.z + i); + } + PUSH_SPACE(push, 32); + BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (push, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3); + PUSH_DATAf(push, x0); + PUSH_DATAf(push, y0); + PUSH_DATAf(push, z); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3); + PUSH_DATAf(push, x1); + PUSH_DATAf(push, y0); + PUSH_DATAf(push, z); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2); + PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_x); + PUSH_DATAf(push, 0.0f); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3); + PUSH_DATAf(push, x0); + PUSH_DATAf(push, y1); + PUSH_DATAf(push, z); + BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_y); + BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); + PUSH_DATA (push, 0); + } + if (info->dst.box.z + info->dst.box.depth - 1) { + BEGIN_NV04(push, NV50_3D(LAYER), 1); + PUSH_DATA (push, 0); + } + + /* re-enable normally constant state */ + + BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1); + PUSH_DATA (push, 1); + + nv50_blitctx_post_blit(blit); +} + +static void +nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info) +{ + struct nouveau_pushbuf *push 
= nv50->base.pushbuf; + struct nv50_miptree *dst = nv50_miptree(info->dst.resource); + struct nv50_miptree *src = nv50_miptree(info->src.resource); + const int32_t srcx_adj = info->src.box.width < 0 ? -1 : 0; + const int32_t srcy_adj = info->src.box.height < 0 ? -1 : 0; + const int32_t dz = info->dst.box.z; + const int32_t sz = info->src.box.z; + uint32_t dstw, dsth; + int32_t dstx, dsty; + int64_t srcx, srcy; + int64_t du_dx, dv_dy; + int i; + uint32_t mode; + uint32_t mask = nv50_blit_eng2d_get_mask(info); + boolean b; + + mode = nv50_blit_get_filter(info) ? + NV50_2D_BLIT_CONTROL_FILTER_BILINEAR : + NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE; + mode |= (src->base.base.nr_samples > dst->base.base.nr_samples) ? + NV50_2D_BLIT_CONTROL_ORIGIN_CORNER : NV50_2D_BLIT_CONTROL_ORIGIN_CENTER; + + du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width; + dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height; + + b = info->dst.format == info->src.format; + nv50_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b); + nv50_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b); + + if (info->scissor_enable) { + BEGIN_NV04(push, NV50_2D(CLIP_X), 5); + PUSH_DATA (push, info->scissor.minx << dst->ms_x); + PUSH_DATA (push, info->scissor.miny << dst->ms_y); + PUSH_DATA (push, (info->scissor.maxx - info->scissor.minx) << dst->ms_x); + PUSH_DATA (push, (info->scissor.maxy - info->scissor.miny) << dst->ms_y); + PUSH_DATA (push, 1); /* enable */ + } + + if (mask != 0xffffffff) { + BEGIN_NV04(push, NV50_2D(ROP), 1); + PUSH_DATA (push, 0xca); /* DPSDxax */ + BEGIN_NV04(push, NV50_2D(PATTERN_COLOR_FORMAT), 1); + PUSH_DATA (push, NV50_2D_PATTERN_COLOR_FORMAT_32BPP); + BEGIN_NV04(push, NV50_2D(PATTERN_COLOR(0)), 4); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, mask); + PUSH_DATA (push, 0xffffffff); + PUSH_DATA (push, 0xffffffff); + BEGIN_NV04(push, NV50_2D(OPERATION), 1); + PUSH_DATA (push, NV50_2D_OPERATION_ROP); + } else + 
if (info->src.format != info->dst.format) { + if (info->src.format == PIPE_FORMAT_R8_UNORM || + info->src.format == PIPE_FORMAT_R16_UNORM || + info->src.format == PIPE_FORMAT_R16_FLOAT || + info->src.format == PIPE_FORMAT_R32_FLOAT) { + mask = 0xffff0000; /* also makes condition for OPERATION reset true */ + BEGIN_NV04(push, NV50_2D(BETA4), 2); + PUSH_DATA (push, mask); + PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY_PREMULT); + } + } + /* rescale the per-pixel source steps by the difference in multisample shifts between source and destination */ + if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) { + /* ms_x is always >= ms_y */ + du_dx <<= src->ms_x - dst->ms_x; + dv_dy <<= src->ms_y - dst->ms_y; + } else { + du_dx >>= dst->ms_x - src->ms_x; + dv_dy >>= dst->ms_y - src->ms_y; + } + /* source origin as a 64-bit value with the integer part in the upper 32 bits */ + srcx = (int64_t)(info->src.box.x + srcx_adj) << (src->ms_x + 32); + srcy = (int64_t)(info->src.box.y + srcy_adj) << (src->ms_y + 32); + + if (src->base.base.nr_samples > dst->base.base.nr_samples) { + /* center src coordinates for proper MS resolve filtering */ + srcx += (int64_t)src->ms_x << 32; + srcy += (int64_t)src->ms_y << 32; + } + + dstx = info->dst.box.x << dst->ms_x; + dsty = info->dst.box.y << dst->ms_y; + + dstw = info->dst.box.width << dst->ms_x; + dsth = info->dst.box.height << dst->ms_y; + /* a negative destination origin is clipped to 0: the size shrinks by the clipped amount and the source origin advances by the matching number of steps */ + if (dstx < 0) { + dstw += dstx; + srcx -= du_dx * dstx; + dstx = 0; + } + if (dsty < 0) { + dsth += dsty; + srcy -= dv_dy * dsty; + dsty = 0; + } + + BEGIN_NV04(push, NV50_2D(BLIT_CONTROL), 1); + PUSH_DATA (push, mode); + BEGIN_NV04(push, NV50_2D(BLIT_DST_X), 4); + PUSH_DATA (push, dstx); + PUSH_DATA (push, dsty); + PUSH_DATA (push, dstw); + PUSH_DATA (push, dsth); + BEGIN_NV04(push, NV50_2D(BLIT_DU_DX_FRACT), 4); + PUSH_DATA (push, du_dx); + PUSH_DATA (push, du_dx >> 32); + PUSH_DATA (push, dv_dy); + PUSH_DATA (push, dv_dy >> 32); + + BCTX_REFN(nv50->bufctx, 2D, &dst->base, WR); + BCTX_REFN(nv50->bufctx, 2D, &src->base, RD); + nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx); + /* NOTE(review): this early return skips the nouveau_bufctx_reset(NV50_BIND_2D) done at the end of the function and leaves any CLIP / OPERATION state programmed above in place - confirm that is acceptable on validation failure */ if (nouveau_pushbuf_validate(nv50->base.pushbuf)) + return; + + for (i = 0; i < info->dst.box.depth; ++i) { +
if (i > 0) { + /* no scaling in z-direction possible for eng2d blits */ + if (dst->layout_3d) { + BEGIN_NV04(push, NV50_2D(DST_LAYER), 1); + PUSH_DATA (push, info->dst.box.z + i); + } else { + const unsigned z = info->dst.box.z + i; + BEGIN_NV04(push, NV50_2D(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, dst->base.address + z * dst->layer_stride); + PUSH_DATA (push, dst->base.address + z * dst->layer_stride); + } + if (src->layout_3d) { + /* not possible because of depth tiling */ + assert(0); + } else { + const unsigned z = info->src.box.z + i; + BEGIN_NV04(push, NV50_2D(SRC_ADDRESS_HIGH), 2); + PUSH_DATAh(push, src->base.address + z * src->layer_stride); + PUSH_DATA (push, src->base.address + z * src->layer_stride); + } + BEGIN_NV04(push, NV50_2D(BLIT_SRC_Y_INT), 1); /* trigger */ + PUSH_DATA (push, srcy >> 32); + } else { + BEGIN_NV04(push, NV50_2D(BLIT_SRC_X_FRACT), 4); + PUSH_DATA (push, srcx); + PUSH_DATA (push, srcx >> 32); + PUSH_DATA (push, srcy); + PUSH_DATA (push, srcy >> 32); + } + } + nv50_bufctx_fence(nv50->bufctx, FALSE); + + nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D); + + if (info->scissor_enable) { + BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1); + PUSH_DATA (push, 0); + } + if (mask != 0xffffffff) { + BEGIN_NV04(push, NV50_2D(OPERATION), 1); + PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY); + } +} + +static void +nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) +{ + struct nv50_context *nv50 = nv50_context(pipe); + boolean eng3d = FALSE; + + if (util_format_is_depth_or_stencil(info->dst.resource->format)) { + if (!(info->mask & PIPE_MASK_ZS)) + return; + if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT || + info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + eng3d = TRUE; + if (info->filter != PIPE_TEX_FILTER_NEAREST) + eng3d = TRUE; + } else { + if (!(info->mask & PIPE_MASK_RGBA)) + return; + if (info->mask != PIPE_MASK_RGBA) + eng3d = TRUE; + } + + if (nv50_miptree(info->src.resource)->layout_3d) { + eng3d 
= TRUE; + } else + if (info->src.box.depth != info->dst.box.depth) { + eng3d = TRUE; + /* NOTE(review): the message below has no trailing newline */ debug_printf("blit: cannot filter array or cube textures in z direction"); + } + + if (!eng3d && info->dst.format != info->src.format) { + if (!nv50_2d_dst_format_faithful(info->dst.format) || + !nv50_2d_src_format_faithful(info->src.format)) { + eng3d = TRUE; + } else + /* NOTE(review): this branch can never be taken - the if above already selects the 3D path whenever the source format is not faithful, so the condition below is always false here. Confirm whether the first test was meant to check only the destination format. */ if (!nv50_2d_src_format_faithful(info->src.format)) { + if (!util_format_is_luminance(info->src.format)) { + if (util_format_is_intensity(info->src.format)) + eng3d = TRUE; + else + if (!nv50_2d_dst_format_ops_supported(info->dst.format)) + eng3d = TRUE; + else + eng3d = !nv50_2d_format_supported(info->src.format); + } + } else + if (util_format_is_luminance_alpha(info->src.format)) + eng3d = TRUE; + } + /* NOTE(review): every case matched by this test (source nr_samples == 8) is also matched by the nr_samples > 1 test that follows */ + if (info->src.resource->nr_samples == 8 && + info->dst.resource->nr_samples <= 1) + eng3d = TRUE; + + /* FIXME: can't make this work with eng2d anymore */ + if (info->src.resource->nr_samples > 1 || + info->dst.resource->nr_samples > 1) + eng3d = TRUE; + + /* FIXME: find correct src coordinate adjustments */ + if ((info->src.box.width != info->dst.box.width && + info->src.box.width != -info->dst.box.width) || + (info->src.box.height != info->dst.box.height && + info->src.box.height != -info->dst.box.height)) + eng3d = TRUE; + /* dispatch: the 2D engine unless one of the tests above asked for the 3D path */ + if (!eng3d) + nv50_blit_eng2d(nv50, info); + else + nv50_blit_3d(nv50, info); +} + /* Allocate screen->blitter, initialise its mutex and set up the blit vertex program and the two samplers. Returns FALSE (after logging) if the allocation fails, TRUE otherwise. No fragment program is created here. */ +boolean +nv50_blitter_create(struct nv50_screen *screen) +{ + screen->blitter = CALLOC_STRUCT(nv50_blitter); + if (!screen->blitter) { + NOUVEAU_ERR("failed to allocate blitter struct\n"); + return FALSE; + } + + pipe_mutex_init(screen->blitter->mutex); + + nv50_blitter_make_vp(screen->blitter); + nv50_blitter_make_sampler(screen->blitter); + + return TRUE; +} + /* Destroy every fragment program stored in the blitter's fp table and free the blitter itself. */ +void +nv50_blitter_destroy(struct nv50_screen *screen) +{ + struct nv50_blitter *blitter = screen->blitter; + unsigned i, m; + + for (i = 0; i < NV50_BLIT_MAX_TEXTURE_TYPES; ++i) { + for (m = 0; m < NV50_BLIT_MODES; ++m) { + struct nv50_program
*prog = blitter->fp[i][m]; + if (prog) { + nv50_program_destroy(NULL, prog); + FREE((void *)prog->pipe.tokens); + FREE(prog); + } + } + } + + FREE(blitter); +} + +boolean +nv50_blitctx_create(struct nv50_context *nv50) +{ + nv50->blit = CALLOC_STRUCT(nv50_blitctx); + if (!nv50->blit) { + NOUVEAU_ERR("failed to allocate blit context\n"); + return FALSE; + } + + nv50->blit->nv50 = nv50; + + nv50->blit->rast.pipe.half_pixel_center = 1; + + return TRUE; +} + +void +nv50_init_surface_functions(struct nv50_context *nv50) +{ + struct pipe_context *pipe = &nv50->base.pipe; + + pipe->resource_copy_region = nv50_resource_copy_region; + pipe->blit = nv50_blit; + pipe->clear_render_target = nv50_clear_render_target; + pipe->clear_depth_stencil = nv50_clear_depth_stencil; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c new file mode 100644 index 00000000000..9e512928381 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c @@ -0,0 +1,352 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv50/nv50_context.h" +#include "nv50/nv50_resource.h" +#include "nv50/nv50_texture.xml.h" +#include "nv50/nv50_defs.xml.h" + +#include "util/u_format.h" + +#define NV50_TIC_0_SWIZZLE__MASK \ + (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ + NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) + +static INLINE uint32_t +nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int) +{ + switch (swz) { + case PIPE_SWIZZLE_RED: + return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; + case PIPE_SWIZZLE_GREEN: + return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; + case PIPE_SWIZZLE_BLUE: + return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; + case PIPE_SWIZZLE_ALPHA: + return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; + case PIPE_SWIZZLE_ONE: + return tex_int ? 
NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT; + case PIPE_SWIZZLE_ZERO: + default: + return NV50_TIC_MAP_ZERO; + } +} + +struct pipe_sampler_view * +nv50_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *res, + const struct pipe_sampler_view *templ) +{ + uint32_t flags = 0; + + if (res->target == PIPE_TEXTURE_RECT || res->target == PIPE_BUFFER) + flags |= NV50_TEXVIEW_SCALED_COORDS; + + return nv50_create_texture_view(pipe, res, templ, flags, res->target); +} + +struct pipe_sampler_view * +nv50_create_texture_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ, + uint32_t flags, + enum pipe_texture_target target) +{ + const struct util_format_description *desc; + uint64_t addr; + uint32_t *tic; + uint32_t swz[4]; + uint32_t depth; + struct nv50_tic_entry *view; + struct nv50_miptree *mt = nv50_miptree(texture); + boolean tex_int; + + view = MALLOC_STRUCT(nv50_tic_entry); + if (!view) + return NULL; + + view->pipe = *templ; + view->pipe.reference.count = 1; + view->pipe.texture = NULL; + view->pipe.context = pipe; + + view->id = -1; + + pipe_resource_reference(&view->pipe.texture, texture); + + tic = &view->tic[0]; + + desc = util_format_description(view->pipe.format); + + /* TIC[0] */ + + tic[0] = nv50_format_table[view->pipe.format].tic; + + tex_int = util_format_is_pure_integer(view->pipe.format); + + swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int); + swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int); + swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int); + swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int); + tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | + (swz[0] << NV50_TIC_0_MAPR__SHIFT) | + (swz[1] << NV50_TIC_0_MAPG__SHIFT) | + (swz[2] << NV50_TIC_0_MAPB__SHIFT) | + (swz[3] << NV50_TIC_0_MAPA__SHIFT); + + addr = mt->base.address; + + if (mt->base.base.target == PIPE_TEXTURE_1D_ARRAY || + mt->base.base.target == 
PIPE_TEXTURE_2D_ARRAY) { + addr += view->pipe.u.tex.first_layer * mt->layer_stride; + depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1; + } else { + depth = mt->base.base.depth0; + } + + tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER; + + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; + + if (!(flags & NV50_TEXVIEW_SCALED_COORDS)) + tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; + + if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) { + if (target == PIPE_BUFFER) { + addr += view->pipe.u.buf.first_element * desc->block.bits / 8; + tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER; + tic[3] = 0; + tic[4] = /* width */ + view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1; + tic[5] = 0; + } else { + tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT; + tic[3] = mt->level[0].pitch; + tic[4] = mt->base.base.width0; + tic[5] = (1 << 16) | mt->base.base.height0; + } + tic[6] = + tic[7] = 0; + tic[1] = addr; + tic[2] |= addr >> 32; + return &view->pipe; + } + + tic[1] = addr; + tic[2] |= (addr >> 32) & 0xff; + + tic[2] |= + ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) | + ((mt->level[0].tile_mode & 0xf00) << (25 - 8)); + + switch (target) { + case PIPE_TEXTURE_1D: + tic[2] |= NV50_TIC_2_TARGET_1D; + break; + case PIPE_TEXTURE_2D: + tic[2] |= NV50_TIC_2_TARGET_2D; + break; + case PIPE_TEXTURE_RECT: + tic[2] |= NV50_TIC_2_TARGET_RECT; + break; + case PIPE_TEXTURE_3D: + tic[2] |= NV50_TIC_2_TARGET_3D; + break; + case PIPE_TEXTURE_CUBE: + depth /= 6; + tic[2] |= NV50_TIC_2_TARGET_CUBE; + break; + case PIPE_TEXTURE_1D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; + break; + case PIPE_TEXTURE_2D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; + break; + case PIPE_TEXTURE_CUBE_ARRAY: + depth /= 6; + tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; + break; + case PIPE_BUFFER: + assert(0); /* should be linear and handled above ! 
*/ + tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR; + break; + default: + NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target); + return FALSE; + } + + tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000; + + tic[4] = (1 << 31) | (mt->base.base.width0 << mt->ms_x); + + tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff; + tic[5] |= depth << 16; + tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT; + + tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */ + + tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; + + if (unlikely(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS))) + if (mt->base.base.last_level) + tic[5] &= ~NV50_TIC_5_LAST_LEVEL__MASK; + + return &view->pipe; +} + +static boolean +nv50_validate_tic(struct nv50_context *nv50, int s) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nouveau_bo *txc = nv50->screen->txc; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nv50->num_textures[s]; ++i) { + struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]); + struct nv04_resource *res; + + if (!tic) { + BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); + PUSH_DATA (push, (i << 1) | 0); + continue; + } + res = &nv50_miptree(tic->pipe.texture)->base; + + if (tic->id < 0) { + tic->id = nv50_screen_tic_alloc(nv50->screen, tic); + + BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); + PUSH_DATA (push, 262144); + PUSH_DATA (push, 65536); + PUSH_DATA (push, 1); + PUSH_DATAh(push, txc->offset); + PUSH_DATA (push, txc->offset); + BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); + PUSH_DATA (push, 32); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + 
PUSH_DATA (push, 0); + PUSH_DATA (push, tic->id * 32); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NI04(push, NV50_2D(SIFC_DATA), 8); + PUSH_DATAp(push, &tic->tic[0], 8); + + need_flush = TRUE; + } else + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, 0x20); + } + + nv50->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + res->status &= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + BCTX_REFN(nv50->bufctx_3d, TEXTURES, res, RD); + + BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); + PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1); + } + for (; i < nv50->state.num_textures[s]; ++i) { + BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); + PUSH_DATA (push, (i << 1) | 0); + } + nv50->state.num_textures[s] = nv50->num_textures[s]; + + return need_flush; +} + +void nv50_validate_textures(struct nv50_context *nv50) +{ + boolean need_flush; + + need_flush = nv50_validate_tic(nv50, 0); + need_flush |= nv50_validate_tic(nv50, 2); + + if (need_flush) { + BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TIC_FLUSH), 1); + PUSH_DATA (nv50->base.pushbuf, 0); + } +} + +static boolean +nv50_validate_tsc(struct nv50_context *nv50, int s) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nv50->num_samplers[s]; ++i) { + struct nv50_tsc_entry *tsc = nv50_tsc_entry(nv50->samplers[s][i]); + + if (!tsc) { + BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); + PUSH_DATA (push, (i << 4) | 0); + continue; + } + if (tsc->id < 0) { + tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc); + + nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc, + 65536 + tsc->id * 32, + NOUVEAU_BO_VRAM, 32, tsc->tsc); + need_flush = TRUE; + } + nv50->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); + + BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); + PUSH_DATA (push, (tsc->id << 12) | (i << 4) | 1); + } + for (; i < 
nv50->state.num_samplers[s]; ++i) { + BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); + PUSH_DATA (push, (i << 4) | 0); + } + nv50->state.num_samplers[s] = nv50->num_samplers[s]; + + return need_flush; +} + +void nv50_validate_samplers(struct nv50_context *nv50) +{ + boolean need_flush; + + need_flush = nv50_validate_tsc(nv50, 0); + need_flush |= nv50_validate_tsc(nv50, 2); + + if (need_flush) { + BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TSC_FLUSH), 1); + PUSH_DATA (nv50->base.pushbuf, 0); + } +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h new file mode 100644 index 00000000000..31eab9b5d87 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h @@ -0,0 +1,306 @@ +#ifndef NV50_TEXTURE_XML +#define NV50_TEXTURE_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- rnndb/nv50_texture.xml ( 8648 bytes, from 2013-04-13 12:49:11) +- rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) +- rnndb/nvchipsets.xml ( 3954 bytes, from 2013-03-26 01:26:43) +- rnndb/nv50_defs.xml ( 16652 bytes, from 2013-04-04 10:57:15) + +Copyright (C) 2006-2013 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice 
(including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#define NV50_TIC_MAP_ZERO 0x00000000 +#define NV50_TIC_MAP_C0 0x00000002 +#define NV50_TIC_MAP_C1 0x00000003 +#define NV50_TIC_MAP_C2 0x00000004 +#define NV50_TIC_MAP_C3 0x00000005 +#define NV50_TIC_MAP_ONE_INT 0x00000006 +#define NV50_TIC_MAP_ONE_FLOAT 0x00000007 +#define NV50_TIC_TYPE_SNORM 0x00000001 +#define NV50_TIC_TYPE_UNORM 0x00000002 +#define NV50_TIC_TYPE_SINT 0x00000003 +#define NV50_TIC_TYPE_UINT 0x00000004 +#define NV50_TIC_TYPE_SSCALED 0x00000005 +#define NV50_TIC_TYPE_USCALED 0x00000006 +#define NV50_TIC_TYPE_FLOAT 0x00000007 +#define NV50_TSC_WRAP_REPEAT 0x00000000 +#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001 +#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002 +#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003 +#define NV50_TSC_WRAP_CLAMP 0x00000004 +#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005 +#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006 +#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007 +#define NV50_TIC__SIZE 0x00000020 +#define NV50_TIC_0 0x00000000 +#define NV50_TIC_0_MAPA__MASK 0x38000000 +#define NV50_TIC_0_MAPA__SHIFT 27 +#define NV50_TIC_0_MAPB__MASK 0x07000000 +#define NV50_TIC_0_MAPB__SHIFT 24 +#define NV50_TIC_0_MAPG__MASK 0x00e00000 +#define NV50_TIC_0_MAPG__SHIFT 21 +#define NV50_TIC_0_MAPR__MASK 0x001c0000 +#define NV50_TIC_0_MAPR__SHIFT 18 +#define NV50_TIC_0_TYPE3__MASK 0x00038000 +#define NV50_TIC_0_TYPE3__SHIFT 15 +#define 
NV50_TIC_0_TYPE2__MASK 0x00007000 +#define NV50_TIC_0_TYPE2__SHIFT 12 +#define NV50_TIC_0_TYPE1__MASK 0x00000e00 +#define NV50_TIC_0_TYPE1__SHIFT 9 +#define NV50_TIC_0_TYPE0__MASK 0x000001c0 +#define NV50_TIC_0_TYPE0__SHIFT 6 +#define NV50_TIC_0_FMT__MASK 0x0000003f +#define NV50_TIC_0_FMT__SHIFT 0 +#define NV50_TIC_0_FMT_32_32_32_32 0x00000001 +#define NVC0_TIC_0_FMT_32_32_32 0x00000002 +#define NV50_TIC_0_FMT_16_16_16_16 0x00000003 +#define NV50_TIC_0_FMT_32_32 0x00000004 +#define NV50_TIC_0_FMT_32_8_X24 0x00000005 +#define NV50_TIC_0_FMT_8_8_8_8 0x00000008 +#define NV50_TIC_0_FMT_10_10_10_2 0x00000009 +#define NV50_TIC_0_FMT_16_16 0x0000000c +#define NV50_TIC_0_FMT_24_8 0x0000000d +#define NV50_TIC_0_FMT_8_24 0x0000000e +#define NV50_TIC_0_FMT_32 0x0000000f +#define NV50_TIC_0_FMT_BPTC_FLOAT 0x00000010 +#define NV50_TIC_0_FMT_BPTC_UFLOAT 0x00000011 +#define NV50_TIC_0_FMT_4_4_4_4 0x00000012 +#define NV50_TIC_0_FMT_1_5_5_5 0x00000013 +#define NV50_TIC_0_FMT_5_5_5_1 0x00000014 +#define NV50_TIC_0_FMT_5_6_5 0x00000015 +#define NV50_TIC_0_FMT_5_5_6 0x00000016 +#define NV50_TIC_0_FMT_BPTC 0x00000017 +#define NV50_TIC_0_FMT_8_8 0x00000018 +#define NV50_TIC_0_FMT_16 0x0000001b +#define NV50_TIC_0_FMT_8 0x0000001d +#define NV50_TIC_0_FMT_4_4 0x0000001e +#define NV50_TIC_0_FMT_BITMAP 0x0000001f +#define NV50_TIC_0_FMT_9_9_9_E5 0x00000020 +#define NV50_TIC_0_FMT_11_11_10 0x00000021 +#define NV50_TIC_0_FMT_U8_YA8_V8_YB8 0x00000022 +#define NV50_TIC_0_FMT_YA8_U8_YB8_V8 0x00000023 +#define NV50_TIC_0_FMT_DXT1 0x00000024 +#define NV50_TIC_0_FMT_DXT3 0x00000025 +#define NV50_TIC_0_FMT_DXT5 0x00000026 +#define NV50_TIC_0_FMT_RGTC1 0x00000027 +#define NV50_TIC_0_FMT_RGTC2 0x00000028 +#define NV50_TIC_0_FMT_S8_Z24 0x00000029 +#define NV50_TIC_0_FMT_Z24_X8 0x0000002a +#define NV50_TIC_0_FMT_Z24_S8 0x0000002b +#define NV50_TIC_0_FMT_Z24_C8_MS4_CS4 0x0000002c +#define NV50_TIC_0_FMT_Z24_C8_MS8_CS8 0x0000002d +#define NV50_TIC_0_FMT_Z24_C8_MS4_CS12 0x0000002e +#define 
NV50_TIC_0_FMT_Z32 0x0000002f +#define NV50_TIC_0_FMT_Z32_S8_X24 0x00000030 +#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS4 0x00000031 +#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS8_CS8 0x00000032 +#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS4 0x00000033 +#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS8_CS8 0x00000034 +#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS4 0x00000035 +#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS8_CS8 0x00000036 +#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS12 0x00000037 +#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS12 0x00000038 +#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS12 0x00000039 +#define NV50_TIC_0_FMT_Z16 0x0000003a + +#define NV50_TIC_1 0x00000004 +#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff +#define NV50_TIC_1_OFFSET_LOW__SHIFT 0 + +#define NV50_TIC_2 0x00000008 +#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff +#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0 +#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400 +#define NV50_TIC_2_TARGET__MASK 0x0003c000 +#define NV50_TIC_2_TARGET__SHIFT 14 +#define NV50_TIC_2_TARGET_1D 0x00000000 +#define NV50_TIC_2_TARGET_2D 0x00004000 +#define NV50_TIC_2_TARGET_3D 0x00008000 +#define NV50_TIC_2_TARGET_CUBE 0x0000c000 +#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000 +#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000 +#define NV50_TIC_2_TARGET_BUFFER 0x00018000 +#define NV50_TIC_2_TARGET_RECT 0x0001c000 +#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000 +#define NV50_TIC_2_LINEAR 0x00040000 +#define NV50_TIC_2_TILE_MODE_X__MASK 0x00380000 +#define NV50_TIC_2_TILE_MODE_X__SHIFT 19 +#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000 +#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22 +#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000 +#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25 +#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000 +#define NV50_TIC_2_2D_UNK0258__SHIFT 28 +#define NV50_TIC_2_NO_BORDER 0x40000000 +#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000 + +#define NV50_TIC_3 0x0000000c +#define NV50_TIC_3_PITCH__MASK 0xffffffff +#define 
NV50_TIC_3_PITCH__SHIFT 0 + +#define NV50_TIC_4 0x00000010 +#define NV50_TIC_4_WIDTH__MASK 0xffffffff +#define NV50_TIC_4_WIDTH__SHIFT 0 + +#define NV50_TIC_5 0x00000014 +#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000 +#define NV50_TIC_5_LAST_LEVEL__SHIFT 28 +#define NV50_TIC_5_DEPTH__MASK 0x0fff0000 +#define NV50_TIC_5_DEPTH__SHIFT 16 +#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff +#define NV50_TIC_5_HEIGHT__SHIFT 0 + +#define NV50_TIC_7 0x0000001c +#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f +#define NV50_TIC_7_BASE_LEVEL__SHIFT 0 +#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0 +#define NV50_TIC_7_MAX_LEVEL__SHIFT 4 +#define NV50_TIC_7_MS_MODE__MASK 0x0000f000 +#define NV50_TIC_7_MS_MODE__SHIFT 12 +#define NV50_TIC_7_MS_MODE_MS1 0x00000000 +#define NV50_TIC_7_MS_MODE_MS2 0x00001000 +#define NV50_TIC_7_MS_MODE_MS4 0x00002000 +#define NV50_TIC_7_MS_MODE_MS8 0x00003000 +#define NVA3_TIC_7_MS_MODE_MS8_ALT 0x00004000 +#define NVA3_TIC_7_MS_MODE_MS2_ALT 0x00005000 +#define NVC0_TIC_7_MS_MODE_UNK6 0x00006000 +#define NV50_TIC_7_MS_MODE_MS4_CS4 0x00008000 +#define NV50_TIC_7_MS_MODE_MS4_CS12 0x00009000 +#define NV50_TIC_7_MS_MODE_MS8_CS8 0x0000a000 +#define NVC0_TIC_7_MS_MODE_MS8_CS24 0x0000b000 + +#define NV50_TSC__SIZE 0x00000020 +#define NV50_TSC_0 0x00000000 +#define NV50_TSC_0_WRAPS__MASK 0x00000007 +#define NV50_TSC_0_WRAPS__SHIFT 0 +#define NV50_TSC_0_WRAPT__MASK 0x00000038 +#define NV50_TSC_0_WRAPT__SHIFT 3 +#define NV50_TSC_0_WRAPR__MASK 0x000001c0 +#define NV50_TSC_0_WRAPR__SHIFT 6 +#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200 +#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00 +#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10 +#define NV50_TSC_0_SRGB_CONVERSION_ALLOWED 0x00002000 +#define NV50_TSC_0_BOX_S__MASK 0x0001c000 +#define NV50_TSC_0_BOX_S__SHIFT 14 +#define NV50_TSC_0_BOX_T__MASK 0x000e0000 +#define NV50_TSC_0_BOX_T__SHIFT 17 +#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000 +#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20 + +#define 
NV50_TSC_1 0x00000004 +#define NV50_TSC_1_UNKN_ANISO_15 0x10000000 +#define NV50_TSC_1_UNKN_ANISO_35 0x18000000 +#define NV50_TSC_1_MAGF__MASK 0x00000003 +#define NV50_TSC_1_MAGF__SHIFT 0 +#define NV50_TSC_1_MAGF_NEAREST 0x00000001 +#define NV50_TSC_1_MAGF_LINEAR 0x00000002 +#define NV50_TSC_1_MINF__MASK 0x00000030 +#define NV50_TSC_1_MINF__SHIFT 4 +#define NV50_TSC_1_MINF_NEAREST 0x00000010 +#define NV50_TSC_1_MINF_LINEAR 0x00000020 +#define NV50_TSC_1_MIPF__MASK 0x000000c0 +#define NV50_TSC_1_MIPF__SHIFT 6 +#define NV50_TSC_1_MIPF_NONE 0x00000040 +#define NV50_TSC_1_MIPF_NEAREST 0x00000080 +#define NV50_TSC_1_MIPF_LINEAR 0x000000c0 +#define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200 +#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000 +#define NV50_TSC_1_LOD_BIAS__SHIFT 12 +#define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000 + +#define NV50_TSC_2 0x00000008 +#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff +#define NV50_TSC_2_MIN_LOD__SHIFT 0 +#define NV50_TSC_2_MAX_LOD__MASK 0x00fff000 +#define NV50_TSC_2_MAX_LOD__SHIFT 12 +#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__MASK 0xff000000 +#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__SHIFT 24 + +#define NV50_TSC_3 0x0000000c +#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__MASK 0x000ff000 +#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__SHIFT 12 +#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__MASK 0x0ff00000 +#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__SHIFT 20 + +#define NV50_TSC_4 0x00000010 +#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff +#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0 + +#define NV50_TSC_5 0x00000014 +#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff +#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0 + +#define NV50_TSC_6 0x00000018 +#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff +#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0 + +#define NV50_TSC_7 0x0000001c +#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff +#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0 + + +#endif /* NV50_TEXTURE_XML */ diff --git 
a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c new file mode 100644 index 00000000000..a9906829fec --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c @@ -0,0 +1,412 @@ + +#include "util/u_format.h" + +#include "nv50/nv50_context.h" + +#include "nv50/nv50_defs.xml.h" + +struct nv50_transfer { + struct pipe_transfer base; + struct nv50_m2mf_rect rect[2]; + uint32_t nblocksx; + uint32_t nblocksy; +}; + +void +nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect, + struct pipe_resource *restrict res, unsigned l, + unsigned x, unsigned y, unsigned z) +{ + struct nv50_miptree *mt = nv50_miptree(res); + const unsigned w = u_minify(res->width0, l); + const unsigned h = u_minify(res->height0, l); + + rect->bo = mt->base.bo; + rect->domain = mt->base.domain; + rect->base = mt->level[l].offset; + rect->pitch = mt->level[l].pitch; + if (util_format_is_plain(res->format)) { + rect->width = w << mt->ms_x; + rect->height = h << mt->ms_y; + rect->x = x << mt->ms_x; + rect->y = y << mt->ms_y; + } else { + rect->width = util_format_get_nblocksx(res->format, w); + rect->height = util_format_get_nblocksy(res->format, h); + rect->x = util_format_get_nblocksx(res->format, x); + rect->y = util_format_get_nblocksy(res->format, y); + } + rect->tile_mode = mt->level[l].tile_mode; + rect->cpp = util_format_get_blocksize(res->format); + + if (mt->layout_3d) { + rect->z = z; + rect->depth = u_minify(res->depth0, l); + } else { + rect->base += z * mt->layer_stride; + rect->z = 0; + rect->depth = 1; + } +} + +void +nv50_m2mf_transfer_rect(struct nv50_context *nv50, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nouveau_bufctx *bctx = nv50->bufctx; + const int cpp = dst->cpp; + uint32_t src_ofst = src->base; + uint32_t dst_ofst = dst->base; + uint32_t height = nblocksy; + uint32_t sy = src->y; + 
uint32_t dy = dst->y; + + assert(dst->cpp == src->cpp); + + nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD); + nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + if (nouveau_bo_memtype(src->bo)) { + BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 6); + PUSH_DATA (push, 0); + PUSH_DATA (push, src->tile_mode); + PUSH_DATA (push, src->width * cpp); + PUSH_DATA (push, src->height); + PUSH_DATA (push, src->depth); + PUSH_DATA (push, src->z); + } else { + src_ofst += src->y * src->pitch + src->x * cpp; + + BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_PITCH_IN), 1); + PUSH_DATA (push, src->pitch); + } + + if (nouveau_bo_memtype(dst->bo)) { + BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 6); + PUSH_DATA (push, 0); + PUSH_DATA (push, dst->tile_mode); + PUSH_DATA (push, dst->width * cpp); + PUSH_DATA (push, dst->height); + PUSH_DATA (push, dst->depth); + PUSH_DATA (push, dst->z); + } else { + dst_ofst += dst->y * dst->pitch + dst->x * cpp; + + BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_PITCH_OUT), 1); + PUSH_DATA (push, dst->pitch); + } + + while (height) { + int line_count = height > 2047 ? 
2047 : height; + + BEGIN_NV04(push, NV50_M2MF(OFFSET_IN_HIGH), 2); + PUSH_DATAh(push, src->bo->offset + src_ofst); + PUSH_DATAh(push, dst->bo->offset + dst_ofst); + + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_OFFSET_IN), 2); + PUSH_DATA (push, src->bo->offset + src_ofst); + PUSH_DATA (push, dst->bo->offset + dst_ofst); + + if (nouveau_bo_memtype(src->bo)) { + BEGIN_NV04(push, NV50_M2MF(TILING_POSITION_IN), 1); + PUSH_DATA (push, (sy << 16) | (src->x * cpp)); + } else { + src_ofst += line_count * src->pitch; + } + if (nouveau_bo_memtype(dst->bo)) { + BEGIN_NV04(push, NV50_M2MF(TILING_POSITION_OUT), 1); + PUSH_DATA (push, (dy << 16) | (dst->x * cpp)); + } else { + dst_ofst += line_count * dst->pitch; + } + + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_LINE_LENGTH_IN), 4); + PUSH_DATA (push, nblocksx * cpp); + PUSH_DATA (push, line_count); + PUSH_DATA (push, (1 << 8) | (1 << 0)); + PUSH_DATA (push, 0); + + height -= line_count; + sy += line_count; + dy += line_count; + } + + nouveau_bufctx_reset(bctx, 0); +} + +void +nv50_sifc_linear_u8(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data) +{ + struct nv50_context *nv50 = nv50_context(&nv->pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + uint32_t *src = (uint32_t *)data; + unsigned count = (size + 3) / 4; + unsigned xcoord = offset & 0xff; + + nouveau_bufctx_refn(nv50->bufctx, 0, dst, domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nv50->bufctx); + nouveau_pushbuf_validate(push); + + offset &= ~0xff; + + BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); + PUSH_DATA (push, 262144); + PUSH_DATA (push, 65536); + PUSH_DATA (push, 1); + PUSH_DATAh(push, dst->offset + offset); + PUSH_DATA (push, dst->offset + offset); + BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); 
+ BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); + PUSH_DATA (push, size); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, xcoord); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + + while (count) { + unsigned nr; + + if (!PUSH_SPACE(push, 16)) + break; + nr = PUSH_AVAIL(push); + assert(nr >= 16); + nr = MIN2(count, nr - 1); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + + BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr); + PUSH_DATAp(push, src, nr); + + src += nr; + count -= nr; + } + + nouveau_bufctx_reset(nv50->bufctx, 0); +} + +void +nv50_m2mf_copy_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size) +{ + struct nouveau_pushbuf *push = nv->pushbuf; + struct nouveau_bufctx *bctx = nv50_context(&nv->pipe)->bufctx; + + nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD); + nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 1); + PUSH_DATA (push, 1); + + while (size) { + unsigned bytes = MIN2(size, 1 << 17); + + BEGIN_NV04(push, NV50_M2MF(OFFSET_IN_HIGH), 2); + PUSH_DATAh(push, src->offset + srcoff); + PUSH_DATAh(push, dst->offset + dstoff); + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_OFFSET_IN), 2); + PUSH_DATA (push, src->offset + srcoff); + PUSH_DATA (push, dst->offset + dstoff); + BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_LINE_LENGTH_IN), 4); + PUSH_DATA (push, bytes); + PUSH_DATA (push, 1); + PUSH_DATA (push, (1 << 8) | (1 << 0)); + PUSH_DATA (push, 0); + + srcoff += bytes; + dstoff += bytes; + size -= bytes; + } + + nouveau_bufctx_reset(bctx, 0); +} + +void * +nv50_miptree_transfer_map(struct pipe_context *pctx, + struct pipe_resource *res, + unsigned level, + 
unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer) +{ + struct nv50_screen *screen = nv50_screen(pctx->screen); + struct nv50_context *nv50 = nv50_context(pctx); + struct nouveau_device *dev = nv50->screen->base.device; + const struct nv50_miptree *mt = nv50_miptree(res); + struct nv50_transfer *tx; + uint32_t size; + int ret; + unsigned flags = 0; + + if (usage & PIPE_TRANSFER_MAP_DIRECTLY) + return NULL; + + tx = CALLOC_STRUCT(nv50_transfer); + if (!tx) + return NULL; + + pipe_resource_reference(&tx->base.resource, res); + + tx->base.level = level; + tx->base.usage = usage; + tx->base.box = *box; + + if (util_format_is_plain(res->format)) { + tx->nblocksx = box->width << mt->ms_x; + tx->nblocksy = box->height << mt->ms_x; + } else { + tx->nblocksx = util_format_get_nblocksx(res->format, box->width); + tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + } + + tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); + tx->base.layer_stride = tx->nblocksy * tx->base.stride; + + nv50_m2mf_rect_setup(&tx->rect[0], res, level, box->x, box->y, box->z); + + size = tx->base.layer_stride; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, + size * tx->base.box.depth, NULL, &tx->rect[1].bo); + if (ret) { + FREE(tx); + return NULL; + } + + tx->rect[1].cpp = tx->rect[0].cpp; + tx->rect[1].width = tx->nblocksx; + tx->rect[1].height = tx->nblocksy; + tx->rect[1].depth = 1; + tx->rect[1].pitch = tx->base.stride; + tx->rect[1].domain = NOUVEAU_BO_GART; + + if (usage & PIPE_TRANSFER_READ) { + unsigned base = tx->rect[0].base; + unsigned z = tx->rect[0].z; + unsigned i; + for (i = 0; i < box->depth; ++i) { + nv50_m2mf_transfer_rect(nv50, &tx->rect[1], &tx->rect[0], + tx->nblocksx, tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += size; + } + tx->rect[0].z = z; + tx->rect[0].base = base; + tx->rect[1].base = 0; + } + + if 
(tx->rect[1].bo->map) { + *ptransfer = &tx->base; + return tx->rect[1].bo->map; + } + + if (usage & PIPE_TRANSFER_READ) + flags = NOUVEAU_BO_RD; + if (usage & PIPE_TRANSFER_WRITE) + flags |= NOUVEAU_BO_WR; + + ret = nouveau_bo_map(tx->rect[1].bo, flags, screen->base.client); + if (ret) { + nouveau_bo_ref(NULL, &tx->rect[1].bo); + FREE(tx); + return NULL; + } + + *ptransfer = &tx->base; + return tx->rect[1].bo->map; +} + +void +nv50_miptree_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct nv50_context *nv50 = nv50_context(pctx); + struct nv50_transfer *tx = (struct nv50_transfer *)transfer; + struct nv50_miptree *mt = nv50_miptree(tx->base.resource); + unsigned i; + + if (tx->base.usage & PIPE_TRANSFER_WRITE) { + for (i = 0; i < tx->base.box.depth; ++i) { + nv50_m2mf_transfer_rect(nv50, &tx->rect[0], &tx->rect[1], + tx->nblocksx, tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += tx->nblocksy * tx->base.stride; + } + } + + nouveau_bo_ref(NULL, &tx->rect[1].bo); + pipe_resource_reference(&transfer->resource, NULL); + + FREE(tx); +} + +void +nv50_cb_push(struct nouveau_context *nv, + struct nouveau_bo *bo, unsigned domain, + unsigned base, unsigned size, + unsigned offset, unsigned words, const uint32_t *data) +{ + struct nouveau_pushbuf *push = nv->pushbuf; + struct nouveau_bufctx *bctx = nv50_context(&nv->pipe)->bufctx; + + assert(!(offset & 3)); + size = align(size, 0x100); + + nouveau_bufctx_refn(bctx, 0, bo, NOUVEAU_BO_WR | domain); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + while (words) { + unsigned nr; + + nr = PUSH_AVAIL(push); + nr = MIN2(nr - 7, words); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1); + + BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); + PUSH_DATAh(push, bo->offset + base); + PUSH_DATA (push, bo->offset + base); + PUSH_DATA (push, (NV50_CB_TMP << 16) | (size & 0xffff)); + BEGIN_NV04(push, 
NV50_3D(CB_ADDR), 1); + PUSH_DATA (push, (offset << 6) | NV50_CB_TMP); + BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr); + PUSH_DATAp(push, data, nr); + + words -= nr; + data += nr; + offset += nr * 4; + } + + nouveau_bufctx_reset(bctx, 0); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.h b/src/gallium/drivers/nouveau/nv50/nv50_transfer.h new file mode 100644 index 00000000000..c58cb0008df --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.h @@ -0,0 +1,27 @@ + +#ifndef __NV50_TRANSFER_H__ +#define __NV50_TRANSFER_H__ + +#include "pipe/p_state.h" + +struct nv50_m2mf_rect { + struct nouveau_bo *bo; + uint32_t base; + unsigned domain; + uint32_t pitch; + uint32_t width; + uint32_t x; + uint32_t height; + uint32_t y; + uint16_t depth; + uint16_t z; + uint16_t tile_mode; + uint16_t cpp; +}; + +void +nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect, + struct pipe_resource *restrict res, unsigned l, + unsigned x, unsigned y, unsigned z); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c new file mode 100644 index 00000000000..c6162b5f415 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -0,0 +1,820 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
/*
 * Build the vertex element state object for 'num_elements' vertex elements.
 *
 * For every element the hardware vertex format word is looked up in
 * nv50_format_table. If the source format has no hardware encoding, a 32-bit
 * float format with the same component count is substituted and the state
 * object is flagged as needing conversion. A translate key covering all
 * elements is filled in alongside and used to create so->translate.
 *
 * Returns the new state object, or NULL if allocation fails or an element has
 * an unsupported component count.
 */
void *
nv50_vertex_state_create(struct pipe_context *pipe,
                         unsigned num_elements,
                         const struct pipe_vertex_element *elements)
{
   struct nv50_vertex_stateobj *so;
   struct translate_key transkey;
   unsigned i;

   /* The element array is allocated inline, directly after the header. */
   so = MALLOC(sizeof(*so) +
               num_elements * sizeof(struct nv50_vertex_element));
   if (!so)
      return NULL;
   so->num_elements = num_elements;
   so->instance_elts = 0;
   so->instance_bufs = 0;
   so->need_conversion = FALSE;

   memset(so->vb_access_size, 0, sizeof(so->vb_access_size));

   /* Start at the maximum so the first real divisor always replaces it. */
   for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
      so->min_instance_div[i] = 0xffffffff;

   transkey.nr_elements = 0;
   transkey.output_stride = 0;

   for (i = 0; i < num_elements; ++i) {
      const struct pipe_vertex_element *ve = &elements[i];
      const unsigned vbi = ve->vertex_buffer_index;
      unsigned size;
      enum pipe_format fmt = ve->src_format;

      so->element[i].pipe = elements[i];
      so->element[i].state = nv50_format_table[fmt].vtx;

      /* A zero table entry means there is no hardware vertex format:
       * fall back to floats with the same number of components.
       */
      if (!so->element[i].state) {
         switch (util_format_get_nr_components(fmt)) {
         case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
         case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
         case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
         case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
         default:
            assert(0);
            FREE(so);
            return NULL;
         }
         so->element[i].state = nv50_format_table[fmt].vtx;
         so->need_conversion = TRUE;
      }
      /* The element index is OR'ed into the low bits of the state word. */
      so->element[i].state |= i;

      /* Track the furthest byte any element reads from its buffer. */
      size = util_format_get_blocksize(fmt);
      if (so->vb_access_size[vbi] < (ve->src_offset + size))
         so->vb_access_size[vbi] = ve->src_offset + size;

      if (1) {
         unsigned j = transkey.nr_elements++;

         transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
         transkey.element[j].input_format = ve->src_format;
         transkey.element[j].input_buffer = vbi;
         transkey.element[j].input_offset = ve->src_offset;
         transkey.element[j].instance_divisor = ve->instance_divisor;

         transkey.element[j].output_format = fmt;
         transkey.element[j].output_offset = transkey.output_stride;
         /* Each output element is padded to a multiple of 4 bytes. */
         transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;

         if (unlikely(ve->instance_divisor)) {
            so->instance_elts |= 1 << i;
            so->instance_bufs |= 1 << vbi;
            if (ve->instance_divisor < so->min_instance_div[vbi])
               so->min_instance_div[vbi] = ve->instance_divisor;
         }
      }
   }

   so->translate = translate_create(&transkey);
   /* vertex_size is in 32-bit words; MAX2 guards against dividing by zero. */
   so->vertex_size = transkey.output_stride / 4;
   so->packet_vertex_limit = NV04_PFIFO_MAX_PACKET_LEN /
      MAX2(so->vertex_size, 1);

   return so;
}
*desc = + util_format_description(ve->src_format); + + assert(vb->user_buffer); + + if (desc->channel[0].pure_integer) { + if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + desc->unpack_rgba_sint((int32_t *)v, 0, data, 0, 1, 1); + } else { + desc->unpack_rgba_uint((uint32_t *)v, 0, data, 0, 1, 1); + } + } else { + desc->unpack_rgba_float(v, 0, data, 0, 1, 1); + } + + switch (nc) { + case 4: + BEGIN_NV04(push, NV50_3D(VTX_ATTR_4F_X(attr)), 4); + PUSH_DATAf(push, v[0]); + PUSH_DATAf(push, v[1]); + PUSH_DATAf(push, v[2]); + PUSH_DATAf(push, v[3]); + break; + case 3: + BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(attr)), 3); + PUSH_DATAf(push, v[0]); + PUSH_DATAf(push, v[1]); + PUSH_DATAf(push, v[2]); + break; + case 2: + BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(attr)), 2); + PUSH_DATAf(push, v[0]); + PUSH_DATAf(push, v[1]); + break; + case 1: + if (attr == nv50->vertprog->vp.edgeflag) { + BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1); + PUSH_DATA (push, v[0] ? 1 : 0); + } + BEGIN_NV04(push, NV50_3D(VTX_ATTR_1F(attr)), 1); + PUSH_DATAf(push, v[0]); + break; + default: + assert(0); + break; + } +} + +static INLINE void +nv50_user_vbuf_range(struct nv50_context *nv50, int vbi, + uint32_t *base, uint32_t *size) +{ + if (unlikely(nv50->vertex->instance_bufs & (1 << vbi))) { + /* TODO: use min and max instance divisor to get a proper range */ + *base = 0; + *size = nv50->vtxbuf[vbi].buffer->width0; + } else { + /* NOTE: if there are user buffers, we *must* have index bounds */ + assert(nv50->vb_elt_limit != ~0); + *base = nv50->vb_elt_first * nv50->vtxbuf[vbi].stride; + *size = nv50->vb_elt_limit * nv50->vtxbuf[vbi].stride + + nv50->vertex->vb_access_size[vbi]; + } +} + +static void +nv50_upload_user_buffers(struct nv50_context *nv50, + uint64_t addrs[], uint32_t limits[]) +{ + unsigned b; + + for (b = 0; b < nv50->num_vtxbufs; ++b) { + struct nouveau_bo *bo; + const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b]; + uint32_t base, size; + + if (!(nv50->vbo_user & (1 << b)) || 
!vb->stride) + continue; + nv50_user_vbuf_range(nv50, b, &base, &size); + + limits[b] = base + size - 1; + addrs[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, base, size, + &bo); + if (addrs[b]) + BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART | + NOUVEAU_BO_RD, bo); + } + nv50->base.vbo_dirty = TRUE; +} + +static void +nv50_update_user_vbufs(struct nv50_context *nv50) +{ + uint64_t address[PIPE_MAX_ATTRIBS]; + struct nouveau_pushbuf *push = nv50->base.pushbuf; + unsigned i; + uint32_t written = 0; + + for (i = 0; i < nv50->vertex->num_elements; ++i) { + struct pipe_vertex_element *ve = &nv50->vertex->element[i].pipe; + const unsigned b = ve->vertex_buffer_index; + struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b]; + uint32_t base, size; + + if (!(nv50->vbo_user & (1 << b))) + continue; + + if (!vb->stride) { + nv50_emit_vtxattr(nv50, vb, ve, i); + continue; + } + nv50_user_vbuf_range(nv50, b, &base, &size); + + if (!(written & (1 << b))) { + struct nouveau_bo *bo; + const uint32_t bo_flags = NOUVEAU_BO_GART | NOUVEAU_BO_RD; + written |= 1 << b; + address[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, + base, size, &bo); + if (address[b]) + BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo); + } + + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); + PUSH_DATAh(push, address[b] + base + size - 1); + PUSH_DATA (push, address[b] + base + size - 1); + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_START_HIGH(i)), 2); + PUSH_DATAh(push, address[b] + ve->src_offset); + PUSH_DATA (push, address[b] + ve->src_offset); + } + nv50->base.vbo_dirty = TRUE; +} + +static INLINE void +nv50_release_user_vbufs(struct nv50_context *nv50) +{ + if (nv50->vbo_user) { + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX_TMP); + nouveau_scratch_done(&nv50->base); + } +} + +void +nv50_vertex_arrays_validate(struct nv50_context *nv50) +{ + uint64_t addrs[PIPE_MAX_ATTRIBS]; + uint32_t limits[PIPE_MAX_ATTRIBS]; + struct nouveau_pushbuf *push = 
nv50->base.pushbuf; + struct nv50_vertex_stateobj *vertex = nv50->vertex; + struct pipe_vertex_buffer *vb; + struct nv50_vertex_element *ve; + uint32_t mask; + uint32_t refd = 0; + unsigned i; + const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts); + + if (unlikely(vertex->need_conversion)) + nv50->vbo_fifo = ~0; + else + if (nv50->vbo_user & ~nv50->vbo_constant) + nv50->vbo_fifo = nv50->vbo_push_hint ? ~0 : 0; + else + nv50->vbo_fifo = 0; + + if (!nv50->vbo_fifo) { + /* if vertex buffer was written by GPU - flush VBO cache */ + for (i = 0; i < nv50->num_vtxbufs; ++i) { + struct nv04_resource *buf = nv04_resource(nv50->vtxbuf[i].buffer); + if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + nv50->base.vbo_dirty = TRUE; + break; + } + } + } + + /* update vertex format state */ + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_ATTRIB(0)), n); + if (nv50->vbo_fifo) { + nv50->state.num_vtxelts = vertex->num_elements; + for (i = 0; i < vertex->num_elements; ++i) + PUSH_DATA (push, vertex->element[i].state); + for (; i < n; ++i) + PUSH_DATA (push, NV50_3D_VERTEX_ATTRIB_INACTIVE); + for (i = 0; i < n; ++i) { + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1); + PUSH_DATA (push, 0); + } + return; + } + for (i = 0; i < vertex->num_elements; ++i) { + const unsigned b = vertex->element[i].pipe.vertex_buffer_index; + ve = &vertex->element[i]; + vb = &nv50->vtxbuf[b]; + + if (likely(vb->stride) || !(nv50->vbo_user & (1 << b))) + PUSH_DATA(push, ve->state); + else + PUSH_DATA(push, ve->state | NV50_3D_VERTEX_ARRAY_ATTRIB_CONST); + } + for (; i < n; ++i) + PUSH_DATA(push, NV50_3D_VERTEX_ATTRIB_INACTIVE); + + /* update per-instance enables */ + mask = vertex->instance_elts ^ nv50->state.instance_elts; + while (mask) { + const int i = ffs(mask) - 1; + mask &= ~(1 << i); + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); + PUSH_DATA (push, (vertex->instance_elts >> i) & 1); + } + 
nv50->state.instance_elts = vertex->instance_elts; + + if (nv50->vbo_user & ~nv50->vbo_constant) + nv50_upload_user_buffers(nv50, addrs, limits); + + /* update buffers and set constant attributes */ + for (i = 0; i < vertex->num_elements; ++i) { + uint64_t address, limit; + const unsigned b = vertex->element[i].pipe.vertex_buffer_index; + ve = &vertex->element[i]; + vb = &nv50->vtxbuf[b]; + + if (unlikely(nv50->vbo_constant & (1 << b))) { + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1); + PUSH_DATA (push, 0); + nv50_emit_vtxattr(nv50, vb, &ve->pipe, i); + continue; + } else + if (nv50->vbo_user & (1 << b)) { + address = addrs[b] + ve->pipe.src_offset; + limit = addrs[b] + limits[b]; + } else { + struct nv04_resource *buf = nv04_resource(vb->buffer); + if (!(refd & (1 << b))) { + refd |= 1 << b; + BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD); + } + address = buf->address + vb->buffer_offset + ve->pipe.src_offset; + limit = buf->address + buf->base.width0 - 1; + } + + if (unlikely(ve->pipe.instance_divisor)) { + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 4); + PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + PUSH_DATA (push, ve->pipe.instance_divisor); + } else { + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 3); + PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + } + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); + PUSH_DATAh(push, limit); + PUSH_DATA (push, limit); + } + for (; i < nv50->state.num_vtxelts; ++i) { + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1); + PUSH_DATA (push, 0); + } + nv50->state.num_vtxelts = vertex->num_elements; +} + +#define NV50_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nv50_prim_gl(unsigned prim) +{ + switch (prim) { + NV50_PRIM_GL_CASE(POINTS); + NV50_PRIM_GL_CASE(LINES); + 
NV50_PRIM_GL_CASE(LINE_LOOP); + NV50_PRIM_GL_CASE(LINE_STRIP); + NV50_PRIM_GL_CASE(TRIANGLES); + NV50_PRIM_GL_CASE(TRIANGLE_STRIP); + NV50_PRIM_GL_CASE(TRIANGLE_FAN); + NV50_PRIM_GL_CASE(QUADS); + NV50_PRIM_GL_CASE(QUAD_STRIP); + NV50_PRIM_GL_CASE(POLYGON); + NV50_PRIM_GL_CASE(LINES_ADJACENCY); + NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + default: + return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + break; + } +} + +/* For pre-nva0 transform feedback. */ +static const uint8_t nv50_pipe_prim_to_prim_size[PIPE_PRIM_MAX + 1] = +{ + [PIPE_PRIM_POINTS] = 1, + [PIPE_PRIM_LINES] = 2, + [PIPE_PRIM_LINE_LOOP] = 2, + [PIPE_PRIM_LINE_STRIP] = 2, + [PIPE_PRIM_TRIANGLES] = 3, + [PIPE_PRIM_TRIANGLE_STRIP] = 3, + [PIPE_PRIM_TRIANGLE_FAN] = 3, + [PIPE_PRIM_QUADS] = 3, + [PIPE_PRIM_QUAD_STRIP] = 3, + [PIPE_PRIM_POLYGON] = 3, + [PIPE_PRIM_LINES_ADJACENCY] = 2, + [PIPE_PRIM_LINE_STRIP_ADJACENCY] = 2, + [PIPE_PRIM_TRIANGLES_ADJACENCY] = 3, + [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = 3 +}; + +static void +nv50_draw_arrays(struct nv50_context *nv50, + unsigned mode, unsigned start, unsigned count, + unsigned instance_count) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + unsigned prim; + + if (nv50->state.index_bias) { + BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1); + PUSH_DATA (push, 0); + nv50->state.index_bias = 0; + } + + prim = nv50_prim_gl(mode); + + while (instance_count--) { + BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (push, prim); + BEGIN_NV04(push, NV50_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, start); + PUSH_DATA (push, count); + BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); + PUSH_DATA (push, 0); + + prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } +} + +static void +nv50_draw_elements_inline_u08(struct nouveau_pushbuf *push, const uint8_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 3) { + unsigned i; + BEGIN_NI04(push, 
NV50_3D(VB_ELEMENT_U32), count & 3); + for (i = 0; i < (count & 3); ++i) + PUSH_DATA(push, *map++); + count &= ~3; + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4; + + BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U8), nr); + for (i = 0; i < nr; ++i) { + PUSH_DATA(push, + (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]); + map += 4; + } + count -= nr * 4; + } +} + +static void +nv50_draw_elements_inline_u16(struct nouveau_pushbuf *push, const uint16_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + count &= ~1; + BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, *map++); + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; + + BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U16), nr); + for (i = 0; i < nr; ++i) { + PUSH_DATA(push, (map[1] << 16) | map[0]); + map += 2; + } + count -= nr * 2; + } +} + +static void +nv50_draw_elements_inline_u32(struct nouveau_pushbuf *push, const uint32_t *map, + unsigned start, unsigned count) +{ + map += start; + + while (count) { + const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); + + BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U32), nr); + PUSH_DATAp(push, map, nr); + + map += nr; + count -= nr; + } +} + +static void +nv50_draw_elements_inline_u32_short(struct nouveau_pushbuf *push, + const uint32_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + count--; + BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, *map++); + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; + + BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U16), nr); + for (i = 0; i < nr; ++i) { + PUSH_DATA(push, (map[1] << 16) | map[0]); + map += 2; + } + count -= nr * 2; + } +} + +static void +nv50_draw_elements(struct nv50_context *nv50, boolean shorten, + unsigned mode, unsigned start, unsigned count, + unsigned instance_count, int32_t index_bias) +{ + struct nouveau_pushbuf *push 
= nv50->base.pushbuf; + unsigned prim; + const unsigned index_size = nv50->idxbuf.index_size; + + prim = nv50_prim_gl(mode); + + if (index_bias != nv50->state.index_bias) { + BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1); + PUSH_DATA (push, index_bias); + nv50->state.index_bias = index_bias; + } + + if (nv50->idxbuf.buffer) { + struct nv04_resource *buf = nv04_resource(nv50->idxbuf.buffer); + unsigned pb_start; + unsigned pb_bytes; + const unsigned base = (buf->offset + nv50->idxbuf.offset) & ~3; + + start += ((buf->offset + nv50->idxbuf.offset) & 3) >> (index_size >> 1); + + assert(nouveau_resource_mapped_by_gpu(nv50->idxbuf.buffer)); + + while (instance_count--) { + BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (push, prim); + + nouveau_pushbuf_space(push, 8, 0, 1); + + switch (index_size) { + case 4: + BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U32), count); + nouveau_pushbuf_data(push, buf->bo, base + start * 4, count * 4); + break; + case 2: + pb_start = (start & ~1) * 2; + pb_bytes = ((start + count + 1) & ~1) * 2 - pb_start; + + BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U16_SETUP), 1); + PUSH_DATA (push, (start << 31) | count); + BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U16), pb_bytes / 4); + nouveau_pushbuf_data(push, buf->bo, base + pb_start, pb_bytes); + BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U16_SETUP), 1); + PUSH_DATA (push, 0); + break; + default: + assert(index_size == 1); + pb_start = start & ~3; + pb_bytes = ((start + count + 3) & ~3) - pb_start; + + BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U8_SETUP), 1); + PUSH_DATA (push, (start << 30) | count); + BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U8), pb_bytes / 4); + nouveau_pushbuf_data(push, buf->bo, base + pb_start, pb_bytes); + BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U8_SETUP), 1); + PUSH_DATA (push, 0); + break; + } + BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); + PUSH_DATA (push, 0); + + prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + } else { + const void *data = nv50->idxbuf.user_buffer; + + while 
/*
 * Draw with the vertex count taken from a stream output target
 * (info->count_from_stream_output) rather than from the CPU.
 *
 * Only supported on 3D classes >= NVA0_3D_CLASS; on older classes an error
 * is logged and nothing is drawn. One BEGIN/END pair is emitted per
 * instance; info->instance_count must be non-zero.
 */
static void
nva0_draw_stream_output(struct nv50_context *nv50,
                        const struct pipe_draw_info *info)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_so_target *so = nv50_so_target(info->count_from_stream_output);
   struct nv04_resource *res = nv04_resource(so->pipe.buffer);
   unsigned num_instances = info->instance_count;
   unsigned mode = nv50_prim_gl(info->mode);

   if (unlikely(nv50->screen->base.class_3d < NVA0_3D_CLASS)) {
      /* A proper implementation without waiting doesn't seem possible,
       * so don't bother.
       */
      NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
      return;
   }

   /* If the GPU was still writing the buffer, serialize and flush the
    * vertex array cache before it is consumed as vertex data.
    */
   if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      PUSH_SPACE(push, 4);
      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
      PUSH_DATA (push, 0);
   }

   /* The do/while below runs at least once, so zero instances is invalid. */
   assert(num_instances);
   do {
      PUSH_SPACE(push, 8);
      BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
      PUSH_DATA (push, mode);
      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
      PUSH_DATA (push, 0);
      /* The data word of this method is supplied by the query object
       * (so->pq), presumably the byte count written by stream output.
       * TODO confirm against nv50_query_pushbuf_submit().
       */
      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
      nv50_query_pushbuf_submit(push, so->pq, 0x4);
      BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
      PUSH_DATA (push, 0);

      mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
   } while (--num_instances);
}
+ */ + nv50->vbo_push_hint = /* the 64 is heuristic */ + !(info->indexed && ((nv50->vb_elt_limit + 64) < info->count)); + + if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_ARRAYS | NV50_NEW_VERTEX))) { + if (!!nv50->vbo_fifo != nv50->vbo_push_hint) + nv50->dirty |= NV50_NEW_ARRAYS; + else + if (!nv50->vbo_fifo) + nv50_update_user_vbufs(nv50); + } + + if (unlikely(nv50->num_so_targets && !nv50->gmtyprog)) + nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode]; + + nv50_state_validate(nv50, ~0, 8); /* 8 as minimum, we use flush_notify */ + + push->kick_notify = nv50_draw_vbo_kick_notify; + + if (nv50->vbo_fifo) { + nv50_push_vbo(nv50, info); + push->kick_notify = nv50_default_kick_notify; + nouveau_pushbuf_bufctx(push, NULL); + return; + } + + if (nv50->state.instance_base != info->start_instance) { + nv50->state.instance_base = info->start_instance; + /* NOTE: this does not affect the shader input, should it ? */ + BEGIN_NV04(push, NV50_3D(VB_INSTANCE_BASE), 1); + PUSH_DATA (push, info->start_instance); + } + + if (nv50->base.vbo_dirty) { + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1); + PUSH_DATA (push, 0); + nv50->base.vbo_dirty = FALSE; + } + + if (info->indexed) { + boolean shorten = info->max_index <= 65535; + + if (info->primitive_restart != nv50->state.prim_restart) { + if (info->primitive_restart) { + BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 2); + PUSH_DATA (push, 1); + PUSH_DATA (push, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } else { + BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 1); + PUSH_DATA (push, 0); + } + nv50->state.prim_restart = info->primitive_restart; + } else + if (info->primitive_restart) { + BEGIN_NV04(push, NV50_3D(PRIM_RESTART_INDEX), 1); + PUSH_DATA (push, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } + + nv50_draw_elements(nv50, shorten, + info->mode, info->start, info->count, + info->instance_count, info->index_bias); + } else + if 
(unlikely(info->count_from_stream_output)) { + nva0_draw_stream_output(nv50, info); + } else { + nv50_draw_arrays(nv50, + info->mode, info->start, info->count, + info->instance_count); + } + push->kick_notify = nv50_default_kick_notify; + + nv50_release_user_vbufs(nv50); + + nouveau_pushbuf_bufctx(push, NULL); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h new file mode 100644 index 00000000000..e8578c8be6f --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h @@ -0,0 +1,125 @@ + +#ifndef __NV50_WINSYS_H__ +#define __NV50_WINSYS_H__ + +#include <stdint.h> +#include <unistd.h> + +#include "pipe/p_defines.h" + +#include "nouveau_winsys.h" +#include "nouveau_buffer.h" + + +#ifndef NV04_PFIFO_MAX_PACKET_LEN +#define NV04_PFIFO_MAX_PACKET_LEN 2047 +#endif + + +static INLINE void +nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin, + unsigned flags, struct nouveau_bo *bo) +{ + nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL; +} + +static INLINE void +nv50_add_bufctx_resident(struct nouveau_bufctx *bufctx, int bin, + struct nv04_resource *res, unsigned flags) +{ + struct nouveau_bufref *ref = + nouveau_bufctx_refn(bufctx, bin, res->bo, flags | res->domain); + ref->priv = res; + ref->priv_data = flags; +} + +#define BCTX_REFN_bo(ctx, bin, fl, bo) \ + nv50_add_bufctx_resident_bo(ctx, NV50_BIND_##bin, fl, bo); + +#define BCTX_REFN(bctx, bin, res, acc) \ + nv50_add_bufctx_resident(bctx, NV50_BIND_##bin, res, NOUVEAU_BO_##acc) + +static INLINE void +PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) +{ + struct nouveau_pushbuf_refn ref = { bo, flags }; + nouveau_pushbuf_refn(push, &ref, 1); +} + + +#define SUBC_3D(m) 3, (m) +#define NV50_3D(n) SUBC_3D(NV50_3D_##n) +#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n) + +#define SUBC_2D(m) 4, (m) +#define NV50_2D(n) SUBC_2D(NV50_2D_##n) + +#define SUBC_M2MF(m) 5, (m) +#define NV50_M2MF(n) 
/*
 * FIFO packet header builders. The method goes in the low bits, the
 * subchannel is shifted to bit 13 and, where present, the data word count to
 * bit 18.
 */

/* Header used by BEGIN_NV04: count, subchannel and method, no mode bits. */
static INLINE uint32_t
NV50_FIFO_PKHDR(int subc, int mthd, unsigned size)
{
   const uint32_t count_bits = size << 18;
   const uint32_t subc_bits = subc << 13;

   return count_bits | subc_bits | mthd;
}

/* Header used by BEGIN_NI04: same layout with bit 30 set. */
static INLINE uint32_t
NV50_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
{
   const uint32_t count_bits = size << 18;
   const uint32_t subc_bits = subc << 13;

   return 0x40000000 | count_bits | subc_bits | mthd;
}

/* Header used by BEGIN_NL50 ("long" packet): the count follows in the next
 * word instead of being encoded in the header.
 */
static INLINE uint32_t
NV50_FIFO_PKHDR_L(int subc, int mthd)
{
   const uint32_t subc_bits = subc << 13;

   return 0x00030000 | subc_bits | mthd;
}
(the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <fcntl.h> + +#include "util/u_format.h" +#include "util/u_sampler.h" +#include "vl/vl_zscan.h" + +#include "nv50/nv84_video.h" + +static int +nv84_copy_firmware(const char *path, void *dest, ssize_t len) +{ + int fd = open(path, O_RDONLY | O_CLOEXEC); + ssize_t r; + if (fd < 0) { + fprintf(stderr, "opening firmware file %s failed: %m\n", path); + return 1; + } + r = read(fd, dest, len); + close(fd); + + if (r != len) { + fprintf(stderr, "reading firwmare file %s failed: %m\n", path); + return 1; + } + + return 0; +} + +static int +filesize(const char *path) +{ + int ret; + struct stat statbuf; + + ret = stat(path, &statbuf); + if (ret) + return ret; + return statbuf.st_size; +} + +static struct nouveau_bo * +nv84_load_firmwares(struct nouveau_device *dev, struct nv84_decoder *dec, + const char *fw1, const char *fw2) +{ + int ret, size1, size2 = 0; + struct nouveau_bo *fw; + + size1 = filesize(fw1); + if (fw2) + size2 = filesize(fw2); + if (size1 
< 0 || size2 < 0) + return NULL; + + dec->vp_fw2_offset = align(size1, 0x100); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, dec->vp_fw2_offset + size2, NULL, &fw); + if (ret) + return NULL; + ret = nouveau_bo_map(fw, NOUVEAU_BO_WR, dec->client); + if (ret) + goto error; + + ret = nv84_copy_firmware(fw1, fw->map, size1); + if (fw2 && !ret) + ret = nv84_copy_firmware(fw2, fw->map + dec->vp_fw2_offset, size2); + munmap(fw->map, fw->size); + fw->map = NULL; + if (!ret) + return fw; +error: + nouveau_bo_ref(NULL, &fw); + return NULL; +} + +static struct nouveau_bo * +nv84_load_bsp_firmware(struct nouveau_device *dev, struct nv84_decoder *dec) +{ + return nv84_load_firmwares( + dev, dec, "/lib/firmware/nouveau/nv84_bsp-h264", NULL); +} + +static struct nouveau_bo * +nv84_load_vp_firmware(struct nouveau_device *dev, struct nv84_decoder *dec) +{ + return nv84_load_firmwares( + dev, dec, + "/lib/firmware/nouveau/nv84_vp-h264-1", + "/lib/firmware/nouveau/nv84_vp-h264-2"); +} + +static struct nouveau_bo * +nv84_load_vp_firmware_mpeg(struct nouveau_device *dev, struct nv84_decoder *dec) +{ + return nv84_load_firmwares( + dev, dec, "/lib/firmware/nouveau/nv84_vp-mpeg12", NULL); +} + +static void +nv84_decoder_decode_bitstream_h264(struct pipe_video_codec *decoder, + struct pipe_video_buffer *video_target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void *const *data, + const unsigned *num_bytes) +{ + struct nv84_decoder *dec = (struct nv84_decoder *)decoder; + struct nv84_video_buffer *target = (struct nv84_video_buffer *)video_target; + + struct pipe_h264_picture_desc *desc = (struct pipe_h264_picture_desc *)picture; + + assert(target->base.buffer_format == PIPE_FORMAT_NV12); + + nv84_decoder_bsp(dec, desc, num_buffers, data, num_bytes, target); + nv84_decoder_vp_h264(dec, desc, target); +} + +static void +nv84_decoder_flush(struct pipe_video_codec *decoder) +{ +} + +static void +nv84_decoder_begin_frame_h264(struct pipe_video_codec *decoder, + 
struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ +} + +static void +nv84_decoder_end_frame_h264(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ +} + +static void +nv84_decoder_decode_bitstream_mpeg12(struct pipe_video_codec *decoder, + struct pipe_video_buffer *video_target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void *const *data, + const unsigned *num_bytes) +{ + struct nv84_decoder *dec = (struct nv84_decoder *)decoder; + + assert(video_target->buffer_format == PIPE_FORMAT_NV12); + + vl_mpg12_bs_decode(dec->mpeg12_bs, + video_target, + (struct pipe_mpeg12_picture_desc *)picture, + num_buffers, + data, + num_bytes); +} + +static void +nv84_decoder_begin_frame_mpeg12(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ + struct nv84_decoder *dec = (struct nv84_decoder *)decoder; + struct pipe_mpeg12_picture_desc *desc = (struct pipe_mpeg12_picture_desc *)picture; + int i; + + nouveau_bo_wait(dec->mpeg12_bo, NOUVEAU_BO_RDWR, dec->client); + dec->mpeg12_mb_info = dec->mpeg12_bo->map + 0x100; + dec->mpeg12_data = dec->mpeg12_bo->map + 0x100 + + align(0x20 * mb(dec->base.width) * mb(dec->base.height), 0x100); + if (desc->intra_matrix) { + dec->zscan = desc->alternate_scan ? 
vl_zscan_alternate : vl_zscan_normal; + for (i = 0; i < 64; i++) { + dec->mpeg12_intra_matrix[i] = desc->intra_matrix[dec->zscan[i]]; + dec->mpeg12_non_intra_matrix[i] = desc->non_intra_matrix[dec->zscan[i]]; + } + dec->mpeg12_intra_matrix[0] = 1 << (7 - desc->intra_dc_precision); + } +} + +static void +nv84_decoder_end_frame_mpeg12(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ + nv84_decoder_vp_mpeg12( + (struct nv84_decoder *)decoder, + (struct pipe_mpeg12_picture_desc *)picture, + (struct nv84_video_buffer *)target); +} + +static void +nv84_decoder_decode_macroblock(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, + const struct pipe_macroblock *macroblocks, + unsigned num_macroblocks) +{ + const struct pipe_mpeg12_macroblock *mb = (const struct pipe_mpeg12_macroblock *)macroblocks; + for (int i = 0; i < num_macroblocks; i++, mb++) { + nv84_decoder_vp_mpeg12_mb( + (struct nv84_decoder *)decoder, + (struct pipe_mpeg12_picture_desc *)picture, + mb); + } +} + +static void +nv84_decoder_destroy(struct pipe_video_codec *decoder) +{ + struct nv84_decoder *dec = (struct nv84_decoder *)decoder; + + nouveau_bo_ref(NULL, &dec->bsp_fw); + nouveau_bo_ref(NULL, &dec->bsp_data); + nouveau_bo_ref(NULL, &dec->vp_fw); + nouveau_bo_ref(NULL, &dec->vp_data); + nouveau_bo_ref(NULL, &dec->mbring); + nouveau_bo_ref(NULL, &dec->vpring); + nouveau_bo_ref(NULL, &dec->bitstream); + nouveau_bo_ref(NULL, &dec->vp_params); + nouveau_bo_ref(NULL, &dec->fence); + + nouveau_object_del(&dec->bsp); + nouveau_object_del(&dec->vp); + + nouveau_bufctx_del(&dec->bsp_bufctx); + nouveau_pushbuf_del(&dec->bsp_pushbuf); + nouveau_object_del(&dec->bsp_channel); + + nouveau_bufctx_del(&dec->vp_bufctx); + nouveau_pushbuf_del(&dec->vp_pushbuf); + nouveau_object_del(&dec->vp_channel); + + nouveau_client_del(&dec->client); + + if (dec->mpeg12_bs) + FREE(dec->mpeg12_bs); + FREE(dec); +} + 
+/* Create a hardware decoder for the H.264 or MPEG12 profile described by templ. + * If the XVMC_VL environment variable is set the request is passed to + * vl_create_decoder() instead. Returns NULL for an unsupported profile or + * entrypoint, or when any setup step fails (the partially built decoder is + * released through nv84_decoder_destroy()). + */ struct pipe_video_codec * +nv84_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ) +{ + struct nv50_context *nv50 = (struct nv50_context *)context; + struct nouveau_screen *screen = &nv50->screen->base; + struct nv84_decoder *dec; + struct nouveau_pushbuf *bsp_push, *vp_push; + struct nv50_surface surf; + struct nv50_miptree mip; + union pipe_color_union color; + struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 }; + int ret, i; + int is_h264 = u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC; + int is_mpeg12 = u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_MPEG12; + + if (getenv("XVMC_VL")) + return vl_create_decoder(context, templ); + + if ((is_h264 && templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) || + (is_mpeg12 && templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_IDCT)) { + debug_printf("%x\n", templ->entrypoint); + return NULL; + } + + if (!is_h264 && !is_mpeg12) { + debug_printf("invalid profile: %x\n", templ->profile); + return NULL; + } + + dec = CALLOC_STRUCT(nv84_decoder); + if (!dec) + return NULL; + + dec->base = *templ; + dec->base.context = context; + dec->base.destroy = nv84_decoder_destroy; + dec->base.flush = nv84_decoder_flush; + if (is_h264) { + dec->base.decode_bitstream = nv84_decoder_decode_bitstream_h264; + dec->base.begin_frame = nv84_decoder_begin_frame_h264; + dec->base.end_frame = nv84_decoder_end_frame_h264; + + dec->frame_mbs = mb(dec->base.width) * mb_half(dec->base.height) * 2; + dec->frame_size = dec->frame_mbs << 8; + dec->vpring_deblock = align(0x30 * dec->frame_mbs, 0x100); + dec->vpring_residual = 0x2000 + MAX2(0x32000, 0x600 * dec->frame_mbs); + dec->vpring_ctrl = MAX2(0x10000, align(0x1080 + 0x144 * dec->frame_mbs, 0x100)); + } else if (is_mpeg12) { + dec->base.decode_macroblock = nv84_decoder_decode_macroblock; + dec->base.begin_frame = nv84_decoder_begin_frame_mpeg12; + dec->base.end_frame = nv84_decoder_end_frame_mpeg12; + + if 
(templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + dec->mpeg12_bs = CALLOC_STRUCT(vl_mpg12_bs); + if (!dec->mpeg12_bs) + goto fail; + vl_mpg12_bs_init(dec->mpeg12_bs, &dec->base); + dec->base.decode_bitstream = nv84_decoder_decode_bitstream_mpeg12; + } + } else { + goto fail; + } + + ret = nouveau_client_new(screen->device, &dec->client); + if (ret) + goto fail; + + if (is_h264) { + ret = nouveau_object_new(&screen->device->object, 0, + NOUVEAU_FIFO_CHANNEL_CLASS, + &nv04_data, sizeof(nv04_data), &dec->bsp_channel); + if (ret) + goto fail; + + ret = nouveau_pushbuf_new(dec->client, dec->bsp_channel, 4, + 32 * 1024, true, &dec->bsp_pushbuf); + if (ret) + goto fail; + + ret = nouveau_bufctx_new(dec->client, 1, &dec->bsp_bufctx); + if (ret) + goto fail; + } + + ret = nouveau_object_new(&screen->device->object, 0, + NOUVEAU_FIFO_CHANNEL_CLASS, + &nv04_data, sizeof(nv04_data), &dec->vp_channel); + if (ret) + goto fail; + ret = nouveau_pushbuf_new(dec->client, dec->vp_channel, 4, + 32 * 1024, true, &dec->vp_pushbuf); + if (ret) + goto fail; + + ret = nouveau_bufctx_new(dec->client, 1, &dec->vp_bufctx); + if (ret) + goto fail; + + bsp_push = dec->bsp_pushbuf; + vp_push = dec->vp_pushbuf; + + if (is_h264) { + dec->bsp_fw = nv84_load_bsp_firmware(screen->device, dec); + dec->vp_fw = nv84_load_vp_firmware(screen->device, dec); + if (!dec->bsp_fw || !dec->vp_fw) + goto fail; + } + if (is_mpeg12) { + dec->vp_fw = nv84_load_vp_firmware_mpeg(screen->device, dec); + if (!dec->vp_fw) + goto fail; + } + + if (is_h264) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, + 0, 0x40000, NULL, &dec->bsp_data); + if (ret) + goto fail; + } + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, + 0, 0x40000, NULL, &dec->vp_data); + if (ret) + goto fail; + if (is_h264) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, + 0, + 2 * (dec->vpring_deblock + + dec->vpring_residual + + dec->vpring_ctrl + + 
0x1000), + NULL, &dec->vpring); + if (ret) + goto fail; + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, + 0, + (templ->max_references + 1) * dec->frame_mbs * 0x40 + + dec->frame_size + 0x2000, + NULL, &dec->mbring); + if (ret) + goto fail; + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART, + 0, 2 * (0x700 + MAX2(0x40000, 0x800 + 0x180 * dec->frame_mbs)), + NULL, &dec->bitstream); + if (ret) + goto fail; + ret = nouveau_bo_map(dec->bitstream, NOUVEAU_BO_WR, dec->client); + if (ret) + goto fail; + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART, + 0, 0x2000, NULL, &dec->vp_params); + if (ret) + goto fail; + ret = nouveau_bo_map(dec->vp_params, NOUVEAU_BO_WR, dec->client); + if (ret) + goto fail; + } + if (is_mpeg12) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART, + 0, + align(0x20 * mb(templ->width) * mb(templ->height), 0x100) + + (6 * 64 * 8) * mb(templ->width) * mb(templ->height) + 0x100, + NULL, &dec->mpeg12_bo); + if (ret) + goto fail; + ret = nouveau_bo_map(dec->mpeg12_bo, NOUVEAU_BO_WR, dec->client); + if (ret) + goto fail; + } + + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0, 0x1000, NULL, &dec->fence); + if (ret) + goto fail; + ret = nouveau_bo_map(dec->fence, NOUVEAU_BO_WR, dec->client); + if (ret) + goto fail; + *(uint32_t *)dec->fence->map = 0; + + if (is_h264) { + nouveau_pushbuf_bufctx(bsp_push, dec->bsp_bufctx); + nouveau_bufctx_refn(dec->bsp_bufctx, 0, + dec->bsp_fw, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + nouveau_bufctx_refn(dec->bsp_bufctx, 0, + dec->bsp_data, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + } + + nouveau_pushbuf_bufctx(vp_push, dec->vp_bufctx); + nouveau_bufctx_refn(dec->vp_bufctx, 0, dec->vp_fw, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + nouveau_bufctx_refn(dec->vp_bufctx, 0, dec->vp_data, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + + if (is_h264 && !ret) + ret = nouveau_object_new(dec->bsp_channel, 0xbeef74b0, 0x74b0, + NULL, 0, &dec->bsp); + + if (!ret) + ret = 
nouveau_object_new(dec->vp_channel, 0xbeef7476, 0x7476, + NULL, 0, &dec->vp); + + if (ret) + goto fail; + + + if (is_h264) { + /* Zero out some parts of mbring/vpring. There's gotta be some cleaner way + * of doing this... perhaps makes sense to just copy the relevant logic + * here. */ + color.f[0] = color.f[1] = color.f[2] = color.f[3] = 0; + surf.offset = dec->frame_size; + surf.width = 64; + surf.height = (templ->max_references + 1) * dec->frame_mbs / 4; + surf.depth = 1; + surf.base.format = PIPE_FORMAT_B8G8R8A8_UNORM; + surf.base.u.tex.level = 0; + surf.base.texture = &mip.base.base; + mip.level[0].tile_mode = 0; + mip.level[0].pitch = surf.width * 4; + mip.base.domain = NOUVEAU_BO_VRAM; + mip.base.bo = dec->mbring; + context->clear_render_target(context, &surf.base, &color, 0, 0, 64, 4760); + surf.offset = dec->vpring->size / 2 - 0x1000; + surf.width = 1024; + surf.height = 1; + mip.level[0].pitch = surf.width * 4; + mip.base.bo = dec->vpring; + context->clear_render_target(context, &surf.base, &color, 0, 0, 1024, 1); + surf.offset = dec->vpring->size - 0x1000; + context->clear_render_target(context, &surf.base, &color, 0, 0, 1024, 1); + + PUSH_SPACE(screen->pushbuf, 5); + PUSH_REFN(screen->pushbuf, dec->fence, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + /* The clear_render_target is done via 3D engine, so use it to write to a + * semaphore to indicate that it's done. 
+ */ + BEGIN_NV04(screen->pushbuf, NV50_3D(QUERY_ADDRESS_HIGH), 4); + PUSH_DATAh(screen->pushbuf, dec->fence->offset); + PUSH_DATA (screen->pushbuf, dec->fence->offset); + PUSH_DATA (screen->pushbuf, 1); + PUSH_DATA (screen->pushbuf, 0xf010); + PUSH_KICK (screen->pushbuf); + + PUSH_SPACE(bsp_push, 2 + 12 + 2 + 4 + 3); + + BEGIN_NV04(bsp_push, SUBC_BSP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (bsp_push, dec->bsp->handle); + + BEGIN_NV04(bsp_push, SUBC_BSP(0x180), 11); + for (i = 0; i < 11; i++) + PUSH_DATA(bsp_push, nv04_data.vram); + BEGIN_NV04(bsp_push, SUBC_BSP(0x1b8), 1); + PUSH_DATA (bsp_push, nv04_data.vram); + + BEGIN_NV04(bsp_push, SUBC_BSP(0x600), 3); + PUSH_DATAh(bsp_push, dec->bsp_fw->offset); + PUSH_DATA (bsp_push, dec->bsp_fw->offset); + PUSH_DATA (bsp_push, dec->bsp_fw->size); + + BEGIN_NV04(bsp_push, SUBC_BSP(0x628), 2); + PUSH_DATA (bsp_push, dec->bsp_data->offset >> 8); + PUSH_DATA (bsp_push, dec->bsp_data->size); + PUSH_KICK (bsp_push); + } + + PUSH_SPACE(vp_push, 2 + 12 + 2 + 4 + 3); + + BEGIN_NV04(vp_push, SUBC_VP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (vp_push, dec->vp->handle); + + BEGIN_NV04(vp_push, SUBC_VP(0x180), 11); + for (i = 0; i < 11; i++) + PUSH_DATA(vp_push, nv04_data.vram); + + BEGIN_NV04(vp_push, SUBC_VP(0x1b8), 1); + PUSH_DATA (vp_push, nv04_data.vram); + + BEGIN_NV04(vp_push, SUBC_VP(0x600), 3); + PUSH_DATAh(vp_push, dec->vp_fw->offset); + PUSH_DATA (vp_push, dec->vp_fw->offset); + PUSH_DATA (vp_push, dec->vp_fw->size); + + BEGIN_NV04(vp_push, SUBC_VP(0x628), 2); + PUSH_DATA (vp_push, dec->vp_data->offset >> 8); + PUSH_DATA (vp_push, dec->vp_data->size); + PUSH_KICK (vp_push); + + return &dec->base; +fail: + nv84_decoder_destroy(&dec->base); + return NULL; +} + +static struct pipe_sampler_view ** +nv84_video_buffer_sampler_view_planes(struct pipe_video_buffer *buffer) +{ + struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer; + return buf->sampler_view_planes; +} + +static struct pipe_sampler_view ** 
+nv84_video_buffer_sampler_view_components(struct pipe_video_buffer *buffer) +{ + struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer; + return buf->sampler_view_components; +} + +static struct pipe_surface ** +nv84_video_buffer_surfaces(struct pipe_video_buffer *buffer) +{ + struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer; + return buf->surfaces; +} +/* Release every resource, sampler view, surface and BO reference held by the + * buffer, then free it. Also used as the error path of + * nv84_video_buffer_create(). + */ +static void +nv84_video_buffer_destroy(struct pipe_video_buffer *buffer) +{ + struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer; + unsigned i; + + assert(buf); + + for (i = 0; i < VL_NUM_COMPONENTS; ++i) { + pipe_resource_reference(&buf->resources[i], NULL); + pipe_sampler_view_reference(&buf->sampler_view_planes[i], NULL); + pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL); + pipe_surface_reference(&buf->surfaces[i * 2], NULL); + pipe_surface_reference(&buf->surfaces[i * 2 + 1], NULL); + } + + nouveau_bo_ref(NULL, &buf->interlaced); + nouveau_bo_ref(NULL, &buf->full); + + FREE(buffer); +} +/* Create a video buffer whose Y and UV planes live in one BO. + * Requests made with XVMC_VL set, or for a format other than NV12, are passed + * to vl_video_buffer_create(). Returns NULL when the template is not + * interlaced or not 4:2:0, or when any resource, BO, sampler view or surface + * cannot be created. + */ +struct pipe_video_buffer * +nv84_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *template) +{ + struct nv84_video_buffer *buffer; + struct pipe_resource templ; + unsigned i, j, component; + struct pipe_sampler_view sv_templ; + struct pipe_surface surf_templ; + struct nv50_miptree *mt0, *mt1; + struct nouveau_bo *empty = NULL; + struct nouveau_screen *screen = &((struct nv50_context *)pipe)->screen->base; + union nouveau_bo_config cfg; + unsigned bo_size; + + if (getenv("XVMC_VL") || template->buffer_format != PIPE_FORMAT_NV12) + return vl_video_buffer_create(pipe, template); + + if (!template->interlaced) { + debug_printf("Require interlaced video buffers\n"); + return NULL; + } + if (template->chroma_format != PIPE_VIDEO_CHROMA_FORMAT_420) { + debug_printf("Must use 4:2:0 format\n"); + return NULL; + } + + /* + * Note that there are always going to be exactly two planes, one for Y, + * and one for UV. 
These are also the resources. VP expects these to be + * adjacent, so they need to belong to the same BO. + */ + + buffer = CALLOC_STRUCT(nv84_video_buffer); + if (!buffer) return NULL; + + buffer->mvidx = -1; + + buffer->base.buffer_format = template->buffer_format; + buffer->base.context = pipe; + buffer->base.destroy = nv84_video_buffer_destroy; + buffer->base.chroma_format = template->chroma_format; + buffer->base.width = template->width; + buffer->base.height = template->height; + buffer->base.get_sampler_view_planes = nv84_video_buffer_sampler_view_planes; + buffer->base.get_sampler_view_components = nv84_video_buffer_sampler_view_components; + buffer->base.get_surfaces = nv84_video_buffer_surfaces; + buffer->base.interlaced = true; + + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D_ARRAY; + templ.depth0 = 1; + templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; + templ.format = PIPE_FORMAT_R8_UNORM; + templ.width0 = align(template->width, 2); + templ.height0 = align(template->height, 4) / 2; + templ.flags = NV50_RESOURCE_FLAG_VIDEO | NV50_RESOURCE_FLAG_NOALLOC; + templ.array_size = 2; + + cfg.nv50.tile_mode = 0x20; + cfg.nv50.memtype = 0x70; + + buffer->resources[0] = pipe->screen->resource_create(pipe->screen, &templ); + if (!buffer->resources[0]) + goto error; + + templ.format = PIPE_FORMAT_R8G8_UNORM; + templ.width0 /= 2; + templ.height0 /= 2; + buffer->resources[1] = pipe->screen->resource_create(pipe->screen, &templ); + if (!buffer->resources[1]) + goto error; + + mt0 = nv50_miptree(buffer->resources[0]); + mt1 = nv50_miptree(buffer->resources[1]); + + bo_size = mt0->total_size + mt1->total_size; + if (nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, 0, + bo_size, &cfg, &buffer->interlaced)) + goto error; + /* XXX Change reference frame management so that this is only allocated in + * the decoder when necessary. 
*/ + if (nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, 0, + bo_size, &cfg, &buffer->full)) + goto error; + + mt0->base.bo = buffer->interlaced; + mt0->base.domain = NOUVEAU_BO_VRAM; + mt0->base.offset = 0; + mt0->base.address = buffer->interlaced->offset + mt0->base.offset; + nouveau_bo_ref(buffer->interlaced, &empty); + + mt1->base.bo = buffer->interlaced; + mt1->base.domain = NOUVEAU_BO_VRAM; + mt1->base.offset = mt0->layer_stride * 2; + mt1->base.address = buffer->interlaced->offset + mt1->base.offset; + nouveau_bo_ref(buffer->interlaced, &empty); + + memset(&sv_templ, 0, sizeof(sv_templ)); + for (component = 0, i = 0; i < 2; ++i ) { + struct pipe_resource *res = buffer->resources[i]; + unsigned nr_components = util_format_get_nr_components(res->format); + + u_sampler_view_default_template(&sv_templ, res, res->format); + buffer->sampler_view_planes[i] = + pipe->create_sampler_view(pipe, res, &sv_templ); + if (!buffer->sampler_view_planes[i]) + goto error; + + for (j = 0; j < nr_components; ++j, ++component) { + sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = + PIPE_SWIZZLE_RED + j; + sv_templ.swizzle_a = PIPE_SWIZZLE_ONE; + + buffer->sampler_view_components[component] = + pipe->create_sampler_view(pipe, res, &sv_templ); + if (!buffer->sampler_view_components[component]) + goto error; + } + } + + memset(&surf_templ, 0, sizeof(surf_templ)); + for (j = 0; j < 2; ++j) { + surf_templ.format = buffer->resources[j]->format; + surf_templ.u.tex.first_layer = surf_templ.u.tex.last_layer = 0; + buffer->surfaces[j * 2] = + pipe->create_surface(pipe, buffer->resources[j], &surf_templ); + if (!buffer->surfaces[j * 2]) + goto error; + + surf_templ.u.tex.first_layer = surf_templ.u.tex.last_layer = 1; + buffer->surfaces[j * 2 + 1] = + pipe->create_surface(pipe, buffer->resources[j], &surf_templ); + if (!buffer->surfaces[j * 2 + 1]) + goto error; + } + + return &buffer->base; + +error: + nv84_video_buffer_destroy(&buffer->base); + return 
NULL; +} +/* Report the value of a video capability for the given profile. Unknown + * params (and unknown profiles for PIPE_VIDEO_CAP_MAX_LEVEL) print a debug + * message and yield 0. + */ +int +nv84_screen_get_video_param(struct pipe_screen *pscreen, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint, + enum pipe_video_cap param) +{ + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC || + u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG12; + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 1; + case PIPE_VIDEO_CAP_MAX_WIDTH: + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return 2048; + case PIPE_VIDEO_CAP_PREFERED_FORMAT: + return PIPE_FORMAT_NV12; + case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: + case PIPE_VIDEO_CAP_PREFERS_INTERLACED: + return true; + case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: + return false; + case PIPE_VIDEO_CAP_MAX_LEVEL: + switch (profile) { + case PIPE_VIDEO_PROFILE_MPEG1: + return 0; + case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE: + case PIPE_VIDEO_PROFILE_MPEG2_MAIN: + return 3; + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + return 41; + default: + debug_printf("unknown video profile: %d\n", profile); + return 0; + } + default: + debug_printf("unknown video param: %d\n", param); + return 0; + } +} +/* For any profile other than PIPE_VIDEO_PROFILE_UNKNOWN only NV12 is + * accepted; PIPE_VIDEO_PROFILE_UNKNOWN defers to + * vl_video_buffer_is_format_supported(). + */ +boolean +nv84_screen_video_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint) +{ + if (profile != PIPE_VIDEO_PROFILE_UNKNOWN) + return format == PIPE_FORMAT_NV12; + + return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video.h b/src/gallium/drivers/nouveau/nv50/nv84_video.h new file mode 100644 index 00000000000..2edba389dbf --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv84_video.h @@ -0,0 +1,138 @@ +/* + * Copyright 2013 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef NV84_VIDEO_H_ +#define NV84_VIDEO_H_ + +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" +#include "vl/vl_types.h" + +#include "vl/vl_mpeg12_bitstream.h" + +#include "util/u_video.h" + +#include "nv50/nv50_context.h" + +/* These are expected to be on their own pushbufs */ +#define SUBC_BSP(m) 2, (m) +#define SUBC_VP(m) 2, (m) + +union pipe_desc { + struct pipe_picture_desc *base; + struct pipe_mpeg12_picture_desc *mpeg12; + struct pipe_mpeg4_picture_desc *mpeg4; + struct pipe_vc1_picture_desc *vc1; + struct pipe_h264_picture_desc *h264; +}; + +struct nv84_video_buffer { + struct pipe_video_buffer base; + struct pipe_resource *resources[VL_NUM_COMPONENTS]; + struct pipe_sampler_view *sampler_view_planes[VL_NUM_COMPONENTS]; + struct pipe_sampler_view *sampler_view_components[VL_NUM_COMPONENTS]; + struct pipe_surface *surfaces[VL_NUM_COMPONENTS * 2]; + + struct nouveau_bo *interlaced, *full; + int mvidx; + unsigned frame_num, frame_num_max; +}; + +struct nv84_decoder { + struct pipe_video_codec base; + struct 
nouveau_client *client; + struct nouveau_object *bsp_channel, *vp_channel, *bsp, *vp; + struct nouveau_pushbuf *bsp_pushbuf, *vp_pushbuf; + struct nouveau_bufctx *bsp_bufctx, *vp_bufctx; + + struct nouveau_bo *bsp_fw, *bsp_data; + struct nouveau_bo *vp_fw, *vp_data; + struct nouveau_bo *mbring, *vpring; + + /* + * states: + * 0: init + * 1: vpring/mbring cleared, bsp is ready + * 2: bsp is done, vp is ready + * and then vp puts it back to 1 + */ + struct nouveau_bo *fence; + + struct nouveau_bo *bitstream; + struct nouveau_bo *vp_params; + + size_t vp_fw2_offset; + + unsigned frame_mbs, frame_size; + /* VPRING layout: + RESIDUAL + CTRL + DEBLOCK + 0x1000 + */ + unsigned vpring_deblock, vpring_residual, vpring_ctrl; + + + struct vl_mpg12_bs *mpeg12_bs; + + struct nouveau_bo *mpeg12_bo; + void *mpeg12_mb_info; + uint16_t *mpeg12_data; + const int *zscan; + uint8_t mpeg12_intra_matrix[64]; + uint8_t mpeg12_non_intra_matrix[64]; +}; +/* Divide coord by 16, rounding up. */ +static INLINE uint32_t mb(uint32_t coord) +{ + return (coord + 0xf)>>4; +} +/* Divide coord by 32, rounding up. */ +static INLINE uint32_t mb_half(uint32_t coord) +{ + return (coord + 0x1f)>>5; +} + +int +nv84_decoder_bsp(struct nv84_decoder *dec, + struct pipe_h264_picture_desc *desc, + unsigned num_buffers, + const void *const *data, + const unsigned *num_bytes, + struct nv84_video_buffer *dest); + +void +nv84_decoder_vp_h264(struct nv84_decoder *dec, + struct pipe_h264_picture_desc *desc, + struct nv84_video_buffer *dest); + +void +nv84_decoder_vp_mpeg12_mb(struct nv84_decoder *dec, + struct pipe_mpeg12_picture_desc *desc, + const struct pipe_mpeg12_macroblock *mb); + +void +nv84_decoder_vp_mpeg12(struct nv84_decoder *dec, + struct pipe_mpeg12_picture_desc *desc, + struct nv84_video_buffer *dest); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c b/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c new file mode 100644 index 00000000000..86047b5f463 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c @@ -0,0 +1,250 @@ +/* + * Copyright 
2013 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nv50/nv84_video.h" + +struct iparm { + struct iseqparm { + uint32_t chroma_format_idc; // 00 + uint32_t pad[(0x128 - 0x4) / 4]; + uint32_t log2_max_frame_num_minus4; // 128 + uint32_t pic_order_cnt_type; // 12c + uint32_t log2_max_pic_order_cnt_lsb_minus4; // 130 + uint32_t delta_pic_order_always_zero_flag; // 134 + uint32_t num_ref_frames; // 138 + uint32_t pic_width_in_mbs_minus1; // 13c + uint32_t pic_height_in_map_units_minus1; // 140 + uint32_t frame_mbs_only_flag; // 144 + uint32_t mb_adaptive_frame_field_flag; // 148 + uint32_t direct_8x8_inference_flag; // 14c + } iseqparm; // 000 + struct ipicparm { + uint32_t entropy_coding_mode_flag; // 00 + uint32_t pic_order_present_flag; // 04 + uint32_t num_slice_groups_minus1; // 08 + uint32_t slice_group_map_type; // 0c + uint32_t pad1[0x60 / 4]; + uint32_t u70; // 70 + uint32_t u74; // 74 + uint32_t u78; // 78 + uint32_t num_ref_idx_l0_active_minus1; // 7c + uint32_t num_ref_idx_l1_active_minus1; // 80 + uint32_t weighted_pred_flag; // 84 + uint32_t weighted_bipred_idc; // 88 + uint32_t pic_init_qp_minus26; // 8c + uint32_t chroma_qp_index_offset; // 90 + uint32_t deblocking_filter_control_present_flag; // 94 + uint32_t constrained_intra_pred_flag; // 98 + uint32_t redundant_pic_cnt_present_flag; // 9c + uint32_t transform_8x8_mode_flag; // a0 + uint32_t pad2[(0x1c8 - 0xa0 - 4) / 4]; + uint32_t second_chroma_qp_index_offset; // 1c8 + uint32_t u1cc; // 1cc + uint32_t curr_pic_order_cnt; // 1d0 + uint32_t field_order_cnt[2]; // 1d4 + uint32_t curr_mvidx; // 1dc + struct iref { + uint32_t u00; // 00 + uint32_t field_is_ref; // 04 // bit0: top, bit1: bottom + uint8_t is_long_term; // 08 + uint8_t non_existing; // 09 + uint32_t frame_idx; // 0c + uint32_t field_order_cnt[2]; // 10 + uint32_t mvidx; // 18 + uint8_t field_pic_flag; // 1c + // 20 + } refs[0x10]; // 1e0 + } ipicparm; // 150 +}; + +int +nv84_decoder_bsp(struct nv84_decoder *dec, + struct pipe_h264_picture_desc *desc, + unsigned 
num_buffers, + const void *const *data, + const unsigned *num_bytes, + struct nv84_video_buffer *dest) +{ + struct iparm params; + uint32_t more_params[0x44 / 4] = {0}; + unsigned total_bytes = 0; + int i; + static const uint32_t end[] = {0x0b010000, 0, 0x0b010000, 0}; + char indexes[17] = {0}; + struct nouveau_pushbuf *push = dec->bsp_pushbuf; + struct nouveau_pushbuf_refn bo_refs[] = { + { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dec->bitstream, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART }, + { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + }; + + nouveau_bo_wait(dec->fence, NOUVEAU_BO_RDWR, dec->client); + + STATIC_ASSERT(sizeof(struct iparm) == 0x530); + + memset(&params, 0, sizeof(params)); + + dest->frame_num = dest->frame_num_max = desc->frame_num; + + for (i = 0; i < 16; i++) { + struct iref *ref = &params.ipicparm.refs[i]; + struct nv84_video_buffer *frame = (struct nv84_video_buffer *)desc->ref[i]; + if (!frame) break; + /* The frame index is relative to the last IDR frame. So once the frame + * num goes back to 0, previous reference frames need to have a negative + * index. + */ + if (desc->frame_num >= frame->frame_num_max) { + frame->frame_num_max = desc->frame_num; + } else { + frame->frame_num -= frame->frame_num_max + 1; + frame->frame_num_max = desc->frame_num; + } + ref->non_existing = 0; + ref->field_is_ref = (desc->top_is_reference[i] ? 1 : 0) | + (desc->bottom_is_reference[i] ? 
2 : 0); + ref->is_long_term = desc->is_long_term[i]; + ref->field_order_cnt[0] = desc->field_order_cnt_list[i][0]; + ref->field_order_cnt[1] = desc->field_order_cnt_list[i][1]; + ref->frame_idx = frame->frame_num; + ref->u00 = ref->mvidx = frame->mvidx; + ref->field_pic_flag = desc->field_pic_flag; + indexes[frame->mvidx] = 1; + } + + /* Needs to be adjusted if we ever support non-4:2:0 videos */ + params.iseqparm.chroma_format_idc = 1; + + params.iseqparm.pic_width_in_mbs_minus1 = mb(dec->base.width) - 1; + if (desc->field_pic_flag || desc->mb_adaptive_frame_field_flag) + params.iseqparm.pic_height_in_map_units_minus1 = mb_half(dec->base.height) - 1; + else + params.iseqparm.pic_height_in_map_units_minus1 = mb(dec->base.height) - 1; + + if (desc->bottom_field_flag) + params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[1]; + else + params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[0]; + params.ipicparm.field_order_cnt[0] = desc->field_order_cnt[0]; + params.ipicparm.field_order_cnt[1] = desc->field_order_cnt[1]; + if (desc->is_reference) { + if (dest->mvidx < 0) { + for (i = 0; i < desc->num_ref_frames + 1; i++) { + if (!indexes[i]) { + dest->mvidx = i; + break; + } + } + assert(i != desc->num_ref_frames + 1); + } + + params.ipicparm.u1cc = params.ipicparm.curr_mvidx = dest->mvidx; + } + + params.iseqparm.num_ref_frames = desc->num_ref_frames; + params.iseqparm.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag; + params.ipicparm.constrained_intra_pred_flag = desc->constrained_intra_pred_flag; + params.ipicparm.weighted_pred_flag = desc->weighted_pred_flag; + params.ipicparm.weighted_bipred_idc = desc->weighted_bipred_idc; + params.iseqparm.frame_mbs_only_flag = desc->frame_mbs_only_flag; + params.ipicparm.transform_8x8_mode_flag = desc->transform_8x8_mode_flag; + params.ipicparm.chroma_qp_index_offset = desc->chroma_qp_index_offset; + params.ipicparm.second_chroma_qp_index_offset = desc->second_chroma_qp_index_offset; + 
params.ipicparm.pic_init_qp_minus26 = desc->pic_init_qp_minus26; + params.ipicparm.num_ref_idx_l0_active_minus1 = desc->num_ref_idx_l0_active_minus1; + params.ipicparm.num_ref_idx_l1_active_minus1 = desc->num_ref_idx_l1_active_minus1; + params.iseqparm.log2_max_frame_num_minus4 = desc->log2_max_frame_num_minus4; + params.iseqparm.pic_order_cnt_type = desc->pic_order_cnt_type; + params.iseqparm.log2_max_pic_order_cnt_lsb_minus4 = desc->log2_max_pic_order_cnt_lsb_minus4; + params.iseqparm.delta_pic_order_always_zero_flag = desc->delta_pic_order_always_zero_flag; + params.iseqparm.direct_8x8_inference_flag = desc->direct_8x8_inference_flag; + params.ipicparm.entropy_coding_mode_flag = desc->entropy_coding_mode_flag; + params.ipicparm.pic_order_present_flag = desc->pic_order_present_flag; + params.ipicparm.deblocking_filter_control_present_flag = desc->deblocking_filter_control_present_flag; + params.ipicparm.redundant_pic_cnt_present_flag = desc->redundant_pic_cnt_present_flag; + + memcpy(dec->bitstream->map, ¶ms, sizeof(params)); + for (i = 0; i < num_buffers; i++) { + assert(total_bytes + num_bytes[i] < dec->bitstream->size / 2 - 0x700); + memcpy(dec->bitstream->map + 0x700 + total_bytes, data[i], num_bytes[i]); + total_bytes += num_bytes[i]; + } + memcpy(dec->bitstream->map + 0x700 + total_bytes, end, sizeof(end)); + total_bytes += sizeof(end); + more_params[1] = total_bytes; + memcpy(dec->bitstream->map + 0x600, more_params, sizeof(more_params)); + + PUSH_SPACE(push, 5 + 21 + 3 + 2 + 4 + 2); + nouveau_pushbuf_refn(push, bo_refs, sizeof(bo_refs)/sizeof(bo_refs[0])); + + /* Wait for the fence = 1 */ + BEGIN_NV04(push, SUBC_BSP(0x10), 4); + PUSH_DATAh(push, dec->fence->offset); + PUSH_DATA (push, dec->fence->offset); + PUSH_DATA (push, 1); + PUSH_DATA (push, 1); + + /* TODO: Use both halves of bitstream/vpring for alternating frames */ + + /* Kick off the BSP */ + BEGIN_NV04(push, SUBC_BSP(0x400), 20); + PUSH_DATA (push, dec->bitstream->offset >> 8); + PUSH_DATA 
(push, (dec->bitstream->offset >> 8) + 7); + PUSH_DATA (push, dec->bitstream->size / 2 - 0x700); + PUSH_DATA (push, (dec->bitstream->offset >> 8) + 6); + PUSH_DATA (push, 1); + PUSH_DATA (push, dec->mbring->offset >> 8); + PUSH_DATA (push, dec->frame_size); + PUSH_DATA (push, (dec->mbring->offset + dec->frame_size) >> 8); + PUSH_DATA (push, dec->vpring->offset >> 8); + PUSH_DATA (push, dec->vpring->size / 2); + PUSH_DATA (push, dec->vpring_residual); + PUSH_DATA (push, dec->vpring_ctrl); + PUSH_DATA (push, 0); + PUSH_DATA (push, dec->vpring_residual); + PUSH_DATA (push, dec->vpring_residual + dec->vpring_ctrl); + PUSH_DATA (push, dec->vpring_deblock); + PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl + + dec->vpring_residual + dec->vpring_deblock) >> 8); + PUSH_DATA (push, 0x654321); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0x100008); + + BEGIN_NV04(push, SUBC_BSP(0x620), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 0); + + /* Write fence = 2, intr */ + BEGIN_NV04(push, SUBC_BSP(0x610), 3); + PUSH_DATAh(push, dec->fence->offset); + PUSH_DATA (push, dec->fence->offset); + PUSH_DATA (push, 2); + + BEGIN_NV04(push, SUBC_BSP(0x304), 1); + PUSH_DATA (push, 0x101); + PUSH_KICK (push); + return 0; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c b/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c new file mode 100644 index 00000000000..619aa4e7a40 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c @@ -0,0 +1,552 @@ +/* + * Copyright 2013 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, 
subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv50/nv84_video.h" + +#include "util/u_sse.h" + +struct h264_iparm1 { + uint8_t scaling_lists_4x4[6][16]; // 00 + uint8_t scaling_lists_8x8[2][64]; // 60 + uint32_t width; // e0 + uint32_t height; // e4 + uint64_t ref1_addrs[16]; // e8 + uint64_t ref2_addrs[16]; // 168 + uint32_t unk1e8; + uint32_t unk1ec; + uint32_t w1; // 1f0 + uint32_t w2; // 1f4 + uint32_t w3; // 1f8 + uint32_t h1; // 1fc + uint32_t h2; // 200 + uint32_t h3; // 204 + uint32_t mb_adaptive_frame_field_flag; // 208 + uint32_t field_pic_flag; // 20c + uint32_t format; // 210 + uint32_t unk214; // 214 +}; + +struct h264_iparm2 { + uint32_t width; // 00 + uint32_t height; // 04 + uint32_t mbs; // 08 + uint32_t w1; // 0c + uint32_t w2; // 10 + uint32_t w3; // 14 + uint32_t h1; // 18 + uint32_t h2; // 1c + uint32_t h3; // 20 + uint32_t unk24; + uint32_t mb_adaptive_frame_field_flag; // 28 + uint32_t top; // 2c + uint32_t bottom; // 30 + uint32_t is_reference; // 34 +}; + +void +nv84_decoder_vp_h264(struct nv84_decoder *dec, + struct pipe_h264_picture_desc *desc, + struct nv84_video_buffer *dest) +{ + struct h264_iparm1 param1; + struct h264_iparm2 param2; + int i, width = align(dest->base.width, 16), + height = align(dest->base.height, 16); + + struct nouveau_pushbuf *push = dec->vp_pushbuf; + struct nouveau_pushbuf_refn 
bo_refs[] = { + { dest->interlaced, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dest->full, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dec->vp_params, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART }, + { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + bool is_ref = desc->is_reference; + + STATIC_ASSERT(sizeof(struct h264_iparm1) == 0x218); + STATIC_ASSERT(sizeof(struct h264_iparm2) == 0x38); + + memset(¶m1, 0, sizeof(param1)); + memset(¶m2, 0, sizeof(param2)); + + memcpy(¶m1.scaling_lists_4x4, desc->scaling_lists_4x4, + sizeof(param1.scaling_lists_4x4)); + memcpy(¶m1.scaling_lists_8x8, desc->scaling_lists_8x8, + sizeof(param1.scaling_lists_8x8)); + + param1.width = width; + param1.w1 = param1.w2 = param1.w3 = align(width, 64); + param1.height = param1.h2 = height; + param1.h1 = param1.h3 = align(height, 32); + param1.format = 0x3231564e; /* 'NV12' */ + param1.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag; + param1.field_pic_flag = desc->field_pic_flag; + + param2.width = width; + param2.w1 = param2.w2 = param2.w3 = param1.w1; + if (desc->field_pic_flag) + param2.height = align(height, 32) / 2; + else + param2.height = height; + param2.h1 = param2.h2 = align(height, 32); + param2.h3 = height; + param2.mbs = width * height >> 8; + if (desc->field_pic_flag) { + param2.top = desc->bottom_field_flag ? 2 : 1; + param2.bottom = desc->bottom_field_flag; + } + param2.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag; + param2.is_reference = desc->is_reference; + + PUSH_SPACE(push, 5 + 16 + 3 + 2 + 6 + (is_ref ? 
2 : 0) + 3 + 2 + 4 + 2); + + struct nouveau_bo *ref2_default = dest->full; + + for (i = 0; i < 16; i++) { + struct nv84_video_buffer *buf = (struct nv84_video_buffer *)desc->ref[i]; + struct nouveau_bo *bo1, *bo2; + if (buf) { + bo1 = buf->interlaced; + bo2 = buf->full; + if (i == 0) + ref2_default = buf->full; + } else { + bo1 = dest->interlaced; + bo2 = ref2_default; + } + param1.ref1_addrs[i] = bo1->offset; + param1.ref2_addrs[i] = bo2->offset; + struct nouveau_pushbuf_refn bo_refs[] = { + { bo1, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { bo2, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + }; + nouveau_pushbuf_refn(push, bo_refs, sizeof(bo_refs)/sizeof(bo_refs[0])); + } + + memcpy(dec->vp_params->map, ¶m1, sizeof(param1)); + memcpy(dec->vp_params->map + 0x400, ¶m2, sizeof(param2)); + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + /* Wait for BSP to have completed */ + BEGIN_NV04(push, SUBC_VP(0x10), 4); + PUSH_DATAh(push, dec->fence->offset); + PUSH_DATA (push, dec->fence->offset); + PUSH_DATA (push, 2); + PUSH_DATA (push, 1); /* wait for sem == 2 */ + + /* VP step 1 */ + BEGIN_NV04(push, SUBC_VP(0x400), 15); + PUSH_DATA (push, 1); + PUSH_DATA (push, param2.mbs); + PUSH_DATA (push, 0x3987654); /* each nibble probably a dma index */ + PUSH_DATA (push, 0x55001); /* constant */ + PUSH_DATA (push, dec->vp_params->offset >> 8); + PUSH_DATA (push, (dec->vpring->offset + dec->vpring_residual) >> 8); + PUSH_DATA (push, dec->vpring_ctrl); + PUSH_DATA (push, dec->vpring->offset >> 8); + PUSH_DATA (push, dec->bitstream->size / 2 - 0x700); + PUSH_DATA (push, (dec->mbring->offset + dec->mbring->size - 0x2000) >> 8); + PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl + + dec->vpring_residual + dec->vpring_deblock) >> 8); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0x100008); + PUSH_DATA (push, dest->interlaced->offset >> 8); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, SUBC_VP(0x620), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, SUBC_VP(0x300), 1); 
/*
 * Dequantise one DCT coefficient.
 *
 * val    quantised coefficient
 * quant  quantiser matrix entry for this coefficient
 * mpeg1  non-zero to apply MPEG-1 oddification (non-zero even results are
 *        moved one step towards zero so that they become odd)
 *
 * Returns val * quant / 16, optionally oddified, saturated to [-2048, 2047].
 *
 * The arithmetic is carried out in int and narrowed only after clamping, so
 * large products saturate instead of wrapping. Oddification works on the
 * magnitude, so negative odd values are left alone (the previous
 * `(ret + 1) | 1` form turned -3 into -1 and -1 into +1).
 */
static inline int16_t inverse_quantize(int16_t val, uint8_t quant, int mpeg1) {
   int ret = val * quant / 16;
   if (mpeg1 && ret) {
      if (ret > 0)
         ret = (ret - 1) | 1;
      else
         ret = -((-ret - 1) | 1);
   }
   if (ret < -2048)
      ret = -2048;
   else if (ret > 2047)
      ret = 2047;
   return (int16_t)ret;
}

/*
 * Per-macroblock record appended to dec->mpeg12_mb_info by
 * nv84_decoder_vp_mpeg12_mb(); that function asserts its size is 32 bytes.
 */
struct mpeg12_mb_info {
   uint32_t index;               /* macroblock index: y * width_in_mbs + x */
   uint8_t unk4;
   uint8_t unk5;
   uint16_t coded_block_pattern;
   uint8_t block_counts[6];      /* number of (index, value) pairs per block */
   uint16_t PMV[8];
   uint16_t skipped;
};
macrob->macroblock_type & + (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD); + const uint8_t *quant_matrix = intra ? dec->mpeg12_intra_matrix : + dec->mpeg12_non_intra_matrix; + int mpeg1 = dec->base.profile == PIPE_VIDEO_PROFILE_MPEG1; + + info.index = macrob->y * mb(dec->base.width) + macrob->x; + info.unk4 = motion; + if (intra) + info.unk4 |= 1; + if (macrob->macroblock_modes.bits.dct_type) + info.unk4 |= 0x20; + info.unk5 = (macrob->motion_vertical_field_select << 4) | + (macrob->macroblock_modes.value & 0xf); + info.coded_block_pattern = macrob->coded_block_pattern; + if (motion) { + memcpy(info.PMV, macrob->PMV, sizeof(info.PMV)); + } + blocks = macrob->blocks; + for (mask = 0x20, block_index = 0; mask > 0; mask >>= 1, block_index++) { + if ((macrob->coded_block_pattern & mask) == 0) + continue; + + count = 0; + + /* + * The observation here is that there are a lot of 0's, and things go + * a lot faster if one skips over them. + */ + +#if defined(PIPE_ARCH_SSE) && defined(PIPE_ARCH_X86_64) +/* Note that the SSE implementation is much more tuned to X86_64. As it's not + * benchmarked on X86_32, disable it there. I suspect that the code needs to + * be reorganized in terms of 32-bit wide data in order to be more + * efficient. NV84+ were released well into the 64-bit CPU era, so it should + * be a minority case. + */ + +/* This returns a 16-bit bit-mask, each 2 bits are both 1 or both 0, depending + * on whether the corresponding (16-bit) word in blocks is zero or non-zero. */ +#define wordmask(blocks, zero) \ + (uint64_t)(_mm_movemask_epi8( \ + _mm_cmpeq_epi16( \ + zero, _mm_load_si128((__m128i *)(blocks))))) + + __m128i zero = _mm_setzero_si128(); + + /* TODO: Look into doing the inverse quantization in terms of SSE + * operations unconditionally, when necessary. 
*/ + uint64_t bmask0 = wordmask(blocks, zero); + bmask0 |= wordmask(blocks + 8, zero) << 16; + bmask0 |= wordmask(blocks + 16, zero) << 32; + bmask0 |= wordmask(blocks + 24, zero) << 48; + uint64_t bmask1 = wordmask(blocks + 32, zero); + bmask1 |= wordmask(blocks + 40, zero) << 16; + bmask1 |= wordmask(blocks + 48, zero) << 32; + bmask1 |= wordmask(blocks + 56, zero) << 48; + + /* The wordmask macro returns the inverse of what we want, since it + * returns a 1 for equal-to-zero. Invert. */ + bmask0 = ~bmask0; + bmask1 = ~bmask1; + + /* Note that the bitmask is actually sequences of 2 bits for each block + * index. This is because there is no movemask_epi16. That means that + * (a) ffs will never return 64, since the prev bit will always be set + * in that case, and (b) we need to do an extra bit shift. Or'ing the + * bitmasks together is faster than having a loop that computes them one + * at a time and processes them, on a Core i7-920. Trying to put bmask + * into an array and then looping also slows things down. 
+ */ + + /* shift needs to be the same width as i, and unsigned so that / 2 + * becomes a rshift operation */ + uint32_t shift; + i = 0; + + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + int16_t tmp; + while ((shift = __builtin_ffsll(bmask0))) { + i += (shift - 1) / 2; + bmask0 >>= shift - 1; + *dec->mpeg12_data++ = dec->zscan[i] * 2; + tmp = inverse_quantize(blocks[i], quant_matrix[i], mpeg1); + *dec->mpeg12_data++ = tmp; + sum += tmp; + count++; + i++; + bmask0 >>= 2; + } + i = 32; + while ((shift = __builtin_ffsll(bmask1))) { + i += (shift - 1) / 2; + bmask1 >>= shift - 1; + *dec->mpeg12_data++ = dec->zscan[i] * 2; + tmp = inverse_quantize(blocks[i], quant_matrix[i], mpeg1); + *dec->mpeg12_data++ = tmp; + sum += tmp; + count++; + i++; + bmask1 >>= 2; + } + } else { + while ((shift = __builtin_ffsll(bmask0))) { + i += (shift - 1) / 2; + bmask0 >>= shift - 1; + *dec->mpeg12_data++ = i * 2; + *dec->mpeg12_data++ = blocks[i]; + count++; + i++; + bmask0 >>= 2; + } + i = 32; + while ((shift = __builtin_ffsll(bmask1))) { + i += (shift - 1) / 2; + bmask1 >>= shift - 1; + *dec->mpeg12_data++ = i * 2; + *dec->mpeg12_data++ = blocks[i]; + count++; + i++; + bmask1 >>= 2; + } + } +#undef wordmask +#else + + /* + * This loop looks ridiculously written... and it is. I tried a lot of + * different ways of achieving this scan, and this was the fastest, at + * least on a Core i7-920. Note that it's not necessary to skip the 0's, + * the firmware will deal with those just fine. But it's faster to skip + * them. Note to people trying benchmarks: make sure to use realistic + * mpeg data, which can often be a single data point first followed by + * 63 0's, or <data> 7x <0> <data> 7x <0> etc. 
+ */ + i = 0; + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + while (true) { + int16_t tmp; + while (likely(i < 64 && !(tmp = blocks[i]))) i++; + if (i >= 64) break; + *dec->mpeg12_data++ = dec->zscan[i] * 2; + tmp = inverse_quantize(tmp, quant_matrix[i], mpeg1); + *dec->mpeg12_data++ = tmp; + sum += tmp; + count++; + i++; + } + } else { + while (true) { + int16_t tmp; + while (likely(i < 64 && !(tmp = blocks[i]))) i++; + if (i >= 64) break; + *dec->mpeg12_data++ = i * 2; + *dec->mpeg12_data++ = tmp; + count++; + i++; + } + } + +#endif + + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + if (!mpeg1 && (sum & 1) == 0) { + if (count && *(dec->mpeg12_data - 2) == 63 * 2) { + uint16_t *val = dec->mpeg12_data - 1; + if (*val & 1) *val -= 1; + else *val += 1; + } else { + *dec->mpeg12_data++ = 63 * 2; + *dec->mpeg12_data++ = 1; + count++; + } + } + } + + if (count) { + *(dec->mpeg12_data - 2) |= 1; + } else { + *dec->mpeg12_data++ = 1; + *dec->mpeg12_data++ = 0; + count = 1; + } + info.block_counts[block_index] = count; + blocks += 64; + } + + memcpy(dec->mpeg12_mb_info, &info, sizeof(info)); + dec->mpeg12_mb_info += sizeof(info); + + if (macrob->num_skipped_macroblocks) { + info.index++; + info.coded_block_pattern = 0; + info.skipped = macrob->num_skipped_macroblocks - 1; + memset(info.block_counts, 0, sizeof(info.block_counts)); + memcpy(dec->mpeg12_mb_info, &info, sizeof(info)); + dec->mpeg12_mb_info += sizeof(info); + } +} + +struct mpeg12_header { + uint32_t luma_top_size; // 00 + uint32_t luma_bottom_size; // 04 + uint32_t chroma_top_size; // 08 + uint32_t mbs; // 0c + uint32_t mb_info_size; // 10 + uint32_t mb_width_minus1; // 14 + uint32_t mb_height_minus1; // 18 + uint32_t width; // 1c + uint32_t height; // 20 + uint8_t progressive; // 24 + uint8_t mocomp_only; // 25 + uint8_t frames; // 26 + uint8_t picture_structure; // 27 + uint32_t unk28; // 28 -- 0x50100 + uint32_t unk2c; // 2c + uint32_t pad[4 * 13]; +}; + +void 
+nv84_decoder_vp_mpeg12(struct nv84_decoder *dec, + struct pipe_mpeg12_picture_desc *desc, + struct nv84_video_buffer *dest) +{ + struct nouveau_pushbuf *push = dec->vp_pushbuf; + struct nv84_video_buffer *ref1 = (struct nv84_video_buffer *)desc->ref[0]; + struct nv84_video_buffer *ref2 = (struct nv84_video_buffer *)desc->ref[1]; + struct nouveau_pushbuf_refn bo_refs[] = { + { dest->interlaced, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { NULL, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { NULL, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + { dec->mpeg12_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART }, + }; + int i, num_refs = sizeof(bo_refs) / sizeof(*bo_refs); + struct mpeg12_header header = {0}; + struct nv50_miptree *y = nv50_miptree(dest->resources[0]); + struct nv50_miptree *uv = nv50_miptree(dest->resources[1]); + + STATIC_ASSERT(sizeof(struct mpeg12_header) == 0x100); + + if (ref1 == NULL) + ref1 = dest; + if (ref2 == NULL) + ref2 = dest; + bo_refs[1].bo = ref1->interlaced; + bo_refs[2].bo = ref2->interlaced; + + header.luma_top_size = y->layer_stride; + header.luma_bottom_size = y->layer_stride; + header.chroma_top_size = uv->layer_stride; + header.mbs = mb(dec->base.width) * mb(dec->base.height); + header.mb_info_size = dec->mpeg12_mb_info - dec->mpeg12_bo->map - 0x100; + header.mb_width_minus1 = mb(dec->base.width) - 1; + header.mb_height_minus1 = mb(dec->base.height) - 1; + header.width = align(dec->base.width, 16); + header.height = align(dec->base.height, 16); + header.progressive = desc->frame_pred_frame_dct; + header.frames = 1 + (desc->ref[0] != NULL) + (desc->ref[1] != NULL); + header.picture_structure = desc->picture_structure; + header.unk28 = 0x50100; + + memcpy(dec->mpeg12_bo->map, &header, sizeof(header)); + + PUSH_SPACE(push, 10 + 3 + 2); + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + BEGIN_NV04(push, SUBC_VP(0x400), 9); + PUSH_DATA (push, 0x543210); /* each nibble possibly a dma index */ + PUSH_DATA (push, 0x555001); /* constant */ + PUSH_DATA (push, 
dec->mpeg12_bo->offset >> 8); + PUSH_DATA (push, (dec->mpeg12_bo->offset + 0x100) >> 8); + PUSH_DATA (push, (dec->mpeg12_bo->offset + 0x100 + + align(0x20 * mb(dec->base.width) * + mb(dec->base.height), 0x100)) >> 8); + PUSH_DATA (push, dest->interlaced->offset >> 8); + PUSH_DATA (push, ref1->interlaced->offset >> 8); + PUSH_DATA (push, ref2->interlaced->offset >> 8); + PUSH_DATA (push, 6 * 64 * 8 * header.mbs); + + BEGIN_NV04(push, SUBC_VP(0x620), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 0); + + for (i = 0; i < 2; i++) { + struct nv50_miptree *mt = nv50_miptree(dest->resources[i]); + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + } + PUSH_KICK (push); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video.c b/src/gallium/drivers/nouveau/nv50/nv98_video.c new file mode 100644 index 00000000000..069481de207 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv98_video.c @@ -0,0 +1,297 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv50/nv98_video.h" + +#include "util/u_sampler.h" +#include "util/u_format.h" + +static void +nv98_decoder_decode_bitstream(struct pipe_video_codec *decoder, + struct pipe_video_buffer *video_target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void *const *data, + const unsigned *num_bytes) +{ + struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder; + struct nouveau_vp3_video_buffer *target = (struct nouveau_vp3_video_buffer *)video_target; + uint32_t comm_seq = ++dec->fence_seq; + union pipe_desc desc; + + unsigned vp_caps, is_ref, ret; + struct nouveau_vp3_video_buffer *refs[16] = {}; + + desc.base = picture; + + assert(target->base.buffer_format == PIPE_FORMAT_NV12); + + ret = nv98_decoder_bsp(dec, desc, target, comm_seq, + num_buffers, data, num_bytes, + &vp_caps, &is_ref, refs); + + /* did we decode bitstream correctly? 
*/ + assert(ret == 2); + + nv98_decoder_vp(dec, desc, target, comm_seq, vp_caps, is_ref, refs); + nv98_decoder_ppp(dec, desc, target, comm_seq); +} + +struct pipe_video_codec * +nv98_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ) +{ + struct nouveau_screen *screen = &((struct nv50_context *)context)->screen->base; + struct nouveau_vp3_decoder *dec; + struct nouveau_pushbuf **push; + struct nv04_fifo nv04_data = {.vram = 0xbeef0201, .gart = 0xbeef0202}; + union nouveau_bo_config cfg; + + cfg.nv50.tile_mode = 0x20; + cfg.nv50.memtype = 0x70; + + int ret, i; + uint32_t codec = 1, ppp_codec = 3; + uint32_t timeout; + u32 tmp_size = 0; + + if (getenv("XVMC_VL")) + return vl_create_decoder(context, templ); + + if (templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + debug_printf("%x\n", templ->entrypoint); + return NULL; + } + + dec = CALLOC_STRUCT(nouveau_vp3_decoder); + if (!dec) + return NULL; + dec->client = screen->client; + dec->base = *templ; + nouveau_vp3_decoder_init_common(&dec->base); + + dec->bsp_idx = 5; + dec->vp_idx = 6; + dec->ppp_idx = 7; + + ret = nouveau_object_new(&screen->device->object, 0, + NOUVEAU_FIFO_CHANNEL_CLASS, + &nv04_data, sizeof(nv04_data), &dec->channel[0]); + + if (!ret) + ret = nouveau_pushbuf_new(screen->client, dec->channel[0], 4, + 32 * 1024, true, &dec->pushbuf[0]); + + for (i = 1; i < 3; ++i) { + dec->channel[i] = dec->channel[0]; + dec->pushbuf[i] = dec->pushbuf[0]; + } + push = dec->pushbuf; + + if (!ret) + ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x85b1, NULL, 0, &dec->bsp); + if (!ret) + ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x85b2, NULL, 0, &dec->vp); + if (!ret) + ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x85b3, NULL, 0, &dec->ppp); + if (ret) + goto fail; + + BEGIN_NV04(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[0], dec->bsp->handle); + + BEGIN_NV04(push[0], SUBC_BSP(0x180), 5); + for (i = 0; i < 5; i++) + PUSH_DATA (push[0], 
nv04_data.vram); + + BEGIN_NV04(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[1], dec->vp->handle); + + BEGIN_NV04(push[1], SUBC_VP(0x180), 6); + for (i = 0; i < 6; i++) + PUSH_DATA (push[1], nv04_data.vram); + + BEGIN_NV04(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[2], dec->ppp->handle); + + BEGIN_NV04(push[2], SUBC_PPP(0x180), 5); + for (i = 0; i < 5; i++) + PUSH_DATA (push[2], nv04_data.vram); + + dec->base.context = context; + dec->base.decode_bitstream = nv98_decoder_decode_bitstream; + + for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i) + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0, 1 << 20, NULL, &dec->bsp_bo[i]); + if (!ret) + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0x100, 4 << 20, NULL, &dec->inter_bo[0]); + if (!ret) + nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]); + if (ret) + goto fail; + + switch (u_reduce_video_profile(templ->profile)) { + case PIPE_VIDEO_FORMAT_MPEG12: { + codec = 1; + assert(templ->max_references <= 2); + break; + } + case PIPE_VIDEO_FORMAT_MPEG4: { + codec = 4; + tmp_size = mb(templ->height)*16 * mb(templ->width)*16; + assert(templ->max_references <= 2); + break; + } + case PIPE_VIDEO_FORMAT_VC1: { + ppp_codec = codec = 2; + tmp_size = mb(templ->height)*16 * mb(templ->width)*16; + assert(templ->max_references <= 2); + break; + } + case PIPE_VIDEO_FORMAT_MPEG4_AVC: { + codec = 3; + dec->tmp_stride = 16 * mb_half(templ->width) * nouveau_vp3_video_align(templ->height) * 3 / 2; + tmp_size = dec->tmp_stride * (templ->max_references + 1); + assert(templ->max_references <= 16); + break; + } + default: + fprintf(stderr, "invalid codec\n"); + goto fail; + } + + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + 0x4000, NULL, &dec->fw_bo); + if (ret) + goto fail; + + ret = nouveau_vp3_load_firmware(dec, templ->profile, screen->device->chipset); + if (ret) + goto fw_fail; + + if (codec != 3) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + 0x400, 
NULL, &dec->bitplane_bo); + if (ret) + goto fail; + } + + dec->ref_stride = mb(templ->width)*16 * (mb_half(templ->height)*32 + nouveau_vp3_video_align(templ->height)/2); + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + dec->ref_stride * (templ->max_references+2) + tmp_size, + &cfg, &dec->ref_bo); + if (ret) + goto fail; + + timeout = 0; + + BEGIN_NV04(push[0], SUBC_BSP(0x200), 2); + PUSH_DATA (push[0], codec); + PUSH_DATA (push[0], timeout); + + BEGIN_NV04(push[1], SUBC_VP(0x200), 2); + PUSH_DATA (push[1], codec); + PUSH_DATA (push[1], timeout); + + BEGIN_NV04(push[2], SUBC_PPP(0x200), 2); + PUSH_DATA (push[2], ppp_codec); + PUSH_DATA (push[2], timeout); + + ++dec->fence_seq; + +#if NOUVEAU_VP3_DEBUG_FENCE + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP, + 0, 0x1000, NULL, &dec->fence_bo); + if (ret) + goto fail; + + nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client); + dec->fence_map = dec->fence_bo->map; + dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0; + dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map))); + + /* So lets test if the fence is working? 
*/ + nouveau_pushbuf_space(push[0], 6, 1, 0); + PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); + BEGIN_NV04(push[0], SUBC_BSP(0x240), 3); + PUSH_DATAh(push[0], dec->fence_bo->offset); + PUSH_DATA (push[0], dec->fence_bo->offset); + PUSH_DATA (push[0], dec->fence_seq); + + BEGIN_NV04(push[0], SUBC_BSP(0x304), 1); + PUSH_DATA (push[0], 0); + PUSH_KICK (push[0]); + + nouveau_pushbuf_space(push[1], 6, 1, 0); + PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); + BEGIN_NV04(push[1], SUBC_VP(0x240), 3); + PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push[1], dec->fence_seq); + + BEGIN_NV04(push[1], SUBC_VP(0x304), 1); + PUSH_DATA (push[1], 0); + PUSH_KICK (push[1]); + + nouveau_pushbuf_space(push[2], 6, 1, 0); + PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); + BEGIN_NV04(push[2], SUBC_PPP(0x240), 3); + PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push[2], dec->fence_seq); + + BEGIN_NV04(push[2], SUBC_PPP(0x304), 1); + PUSH_DATA (push[2], 0); + PUSH_KICK (push[2]); + + usleep(100); + while (dec->fence_seq > dec->fence_map[0] || + dec->fence_seq > dec->fence_map[4] || + dec->fence_seq > dec->fence_map[8]) { + debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); + usleep(100); + } + debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); +#endif + + return &dec->base; + +fw_fail: + debug_printf("Cannot create decoder without firmware..\n"); + dec->base.destroy(&dec->base); + return NULL; + +fail: + debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret); + dec->base.destroy(&dec->base); + return NULL; +} + +struct pipe_video_buffer * +nv98_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *templat) +{ + return 
nouveau_vp3_video_buffer_create( + pipe, templat, NV50_RESOURCE_FLAG_VIDEO); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video.h b/src/gallium/drivers/nouveau/nv50/nv98_video.h new file mode 100644 index 00000000000..cec761df4ab --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv98_video.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nv50/nv50_context.h" +#include "nv50/nv50_screen.h" +#include "nouveau_vp3_video.h" + +#include "vl/vl_decoder.h" +#include "vl/vl_types.h" + +#include "util/u_video.h" + +extern unsigned +nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, + unsigned comm_seq, unsigned num_buffers, + const void *const *data, const unsigned *num_bytes, + unsigned *vp_caps, unsigned *is_ref, + struct nouveau_vp3_video_buffer *refs[16]); + +extern void +nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, unsigned comm_seq, + unsigned caps, unsigned is_ref, + struct nouveau_vp3_video_buffer *refs[16]); + +extern void +nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, unsigned comm_seq); diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c new file mode 100644 index 00000000000..97d4119b6d1 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c @@ -0,0 +1,159 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv50/nv98_video.h" + +#if NOUVEAU_VP3_DEBUG_FENCE +static void dump_comm_bsp(struct comm *comm) +{ + unsigned idx = comm->bsp_cur_index & 0xf; + debug_printf("Cur seq: %x, bsp byte ofs: %x\n", comm->bsp_cur_index, comm->byte_ofs); + debug_printf("Status: %08x, pos: %08x\n", comm->status[idx], comm->pos[idx]); +} +#endif + +unsigned +nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, + unsigned comm_seq, unsigned num_buffers, + const void *const *data, const unsigned *num_bytes, + unsigned *vp_caps, unsigned *is_ref, + struct nouveau_vp3_video_buffer *refs[16]) +{ + struct nouveau_pushbuf *push = dec->pushbuf[0]; + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); + uint32_t bsp_addr, comm_addr, inter_addr; + uint32_t slice_size, bucket_size, ring_size; + uint32_t caps; + int ret; + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH]; + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; + unsigned fence_extra = 0; + struct nouveau_pushbuf_refn bo_refs[] = { + { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, + { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, +#if NOUVEAU_VP3_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + + if (!dec->bitplane_bo) + num_refs--; + +#if NOUVEAU_VP3_DEBUG_FENCE + fence_extra = 4; 
+#endif + + ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client); + if (ret) { + debug_printf("map failed: %i %s\n", ret, strerror(-ret)); + return -1; + } + + caps = nouveau_vp3_bsp(dec, desc, target, comm_seq, + num_buffers, data, num_bytes); + + nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs); + + nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 8) + fence_extra + 2, num_refs, 0); + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + bsp_addr = bsp_bo->offset >> 8; + inter_addr = inter_bo->offset >> 8; + +#if NOUVEAU_VP3_DEBUG_FENCE + memset(dec->comm, 0, 0x200); + comm_addr = (dec->fence_bo->offset + COMM_OFFSET) >> 8; +#else + comm_addr = bsp_addr + (COMM_OFFSET>>8); +#endif + + BEGIN_NV04(push, SUBC_BSP(0x700), 5); + PUSH_DATA (push, caps); // 700 cmd + PUSH_DATA (push, bsp_addr + 1); // 704 strparm_bsp + PUSH_DATA (push, bsp_addr + 7); // 708 str addr + PUSH_DATA (push, comm_addr); // 70c comm + PUSH_DATA (push, comm_seq); // 710 seq + + if (codec != PIPE_VIDEO_FORMAT_MPEG4_AVC) { + u32 bitplane_addr; + int mpeg12 = (codec == PIPE_VIDEO_FORMAT_MPEG12); + + bitplane_addr = dec->bitplane_bo->offset >> 8; + + nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size); + BEGIN_NV04(push, SUBC_BSP(0x400), mpeg12 ? 
5 : 7); + PUSH_DATA (push, bsp_addr); // 400 picparm addr + PUSH_DATA (push, inter_addr); // 404 interparm addr + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 408 interdata addr + PUSH_DATA (push, ring_size << 8); // 40c interdata_size + if (!mpeg12) { + PUSH_DATA (push, bitplane_addr); // 410 BITPLANE_DATA + PUSH_DATA (push, 0x400); // 414 BITPLANE_DATA_SIZE + } + PUSH_DATA (push, 0); // dma idx + } else { + nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); + BEGIN_NV04(push, SUBC_BSP(0x400), 8); + PUSH_DATA (push, bsp_addr); // 400 picparm addr + PUSH_DATA (push, inter_addr); // 404 interparm addr + PUSH_DATA (push, slice_size << 8); // 408 interparm size? + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 40c interdata addr + PUSH_DATA (push, ring_size << 8); // 410 interdata size + PUSH_DATA (push, inter_addr + slice_size); // 414 bucket? + PUSH_DATA (push, bucket_size << 8); // 418 bucket size? unshifted.. + PUSH_DATA (push, 0); // 41c targets + // TODO: Double check 414 / 418 with nvidia trace + } + +#if NOUVEAU_VP3_DEBUG_FENCE + BEGIN_NV04(push, SUBC_BSP(0x240), 3); + PUSH_DATAh(push, dec->fence_bo->offset); + PUSH_DATA (push, dec->fence_bo->offset); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NV04(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK (push); + + { + unsigned spin = 0; + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) { + debug_printf("b%u: %u\n", dec->fence_seq, dec->fence_map[0]); + dump_comm_bsp(dec->comm); + } + } while (dec->fence_seq > dec->fence_map[0]); + } + + dump_comm_bsp(dec->comm); + return dec->comm->status[comm_seq & 0xf]; +#else + BEGIN_NV04(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); + return 2; +#endif +} diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c new file mode 100644 index 00000000000..6b0b7148dcb --- /dev/null +++ 
b/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c @@ -0,0 +1,143 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nv50/nv98_video.h" + +static void +nv98_decoder_setup_ppp(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target, uint32_t low700) { + struct nouveau_pushbuf *push = dec->pushbuf[2]; + + uint32_t stride_in = mb(dec->base.width); + uint32_t stride_out = mb(target->resources[0]->width0); + uint32_t dec_h = mb(dec->base.height); + uint32_t dec_w = mb(dec->base.width); + uint64_t in_addr; + uint32_t y2, cbcr, cbcr2, i; + struct nouveau_pushbuf_refn bo_refs[] = { + { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { dec->ref_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, +#if NOUVEAU_VP3_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + }; + unsigned num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + + for (i = 0; i < 2; ++i) { + struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i]; + bo_refs[i].bo = mt->base.bo; + } + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + nouveau_vp3_ycbcr_offsets(dec, &y2, &cbcr, &cbcr2); + + BEGIN_NV04(push, SUBC_PPP(0x700), 10); + in_addr = nouveau_vp3_video_addr(dec, target) >> 8; + + PUSH_DATA (push, (stride_out << 24) | (stride_out << 16) | low700); // 700 + PUSH_DATA (push, (stride_in << 24) | (stride_in << 16) | (dec_h << 8) | dec_w); // 704 + assert(dec_w == stride_in); + + /* Input: */ + PUSH_DATA (push, in_addr); // 708 + PUSH_DATA (push, in_addr + y2); // 70c + PUSH_DATA (push, in_addr + cbcr); // 710 + PUSH_DATA (push, in_addr + cbcr2); // 714 + + for (i = 0; i < 2; ++i) { + struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i]; + + PUSH_DATA (push, mt->base.address >> 8); + PUSH_DATA (push, (mt->base.address + mt->total_size/2) >> 8); + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + } +} + +static uint32_t +nv98_decoder_vc1_ppp(struct nouveau_vp3_decoder *dec, struct pipe_vc1_picture_desc *desc, struct nouveau_vp3_video_buffer *target) { + struct nouveau_pushbuf *push = dec->pushbuf[2]; + + 
nv98_decoder_setup_ppp(dec, target, 0x1412); + assert(!desc->deblockEnable); + assert(!(dec->base.width & 0xf)); + assert(!(dec->base.height & 0xf)); + + BEGIN_NV04(push, SUBC_PPP(0x400), 1); + PUSH_DATA (push, desc->pquant << 11); + + // 728 = wtf? + return 0x10; +} + +void +nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct nouveau_vp3_video_buffer *target, unsigned comm_seq) { + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); + struct nouveau_pushbuf *push = dec->pushbuf[2]; + unsigned ppp_caps = 0x10; + unsigned fence_extra = 0; + +#if NOUVEAU_VP3_DEBUG_FENCE + fence_extra = 4; +#endif + + nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0); + + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG12: { + unsigned mpeg2 = dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1; + nv98_decoder_setup_ppp(dec, target, 0x1410 | mpeg2); + break; + } + case PIPE_VIDEO_FORMAT_MPEG4: nv98_decoder_setup_ppp(dec, target, 0x1414); break; + case PIPE_VIDEO_FORMAT_VC1: ppp_caps = nv98_decoder_vc1_ppp(dec, desc.vc1, target); break; + case PIPE_VIDEO_FORMAT_MPEG4_AVC: nv98_decoder_setup_ppp(dec, target, 0x1413); break; + default: assert(0); + } + BEGIN_NV04(push, SUBC_PPP(0x734), 2); + PUSH_DATA (push, comm_seq); + PUSH_DATA (push, ppp_caps); + +#if NOUVEAU_VP3_DEBUG_FENCE + BEGIN_NV04(push, SUBC_PPP(0x240), 3); + PUSH_DATAh(push, (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push, (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NV04(push, SUBC_PPP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK (push); + + { + unsigned spin = 0; + + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) + debug_printf("p%u: %u\n", dec->fence_seq, dec->fence_map[8]); + } while (dec->fence_seq > dec->fence_map[8]); + } +#else + BEGIN_NV04(push, SUBC_PPP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); +#endif +} diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c 
b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c new file mode 100644 index 00000000000..9b756ea73f5 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c @@ -0,0 +1,202 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nv50/nv98_video.h" +#include <sys/mman.h> + +#if NOUVEAU_VP3_DEBUG_FENCE +static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq, + struct nouveau_bo *inter_bo, unsigned slice_size) +{ + unsigned i, idx = comm->pvp_cur_index & 0xf; + debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); +#if 0 + debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); + debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); + + for (i = 0; i != comm->irq_index; ++i) + debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); + for (i = 0; i != comm->parse_endpos_index; ++i) + debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); +#endif + debug_printf("mb_y = %u\n", comm->mb_y[idx]); + if (comm->status_vp[idx] == 1) + return; + + if ((comm->pvp_stage & 0xff) != 0xff) { + unsigned *map; + assert(nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client) >= 0); + map = inter_bo->map; + for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { + debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]); + } + munmap(inter_bo->map, inter_bo->size); + inter_bo->map = NULL; + } + assert((comm->pvp_stage & 0xff) == 0xff); +} +#endif + +static void +nv98_decoder_kick_ref(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target) +{ + dec->refs[target->valid_ref].vidbuf = NULL; + dec->refs[target->valid_ref].last_used = 0; +// debug_printf("Unreffed %p\n", target); +} + +void +nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, unsigned comm_seq, + unsigned caps, unsigned is_ref, + struct nouveau_vp3_video_buffer *refs[16]) +{ + struct nouveau_pushbuf *push = dec->pushbuf[1]; + uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr; + 
uint32_t slice_size, bucket_size, ring_size, i; + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH]; + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; + u32 fence_extra = 0, codec_extra = 0; + struct nouveau_pushbuf_refn bo_refs[] = { + { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, +#if NOUVEAU_VP3_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo; + +#if NOUVEAU_VP3_DEBUG_FENCE + fence_extra = 4; +#endif + + if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) { + nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); + codec_extra += 2; + } else + nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size); + + if (dec->base.max_references > 2) + codec_extra += 1 + (dec->base.max_references - 2); + + pic_addr[16] = nouveau_vp3_video_addr(dec, target) >> 8; + last_addr = null_addr = nouveau_vp3_video_addr(dec, NULL) >> 8; + + for (i = 0; i < dec->base.max_references; ++i) { + if (!refs[i]) + pic_addr[i] = last_addr; + else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i]) + last_addr = pic_addr[i] = nouveau_vp3_video_addr(dec, refs[i]) >> 8; + else + pic_addr[i] = null_addr; + } + if (!is_ref) + nv98_decoder_kick_ref(dec, target); + + nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) + + 6 + codec_extra + fence_extra + 2, num_refs, 0); + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + bsp_addr = bsp_bo->offset >> 8; +#if NOUVEAU_VP3_DEBUG_FENCE + comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8; +#else + comm_addr = bsp_addr + (COMM_OFFSET>>8); +#endif + inter_addr = inter_bo->offset >> 8; + if (dec->fw_bo) + ucode_addr = dec->fw_bo->offset 
>> 8; + else + ucode_addr = 0; + + BEGIN_NV04(push, SUBC_VP(0x700), 7); + PUSH_DATA (push, caps); // 700 + PUSH_DATA (push, comm_seq); // 704 + PUSH_DATA (push, 0); // 708 fuc targets, ignored for nv98 + PUSH_DATA (push, dec->fw_sizes); // 70c + PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr + PUSH_DATA (push, inter_addr); // 714 inter_parm + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs + + if (bucket_size) { + uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2); + + BEGIN_NV04(push, SUBC_VP(0x71c), 2); + PUSH_DATA (push, tmpimg_addr >> 8); // 71c + PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs + } + + BEGIN_NV04(push, SUBC_VP(0x724), 5); + PUSH_DATA (push, comm_addr); // 724 + PUSH_DATA (push, ucode_addr); // 728 + PUSH_DATA (push, pic_addr[16]); // 734 + PUSH_DATA (push, pic_addr[0]); // 72c + PUSH_DATA (push, pic_addr[1]); // 730 + + if (dec->base.max_references > 2) { + int i; + + BEGIN_NV04(push, SUBC_VP(0x400), dec->base.max_references - 2); + for (i = 2; i < dec->base.max_references; ++i) { + assert(0x400 + (i - 2) * 4 < 0x438); + PUSH_DATA (push, pic_addr[i]); + } + } + + if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) { + BEGIN_NV04(push, SUBC_VP(0x438), 1); + PUSH_DATA (push, desc.h264->slice_count); + } + + //debug_printf("Decoding %08lx with %08lx and %08lx\n", pic_addr[16], pic_addr[0], pic_addr[1]); + +#if NOUVEAU_VP3_DEBUG_FENCE + BEGIN_NV04(push, SUBC_VP(0x240), 3); + PUSH_DATAh(push, (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push, (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NV04(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK(push); + + { + unsigned spin = 0; + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) { + debug_printf("v%u: %u\n", dec->fence_seq, dec->fence_map[4]); + dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8); + } + } while (dec->fence_seq > dec->fence_map[4]); + } + 
dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8); +#else + BEGIN_NV04(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); +#endif +} |