diff options
Diffstat (limited to 'src/gallium/drivers/nouveau/nvc0')
40 files changed, 16969 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h new file mode 100644 index 00000000000..9a488c17be1 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h @@ -0,0 +1,380 @@ +#ifndef NVC0_2D_XML +#define NVC0_2D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_2d.xml ( 9454 bytes, from 2010-10-16 16:03:11) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24) +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this 
permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NVC0_2D_DST_FORMAT 0x00000200 + +#define NVC0_2D_DST_LINEAR 0x00000204 + +#define NVC0_2D_DST_TILE_MODE 0x00000208 + +#define NVC0_2D_DST_DEPTH 0x0000020c + +#define NVC0_2D_DST_LAYER 0x00000210 + +#define NVC0_2D_DST_PITCH 0x00000214 + +#define NVC0_2D_DST_WIDTH 0x00000218 + +#define NVC0_2D_DST_HEIGHT 0x0000021c + +#define NVC0_2D_DST_ADDRESS_HIGH 0x00000220 + +#define NVC0_2D_DST_ADDRESS_LOW 0x00000224 + +#define NVC0_2D_UNK228 0x00000228 + +#define NVC0_2D_SRC_FORMAT 0x00000230 + +#define NVC0_2D_SRC_LINEAR 0x00000234 + +#define NVC0_2D_SRC_TILE_MODE 0x00000238 + +#define NVC0_2D_SRC_DEPTH 0x0000023c + +#define NVC0_2D_SRC_LAYER 0x00000240 + +#define NVC0_2D_SRC_PITCH 0x00000244 +#define NVC0_2D_SRC_PITCH__MAX 0x00040000 + +#define NVC0_2D_SRC_WIDTH 0x00000248 +#define NVC0_2D_SRC_WIDTH__MAX 0x00010000 + +#define NVC0_2D_SRC_HEIGHT 0x0000024c +#define NVC0_2D_SRC_HEIGHT__MAX 0x00010000 + +#define NVC0_2D_SRC_ADDRESS_HIGH 0x00000250 + +#define NVC0_2D_SRC_ADDRESS_LOW 0x00000254 + +#define NVC0_2D_UNK258 0x00000258 + +#define NVC0_2D_SINGLE_GPC 0x00000260 + +#define NVC0_2D_COND_ADDRESS_HIGH 0x00000264 + +#define NVC0_2D_COND_ADDRESS_LOW 0x00000268 + +#define NVC0_2D_COND_MODE 0x0000026c +#define NVC0_2D_COND_MODE_NEVER 0x00000000 +#define NVC0_2D_COND_MODE_ALWAYS 0x00000001 +#define NVC0_2D_COND_MODE_RES_NON_ZERO 0x00000002 +#define 
NVC0_2D_COND_MODE_EQUAL 0x00000003 +#define NVC0_2D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVC0_2D_CLIP_X 0x00000280 + +#define NVC0_2D_CLIP_Y 0x00000284 + +#define NVC0_2D_CLIP_W 0x00000288 + +#define NVC0_2D_CLIP_H 0x0000028c + +#define NVC0_2D_CLIP_ENABLE 0x00000290 + +#define NVC0_2D_COLOR_KEY_FORMAT 0x00000294 +#define NVC0_2D_COLOR_KEY_FORMAT_16BPP 0x00000000 +#define NVC0_2D_COLOR_KEY_FORMAT_15BPP 0x00000001 +#define NVC0_2D_COLOR_KEY_FORMAT_24BPP 0x00000002 +#define NVC0_2D_COLOR_KEY_FORMAT_30BPP 0x00000003 +#define NVC0_2D_COLOR_KEY_FORMAT_8BPP 0x00000004 +#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005 +#define NVC0_2D_COLOR_KEY_FORMAT_32BPP 0x00000006 + +#define NVC0_2D_COLOR_KEY 0x00000298 + +#define NVC0_2D_COLOR_KEY_ENABLE 0x0000029c + +#define NVC0_2D_ROP 0x000002a0 + +#define NVC0_2D_BETA1 0x000002a4 + +#define NVC0_2D_BETA4 0x000002a8 + +#define NVC0_2D_OPERATION 0x000002ac +#define NVC0_2D_OPERATION_SRCCOPY_AND 0x00000000 +#define NVC0_2D_OPERATION_ROP_AND 0x00000001 +#define NVC0_2D_OPERATION_BLEND 0x00000002 +#define NVC0_2D_OPERATION_SRCCOPY 0x00000003 +#define NVC0_2D_OPERATION_ROP 0x00000004 +#define NVC0_2D_OPERATION_SRCCOPY_PREMULT 0x00000005 +#define NVC0_2D_OPERATION_BLEND_PREMULT 0x00000006 + +#define NVC0_2D_UNK2B0 0x000002b0 +#define NVC0_2D_UNK2B0_UNK0__MASK 0x0000003f +#define NVC0_2D_UNK2B0_UNK0__SHIFT 0 +#define NVC0_2D_UNK2B0_UNK1__MASK 0x00003f00 +#define NVC0_2D_UNK2B0_UNK1__SHIFT 8 + +#define NVC0_2D_PATTERN_SELECT 0x000002b4 +#define NVC0_2D_PATTERN_SELECT_MONO_8X8 0x00000000 +#define NVC0_2D_PATTERN_SELECT_MONO_64X1 0x00000001 +#define NVC0_2D_PATTERN_SELECT_MONO_1X64 0x00000002 +#define NVC0_2D_PATTERN_SELECT_COLOR 0x00000003 + +#define NVC0_2D_PATTERN_COLOR_FORMAT 0x000002e8 +#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000 +#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001 +#define NVC0_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002 +#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003 +#define 
NVC0_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004 +#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005 + +#define NVC0_2D_PATTERN_MONO_FORMAT 0x000002ec +#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000 +#define NVC0_2D_PATTERN_MONO_FORMAT_LE 0x00000001 + +#define NVC0_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0)) +#define NVC0_2D_PATTERN_COLOR__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_COLOR__LEN 0x00000002 + +#define NVC0_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0)) +#define NVC0_2D_PATTERN_BITMAP__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_BITMAP__LEN 0x00000002 + +#define NVC0_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0)) +#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_X8R8G8B8__LEN 0x00000040 +#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff +#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT 0 +#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00 +#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT 8 +#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000 +#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT 16 + +#define NVC0_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0)) +#define NVC0_2D_PATTERN_R5G6B5__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_R5G6B5__LEN 0x00000020 +#define NVC0_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f +#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT 0 +#define NVC0_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0 +#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT 5 +#define NVC0_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800 +#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT 11 +#define NVC0_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000 +#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT 16 +#define NVC0_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000 +#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT 21 +#define NVC0_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000 +#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT 27 + +#define NVC0_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0)) +#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_X1R5G5B5__LEN 0x00000020 +#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f +#define 
NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT 0 +#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0 +#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT 5 +#define NVC0_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00 +#define NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT 10 +#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000 +#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT 16 +#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000 +#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT 21 +#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000 +#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT 26 + +#define NVC0_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0)) +#define NVC0_2D_PATTERN_Y8__ESIZE 0x00000004 +#define NVC0_2D_PATTERN_Y8__LEN 0x00000010 +#define NVC0_2D_PATTERN_Y8_Y0__MASK 0x000000ff +#define NVC0_2D_PATTERN_Y8_Y0__SHIFT 0 +#define NVC0_2D_PATTERN_Y8_Y1__MASK 0x0000ff00 +#define NVC0_2D_PATTERN_Y8_Y1__SHIFT 8 +#define NVC0_2D_PATTERN_Y8_Y2__MASK 0x00ff0000 +#define NVC0_2D_PATTERN_Y8_Y2__SHIFT 16 +#define NVC0_2D_PATTERN_Y8_Y3__MASK 0xff000000 +#define NVC0_2D_PATTERN_Y8_Y3__SHIFT 24 + +#define NVC0_2D_DRAW_SHAPE 0x00000580 +#define NVC0_2D_DRAW_SHAPE_POINTS 0x00000000 +#define NVC0_2D_DRAW_SHAPE_LINES 0x00000001 +#define NVC0_2D_DRAW_SHAPE_LINE_STRIP 0x00000002 +#define NVC0_2D_DRAW_SHAPE_TRIANGLES 0x00000003 +#define NVC0_2D_DRAW_SHAPE_RECTANGLES 0x00000004 + +#define NVC0_2D_DRAW_COLOR_FORMAT 0x00000584 + +#define NVC0_2D_DRAW_COLOR 0x00000588 + +#define NVC0_2D_UNK58C 0x0000058c +#define NVC0_2D_UNK58C_0 0x00000001 +#define NVC0_2D_UNK58C_1 0x00000010 +#define NVC0_2D_UNK58C_2 0x00000100 +#define NVC0_2D_UNK58C_3 0x00001000 + +#define NVC0_2D_DRAW_POINT16 0x000005e0 +#define NVC0_2D_DRAW_POINT16_X__MASK 0x0000ffff +#define NVC0_2D_DRAW_POINT16_X__SHIFT 0 +#define NVC0_2D_DRAW_POINT16_Y__MASK 0xffff0000 +#define NVC0_2D_DRAW_POINT16_Y__SHIFT 16 + +#define NVC0_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0)) +#define NVC0_2D_DRAW_POINT32_X__ESIZE 0x00000008 +#define NVC0_2D_DRAW_POINT32_X__LEN 0x00000040 + +#define 
NVC0_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0)) +#define NVC0_2D_DRAW_POINT32_Y__ESIZE 0x00000008 +#define NVC0_2D_DRAW_POINT32_Y__LEN 0x00000040 + +#define NVC0_2D_SIFC_BITMAP_ENABLE 0x00000800 + +#define NVC0_2D_SIFC_FORMAT 0x00000804 + +#define NVC0_2D_SIFC_BITMAP_FORMAT 0x00000808 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I1 0x00000000 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I4 0x00000001 +#define NVC0_2D_SIFC_BITMAP_FORMAT_I8 0x00000002 + +#define NVC0_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c + +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001 +#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002 + +#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814 + +#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818 + +#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c + +#define NVC0_2D_SIFC_WIDTH 0x00000838 + +#define NVC0_2D_SIFC_HEIGHT 0x0000083c + +#define NVC0_2D_SIFC_DX_DU_FRACT 0x00000840 + +#define NVC0_2D_SIFC_DX_DU_INT 0x00000844 + +#define NVC0_2D_SIFC_DY_DV_FRACT 0x00000848 + +#define NVC0_2D_SIFC_DY_DV_INT 0x0000084c + +#define NVC0_2D_SIFC_DST_X_FRACT 0x00000850 + +#define NVC0_2D_SIFC_DST_X_INT 0x00000854 + +#define NVC0_2D_SIFC_DST_Y_FRACT 0x00000858 + +#define NVC0_2D_SIFC_DST_Y_INT 0x0000085c + +#define NVC0_2D_SIFC_DATA 0x00000860 + +#define NVC0_2D_UNK0870 0x00000870 + +#define NVC0_2D_UNK0880 0x00000880 + +#define NVC0_2D_UNK0884 0x00000884 + +#define NVC0_2D_UNK0888 0x00000888 + +#define NVC0_2D_BLIT_CONTROL 0x0000088c +#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001 +#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT 0 +#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000 +#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001 +#define NVC0_2D_BLIT_CONTROL_FILTER__MASK 0x00000010 +#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT 4 +#define NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000 +#define 
NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010 + +#define NVC0_2D_BLIT_DST_X 0x000008b0 + +#define NVC0_2D_BLIT_DST_Y 0x000008b4 + +#define NVC0_2D_BLIT_DST_W 0x000008b8 + +#define NVC0_2D_BLIT_DST_H 0x000008bc + +#define NVC0_2D_BLIT_DU_DX_FRACT 0x000008c0 + +#define NVC0_2D_BLIT_DU_DX_INT 0x000008c4 + +#define NVC0_2D_BLIT_DV_DY_FRACT 0x000008c8 + +#define NVC0_2D_BLIT_DV_DY_INT 0x000008cc + +#define NVC0_2D_BLIT_SRC_X_FRACT 0x000008d0 + +#define NVC0_2D_BLIT_SRC_X_INT 0x000008d4 + +#define NVC0_2D_BLIT_SRC_Y_FRACT 0x000008d8 + +#define NVC0_2D_BLIT_SRC_Y_INT 0x000008dc + + +#endif /* NVC0_2D_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h new file mode 100644 index 00000000000..d3f719d333f --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h @@ -0,0 +1,1350 @@ +#ifndef NVC0_3D_XML +#define NVC0_3D_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_3d.xml ( 30827 bytes, from 2011-01-13 18:23:07) +- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40) +- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20) +- nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28) +- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17) + +Copyright (C) 2006-2011 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice 
(including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NVC0_3D_NOTIFY_ADDRESS_HIGH 0x00000104 +#define NVC0_3D_NOTIFY_ADDRESS_LOW 0x00000108 +#define NVC0_3D_NOTIFY 0x0000010c + +#define NVC0_3D_SERIALIZE 0x00000110 + +#define NVC0_3D_LINE_WIDTH_SEPARATE 0x0000020c + +#define NVC0_3D_FORCE_EARLY_FRAGMENT_TESTS 0x00000210 + +#define NVC0_3D_MEM_BARRIER 0x0000021c +#define NVC0_3D_MEM_BARRIER_UNK0 0x00000001 +#define NVC0_3D_MEM_BARRIER_UNK1 0x00000002 +#define NVC0_3D_MEM_BARRIER_UNK2 0x00000004 +#define NVC0_3D_MEM_BARRIER_UNK4 0x00000010 +#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100 +#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000 + +#define NVC0_3D_CACHE_SPLIT 0x00000308 +#define NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001 +#define NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1 0x00000002 +#define NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003 + +#define NVC0_3D_TESS_MODE 0x00000320 +#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f +#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0 +#define NVC0_3D_TESS_MODE_PRIM_ISOLINES 0x00000000 +#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES 0x00000001 +#define NVC0_3D_TESS_MODE_PRIM_QUADS 0x00000002 +#define NVC0_3D_TESS_MODE_SPACING__MASK 0x000000f0 +#define NVC0_3D_TESS_MODE_SPACING__SHIFT 4 +#define NVC0_3D_TESS_MODE_SPACING_EQUAL 0x00000000 +#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD 0x00000010 +#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN 0x00000020 +#define NVC0_3D_TESS_MODE_CW 0x00000100 
+#define NVC0_3D_TESS_MODE_CONNECTED 0x00000200 + +#define NVC0_3D_TESS_LEVEL_OUTER(i0) (0x00000324 + 0x4*(i0)) +#define NVC0_3D_TESS_LEVEL_OUTER__ESIZE 0x00000004 +#define NVC0_3D_TESS_LEVEL_OUTER__LEN 0x00000004 + +#define NVC0_3D_TESS_LEVEL_INNER(i0) (0x00000334 + 0x4*(i0)) +#define NVC0_3D_TESS_LEVEL_INNER__ESIZE 0x00000004 +#define NVC0_3D_TESS_LEVEL_INNER__LEN 0x00000002 + +#define NVC0_3D_RASTERIZE_ENABLE 0x0000037c + +#define NVC0_3D_TFB(i0) (0x00000380 + 0x20*(i0)) +#define NVC0_3D_TFB__ESIZE 0x00000020 +#define NVC0_3D_TFB__LEN 0x00000004 + +#define NVC0_3D_TFB_BUFFER_ENABLE(i0) (0x00000380 + 0x20*(i0)) + +#define NVC0_3D_TFB_ADDRESS_HIGH(i0) (0x00000384 + 0x20*(i0)) + +#define NVC0_3D_TFB_ADDRESS_LOW(i0) (0x00000388 + 0x20*(i0)) + +#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0)) + +#define NVC0_3D_TFB_BUFFER_OFFSET(i0) (0x00000390 + 0x20*(i0)) + +#define NVC0_3D_TFB_STREAM(i0) (0x00000700 + 0x10*(i0)) +#define NVC0_3D_TFB_STREAM__ESIZE 0x00000010 +#define NVC0_3D_TFB_STREAM__LEN 0x00000004 + +#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0)) +#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010 +#define NVC0_3D_TFB_VARYING_COUNT__LEN 0x00000004 + +#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0)) +#define NVC0_3D_TFB_BUFFER_STRIDE__ESIZE 0x00000010 +#define NVC0_3D_TFB_BUFFER_STRIDE__LEN 0x00000004 + +#define NVC0_3D_TFB_ENABLE 0x00000744 + +#define NVC0_3D_SAMPLE_SHADING 0x00000754 +#define NVC0_3D_SAMPLE_SHADING_MIN_SAMPLES__MASK 0x0000000f +#define NVC0_3D_SAMPLE_SHADING_MIN_SAMPLES__SHIFT 0 +#define NVC0_3D_SAMPLE_SHADING_ENABLE 0x00000010 + +#define NVC0_3D_LOCAL_BASE 0x0000077c + +#define NVC0_3D_TEMP_ADDRESS_HIGH 0x00000790 + +#define NVC0_3D_TEMP_ADDRESS_LOW 0x00000794 + +#define NVC0_3D_TEMP_SIZE_HIGH 0x00000798 + +#define NVC0_3D_TEMP_SIZE_LOW 0x0000079c + +#define NVC0_3D_WARP_TEMP_ALLOC 0x000007a0 + +#define NVC0_3D_ZCULL_WIDTH 0x000007c0 + +#define NVC0_3D_ZCULL_HEIGHT 0x000007c4 + +#define 
NVC0_3D_ZCULL_ADDRESS_HIGH 0x000007e8 + +#define NVC0_3D_ZCULL_ADDRESS_LOW 0x000007ec + +#define NVC0_3D_ZCULL_LIMIT_HIGH 0x000007f0 + +#define NVC0_3D_ZCULL_LIMIT_LOW 0x000007f4 + +#define NVC0_3D_RT(i0) (0x00000800 + 0x40*(i0)) +#define NVC0_3D_RT__ESIZE 0x00000040 +#define NVC0_3D_RT__LEN 0x00000008 + +#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x40*(i0)) + +#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x40*(i0)) + +#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x40*(i0)) + +#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x40*(i0)) + +#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x40*(i0)) + +#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x40*(i0)) +#define NVC0_3D_RT_TILE_MODE_X 0x00000001 +#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070 +#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4 +#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700 +#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8 +#define NVC0_3D_RT_TILE_MODE_LINEAR 0x00001000 +#define NVC0_3D_RT_TILE_MODE_UNK16 0x00010000 + +#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x40*(i0)) +#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0 +#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000 + +#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x40*(i0)) + +#define NVC0_3D_RT_BASE_LAYER(i0) (0x00000820 + 0x40*(i0)) + +#define NVC0_3D_RT_UNK14(i0) (0x00000824 + 0x40*(i0)) + +#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_X__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_Y__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_SCALE_Z__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE 
0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0)) +#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020 +#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010 + +#define NVC0_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0)) +#define NVC0_3D_VIEWPORT_HORIZ__ESIZE 0x00000010 +#define NVC0_3D_VIEWPORT_HORIZ__LEN 0x00000010 +#define NVC0_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff +#define NVC0_3D_VIEWPORT_HORIZ_X__SHIFT 0 +#define NVC0_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000 +#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT 16 + +#define NVC0_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0)) +#define NVC0_3D_VIEWPORT_VERT__ESIZE 0x00000010 +#define NVC0_3D_VIEWPORT_VERT__LEN 0x00000010 +#define NVC0_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff +#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT 0 +#define NVC0_3D_VIEWPORT_VERT_H__MASK 0xffff0000 +#define NVC0_3D_VIEWPORT_VERT_H__SHIFT 16 + +#define NVC0_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0)) +#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010 +#define NVC0_3D_DEPTH_RANGE_NEAR__LEN 0x00000010 + +#define NVC0_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0)) +#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010 +#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010 + +#define NVC0_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ__LEN 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16 + +#define NVC0_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_VERT__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_VERT__LEN 0x00000008 +#define 
NVC0_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_VERT_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_VERT_MAX__SHIFT 16 + +#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0)) +#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008 +#define NVC0_3D_CLIPID_REGION_HORIZ__LEN 0x00000004 +#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff +#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT 0 +#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000 +#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT 16 + +#define NVC0_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0)) +#define NVC0_3D_CLIPID_REGION_VERT__ESIZE 0x00000008 +#define NVC0_3D_CLIPID_REGION_VERT__LEN 0x00000004 +#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff +#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT 0 +#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000 +#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16 + +#define NVC0_3D_CALL_LIMIT_LOG 0x00000d64 + +#define NVC0_3D_COUNTER_ENABLE 0x00000d68 +#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001 +#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002 +#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004 +#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008 +#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010 +#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020 +#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040 +#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080 +#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100 +#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200 +#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400 +#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800 +#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000 +#define NVC0_3D_COUNTER_ENABLE_UNK0D 0x00002000 +#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000 +#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000 + +#define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74 + +#define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78 + +#define NVC0_3D_CLEAR_COLOR(i0) (0x00000d80 + 
0x4*(i0)) +#define NVC0_3D_CLEAR_COLOR__ESIZE 0x00000004 +#define NVC0_3D_CLEAR_COLOR__LEN 0x00000004 + +#define NVC0_3D_CLEAR_DEPTH 0x00000d90 + +#define NVC0_3D_CLEAR_STENCIL 0x00000da0 + +#define NVC0_3D_POLYGON_SMOOTH_ENABLE 0x00000db4 + +#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0 + +#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4 + +#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8 + +#define NVC0_3D_PATCH_VERTICES 0x00000dcc + +#define NVC0_3D_WATCHDOG_TIMER 0x00000de4 + +#define NVC0_3D_WINDOW_OFFSET_X 0x00000df8 + +#define NVC0_3D_WINDOW_OFFSET_Y 0x00000dfc + +#define NVC0_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_ENABLE__ESIZE 0x00000010 +#define NVC0_3D_SCISSOR_ENABLE__LEN 0x00000010 + +#define NVC0_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_HORIZ__ESIZE 0x00000010 +#define NVC0_3D_SCISSOR_HORIZ__LEN 0x00000010 +#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff +#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT 0 +#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000 +#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT 16 + +#define NVC0_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0)) +#define NVC0_3D_SCISSOR_VERT__ESIZE 0x00000010 +#define NVC0_3D_SCISSOR_VERT__LEN 0x00000010 +#define NVC0_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff +#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT 0 +#define NVC0_3D_SCISSOR_VERT_MAX__MASK 0xffff0000 +#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT 16 + +#define NVC0_3D_STENCIL_BACK_FUNC_REF 0x00000f54 + +#define NVC0_3D_STENCIL_BACK_MASK 0x00000f58 + +#define NVC0_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c + +#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84 + +#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88 + +#define NVC0_3D_COLOR_MASK_COMMON 0x00000f90 + +#define NVC0_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0)) +#define NVC0_3D_DEPTH_BOUNDS__ESIZE 0x00000004 +#define NVC0_3D_DEPTH_BOUNDS__LEN 0x00000002 + +#define NVC0_3D_RT_SEPARATE_FRAG_DATA 0x00000fac + +#define 
NVC0_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0)) +#define NVC0_3D_MSAA_MASK__ESIZE 0x00000004 +#define NVC0_3D_MSAA_MASK__LEN 0x00000004 + +#define NVC0_3D_CLIPID_ADDRESS_HIGH 0x00000fcc + +#define NVC0_3D_CLIPID_ADDRESS_LOW 0x00000fd0 + +#define NVC0_3D_ZETA_ADDRESS_HIGH 0x00000fe0 + +#define NVC0_3D_ZETA_ADDRESS_LOW 0x00000fe4 + +#define NVC0_3D_ZETA_FORMAT 0x00000fe8 + +#define NVC0_3D_ZETA_TILE_MODE 0x00000fec + +#define NVC0_3D_ZETA_LAYER_STRIDE 0x00000ff0 + +#define NVC0_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16 +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff +#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0 + +#define NVC0_3D_SCREEN_SCISSOR_VERT 0x00000ff8 +#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000 +#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16 +#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff +#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0 + +#define NVC0_3D_CLEAR_FLAGS 0x000010f8 +#define NVC0_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001 +#define NVC0_3D_CLEAR_FLAGS_UNK4 0x00000010 +#define NVC0_3D_CLEAR_FLAGS_SCISSOR 0x00000100 +#define NVC0_3D_CLEAR_FLAGS_VIEWPORT 0x00001000 + +#define NVC0_3D_VERTEX_ID 0x00001118 + +#define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c +#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff +#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001 +#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000 +#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16 +#define 
NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000 +#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000 + +#define NVC0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0)) +#define NVC0_3D_VTX_ATTR_DATA__ESIZE 0x00000004 +#define NVC0_3D_VTX_ATTR_DATA__LEN 0x00000004 + +#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0) (0x00001160 + 0x4*(i0)) +#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE 0x00000004 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN 0x00000020 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000001f +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT 0 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 0x00000040 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK 0x001fff80 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT 7 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK 0x07e00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT 21 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32 0x00200000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32 0x00400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16 0x00600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32 0x00800000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16 0x00a00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8 0x01400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16 0x01e00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 0x02400000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8 0x02600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8 0x03000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16 0x03600000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8 0x03a00000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_10_10_10_2 0x06000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x38000000 +#define 
NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT 27 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT 0x18000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT 0x20000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED 0x28000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED 0x30000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT 0x38000000 +#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA 0x80000000 + +#define NVC0_3D_RT_CONTROL 0x0000121c +#define NVC0_3D_RT_CONTROL_COUNT__MASK 0x0000000f +#define NVC0_3D_RT_CONTROL_COUNT__SHIFT 0 +#define NVC0_3D_RT_CONTROL_MAP0__MASK 0x00000070 +#define NVC0_3D_RT_CONTROL_MAP0__SHIFT 4 +#define NVC0_3D_RT_CONTROL_MAP1__MASK 0x00000380 +#define NVC0_3D_RT_CONTROL_MAP1__SHIFT 7 +#define NVC0_3D_RT_CONTROL_MAP2__MASK 0x00001c00 +#define NVC0_3D_RT_CONTROL_MAP2__SHIFT 10 +#define NVC0_3D_RT_CONTROL_MAP3__MASK 0x0000e000 +#define NVC0_3D_RT_CONTROL_MAP3__SHIFT 13 +#define NVC0_3D_RT_CONTROL_MAP4__MASK 0x00070000 +#define NVC0_3D_RT_CONTROL_MAP4__SHIFT 16 +#define NVC0_3D_RT_CONTROL_MAP5__MASK 0x00380000 +#define NVC0_3D_RT_CONTROL_MAP5__SHIFT 19 +#define NVC0_3D_RT_CONTROL_MAP6__MASK 0x01c00000 +#define NVC0_3D_RT_CONTROL_MAP6__SHIFT 22 +#define NVC0_3D_RT_CONTROL_MAP7__MASK 0x0e000000 +#define NVC0_3D_RT_CONTROL_MAP7__SHIFT 25 + +#define NVC0_3D_ZETA_HORIZ 0x00001228 + +#define NVC0_3D_ZETA_VERT 0x0000122c + +#define NVC0_3D_ZETA_ARRAY_MODE 0x00001230 +#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff +#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0 +#define NVC0_3D_ZETA_ARRAY_MODE_UNK 0x00010000 + +#define NVC0_3D_LINKED_TSC 0x00001234 + +#define NVC0_3D_DRAW_TFB_BYTES 0x0000123c + +#define NVC0_3D_FP_RESULT_COUNT 0x00001298 + +#define NVC0_3D_DEPTH_TEST_ENABLE 0x000012cc + +#define NVC0_3D_D3D_FILL_MODE 0x000012d0 +#define NVC0_3D_D3D_FILL_MODE_POINT 0x00000001 +#define NVC0_3D_D3D_FILL_MODE_WIREFRAME 
0x00000002 +#define NVC0_3D_D3D_FILL_MODE_SOLID 0x00000003 + +#define NVC0_3D_SHADE_MODEL 0x000012d4 +#define NVC0_3D_SHADE_MODEL_FLAT 0x00001d00 +#define NVC0_3D_SHADE_MODEL_SMOOTH 0x00001d01 + +#define NVC0_3D_BLEND_INDEPENDENT 0x000012e4 + +#define NVC0_3D_DEPTH_WRITE_ENABLE 0x000012e8 + +#define NVC0_3D_ALPHA_TEST_ENABLE 0x000012ec + +#define NVC0_3D_VB_ELEMENT_U8_SETUP 0x00001300 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30 +#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff +#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0 + +#define NVC0_3D_VB_ELEMENT_U8 0x00001304 +#define NVC0_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff +#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT 0 +#define NVC0_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00 +#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT 8 +#define NVC0_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000 +#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT 16 +#define NVC0_3D_VB_ELEMENT_U8_I3__MASK 0xff000000 +#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT 24 + +#define NVC0_3D_D3D_CULL_MODE 0x00001308 +#define NVC0_3D_D3D_CULL_MODE_NONE 0x00000001 +#define NVC0_3D_D3D_CULL_MODE_FRONT 0x00000002 +#define NVC0_3D_D3D_CULL_MODE_BACK 0x00000003 + +#define NVC0_3D_DEPTH_TEST_FUNC 0x0000130c +#define NVC0_3D_DEPTH_TEST_FUNC_NEVER 0x00000200 +#define NVC0_3D_DEPTH_TEST_FUNC_LESS 0x00000201 +#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202 +#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_DEPTH_TEST_FUNC_GREATER 0x00000204 +#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_ALPHA_TEST_REF 0x00001310 + +#define NVC0_3D_ALPHA_TEST_FUNC 0x00001314 +#define NVC0_3D_ALPHA_TEST_FUNC_NEVER 0x00000200 +#define NVC0_3D_ALPHA_TEST_FUNC_LESS 0x00000201 +#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202 +#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203 +#define 
NVC0_3D_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_DRAW_TFB_STRIDE 0x00001318 +#define NVC0_3D_DRAW_TFB_STRIDE__MIN 0x00000001 +#define NVC0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff + +#define NVC0_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0)) +#define NVC0_3D_BLEND_COLOR__ESIZE 0x00000004 +#define NVC0_3D_BLEND_COLOR__LEN 0x00000004 + +#define NVC0_3D_TSC_FLUSH 0x00001330 +#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_3D_TIC_FLUSH 0x00001334 +#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_3D_TEX_CACHE_CTL 0x00001338 +#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030 +#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT 4 + +#define NVC0_3D_BLEND_SEPARATE_ALPHA 0x0000133c + +#define NVC0_3D_BLEND_EQUATION_RGB 0x00001340 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NVC0_3D_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NVC0_3D_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_BLEND_FUNC_SRC_RGB 0x00001344 + +#define NVC0_3D_BLEND_FUNC_DST_RGB 0x00001348 + +#define NVC0_3D_BLEND_EQUATION_ALPHA 0x0000134c +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_BLEND_FUNC_SRC_ALPHA 0x00001350 + +#define NVC0_3D_BLEND_FUNC_DST_ALPHA 0x00001358 + +#define NVC0_3D_BLEND_ENABLE_COMMON 0x0000135c + +#define 
NVC0_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0)) +#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004 +#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008 + +#define NVC0_3D_STENCIL_ENABLE 0x00001380 + +#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define 
NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394 + +#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x00001398 + +#define NVC0_3D_STENCIL_FRONT_MASK 0x0000139c + +#define NVC0_3D_DRAW_TFB_BASE 0x000013a4 + +#define NVC0_3D_FRAG_COLOR_CLAMP_EN 0x000013a8 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000 +#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000 + +#define NVC0_3D_SCREEN_Y_CONTROL 0x000013ac +#define NVC0_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001 +#define NVC0_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010 + +#define NVC0_3D_LINE_WIDTH_SMOOTH 0x000013b0 + +#define NVC0_3D_LINE_WIDTH_ALIASED 0x000013b4 + +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001 +#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400 + +#define NVC0_3D_VERTEX_ARRAY_FLUSH 0x0000142c + +#define NVC0_3D_VB_ELEMENT_BASE 0x00001434 + +#define NVC0_3D_VB_INSTANCE_BASE 0x00001438 + +#define NVC0_3D_CODE_CB_FLUSH 0x00001440 + +#define NVC0_3D_CLIPID_HEIGHT 0x00001504 +#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000 + +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ 0x00001508 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16 + +#define NVC0_3D_CLIPID_FILL_RECT_VERT 0x0000150c +#define 
NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16 + +#define NVC0_3D_CLIP_DISTANCE_ENABLE 0x00001510 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_0 0x00000001 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_1 0x00000002 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_2 0x00000004 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_3 0x00000008 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_4 0x00000010 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_5 0x00000020 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_6 0x00000040 +#define NVC0_3D_CLIP_DISTANCE_ENABLE_7 0x00000080 + +#define NVC0_3D_SAMPLECNT_ENABLE 0x00001514 + +#define NVC0_3D_POINT_SIZE 0x00001518 + +#define NVC0_3D_ZCULL_STATCTRS_ENABLE 0x0000151c + +#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520 + +#define NVC0_3D_COUNTER_RESET 0x00001530 +#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001 +#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002 +#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003 +#define NVC0_3D_COUNTER_RESET_UNK04 0x00000004 +#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010 +#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011 +#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012 +#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013 +#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015 +#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016 +#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017 +#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018 +#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a +#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b +#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c +#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d +#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e +#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f + +#define NVC0_3D_MULTISAMPLE_ENABLE 0x00001534 + +#define NVC0_3D_ZETA_ENABLE 0x00001538 + +#define NVC0_3D_MULTISAMPLE_CTRL 0x0000153c +#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 
0x00000001 +#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010 + +#define NVC0_3D_COND_ADDRESS_HIGH 0x00001550 + +#define NVC0_3D_COND_ADDRESS_LOW 0x00001554 + +#define NVC0_3D_COND_MODE 0x00001558 +#define NVC0_3D_COND_MODE_NEVER 0x00000000 +#define NVC0_3D_COND_MODE_ALWAYS 0x00000001 +#define NVC0_3D_COND_MODE_RES_NON_ZERO 0x00000002 +#define NVC0_3D_COND_MODE_EQUAL 0x00000003 +#define NVC0_3D_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVC0_3D_TSC_ADDRESS_HIGH 0x0000155c + +#define NVC0_3D_TSC_ADDRESS_LOW 0x00001560 +#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020 + +#define NVC0_3D_TSC_LIMIT 0x00001564 +#define NVC0_3D_TSC_LIMIT__MAX 0x00001fff + +#define NVC0_3D_POLYGON_OFFSET_FACTOR 0x0000156c + +#define NVC0_3D_LINE_SMOOTH_ENABLE 0x00001570 + +#define NVC0_3D_TIC_ADDRESS_HIGH 0x00001574 + +#define NVC0_3D_TIC_ADDRESS_LOW 0x00001578 + +#define NVC0_3D_TIC_LIMIT 0x0000157c + +#define NVC0_3D_ZCULL_REGION 0x00001590 + +#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594 + +#define NVC0_3D_STENCIL_BACK_OP_FAIL 0x00001598 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 
+ +#define NVC0_3D_STENCIL_BACK_OP_ZPASS 0x000015a0 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 + +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 + +#define NVC0_3D_CSAA_ENABLE 0x000015b4 + +#define NVC0_3D_FRAMEBUFFER_SRGB 0x000015b8 + +#define NVC0_3D_POLYGON_OFFSET_UNITS 0x000015bc + +#define NVC0_3D_LAYER 0x000015cc +#define NVC0_3D_LAYER_IDX__MASK 0x0000ffff +#define NVC0_3D_LAYER_IDX__SHIFT 0 +#define NVC0_3D_LAYER_USE_GP 0x00010000 + +#define NVC0_3D_MULTISAMPLE_MODE 0x000015d0 +#define NVC0_3D_MULTISAMPLE_MODE_MS1 0x00000000 +#define NVC0_3D_MULTISAMPLE_MODE_MS2 0x00000001 +#define NVC0_3D_MULTISAMPLE_MODE_MS4 0x00000002 +#define NVC0_3D_MULTISAMPLE_MODE_MS8 0x00000003 +#define NVC0_3D_MULTISAMPLE_MODE_MS8_ALT 0x00000004 +#define NVC0_3D_MULTISAMPLE_MODE_MS2_ALT 0x00000005 +#define NVC0_3D_MULTISAMPLE_MODE_UNK6 0x00000006 +#define NVC0_3D_MULTISAMPLE_MODE_MS4_CS4 0x00000008 +#define NVC0_3D_MULTISAMPLE_MODE_MS4_CS12 0x00000009 +#define NVC0_3D_MULTISAMPLE_MODE_MS8_CS8 0x0000000a +#define NVC0_3D_MULTISAMPLE_MODE_MS8_CS24 0x0000000b + +#define NVC0_3D_VERTEX_BEGIN_D3D 0x000015d4 +#define 
NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000 + +#define NVC0_3D_VERTEX_END_D3D 0x000015d8 +#define NVC0_3D_VERTEX_END_D3D_UNK0 0x00000001 +#define NVC0_3D_VERTEX_END_D3D_UNK1 0x00000002 + +#define NVC0_3D_EDGEFLAG 0x000015e4 + +#define NVC0_3D_VB_ELEMENT_U32 0x000015e8 + +#define NVC0_3D_VB_ELEMENT_U16_SETUP 0x000015ec +#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30 +#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff +#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0 + +#define NVC0_3D_VB_ELEMENT_U16 0x000015f0 +#define NVC0_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff +#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT 0 +#define NVC0_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000 +#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT 16 + +#define NVC0_3D_VERTEX_BASE_HIGH 0x000015f4 + +#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8 + +#define NVC0_3D_ZCULL_WINDOW_OFFSET_X 0x000015fc + +#define NVC0_3D_ZCULL_WINDOW_OFFSET_Y 0x00001600 + +#define NVC0_3D_POINT_COORD_REPLACE 0x00001604 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__MASK 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__SHIFT 2 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT 0x00000000 +#define 
NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__MASK 0x000007f8 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__SHIFT 3 + +#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608 + +#define NVC0_3D_CODE_ADDRESS_LOW 0x0000160c + +#define NVC0_3D_VERTEX_END_GL 0x00001614 +#define NVC0_3D_VERTEX_END_GL_UNK0 0x00000001 +#define NVC0_3D_VERTEX_END_GL_UNK1 0x00000002 + +#define NVC0_3D_VERTEX_BEGIN_GL 0x00001618 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009 +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d +#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES 0x0000000e +#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x04000000 +#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x08000000 + +#define NVC0_3D_VERTEX_ID_REPLACE 0x0000161c +#define NVC0_3D_VERTEX_ID_REPLACE_ENABLE 0x00000001 +#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE__MASK 0x00000ff0 +#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT 4 + +#define NVC0_3D_VERTEX_DATA 0x00001640 + +#define NVC0_3D_PRIM_RESTART_ENABLE 0x00001644 + +#define 
NVC0_3D_PRIM_RESTART_INDEX 0x00001648 + +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100 +#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000 + +#define NVC0_3D_POINT_SMOOTH_ENABLE 0x00001658 + +#define NVC0_3D_POINT_RASTER_RULES 0x0000165c +#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000 +#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001 + +#define NVC0_3D_TEX_MISC 0x00001664 +#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004 + +#define NVC0_3D_LINE_STIPPLE_ENABLE 0x0000166c + +#define NVC0_3D_LINE_STIPPLE_PATTERN 0x00001680 + +#define NVC0_3D_PROVOKING_VERTEX_LAST 0x00001684 + +#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688 + +#define NVC0_3D_POLYGON_STIPPLE_ENABLE 0x0000168c + +#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0)) +#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004 +#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020 + +#define NVC0_3D_ZETA_BASE_LAYER 0x0000179c + +#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_HIGH 0x000017bc + +#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_LOW 0x000017c0 + +#define NVC0_3D_VERTEX_QUARANTINE_SIZE 0x000017c4 +#define NVC0_3D_VERTEX_QUARANTINE_SIZE_16K 0x00000001 +#define NVC0_3D_VERTEX_QUARANTINE_SIZE_32K 0x00000002 +#define NVC0_3D_VERTEX_QUARANTINE_SIZE_64K 0x00000003 + +#define NVC0_3D_STRMOUT_UNK1780(i0) (0x00001780 + 0x4*(i0)) +#define NVC0_3D_STRMOUT_UNK1780__ESIZE 0x00000004 +#define NVC0_3D_STRMOUT_UNK1780__LEN 0x00000004 + +#define NVC0_3D_UNK17BC_ADDRESS_HIGH 0x000017bc + +#define NVC0_3D_UNK17BC_ADDRESS_LOW 0x000017c0 + +#define NVC0_3D_UNK17BC_LIMIT 0x000017c4 + +#define NVC0_3D_INDEX_ARRAY_START_HIGH 0x000017c8 + +#define NVC0_3D_INDEX_ARRAY_START_LOW 0x000017cc + +#define NVC0_3D_INDEX_ARRAY_LIMIT_HIGH 0x000017d0 + +#define NVC0_3D_INDEX_ARRAY_LIMIT_LOW 0x000017d4 + +#define 
NVC0_3D_INDEX_LOG2_SIZE 0x000017d8 + +#define NVC0_3D_INDEX_BATCH_FIRST 0x000017dc + +#define NVC0_3D_INDEX_BATCH_COUNT 0x000017e0 + +#define NVC0_3D_POLYGON_OFFSET_CLAMP 0x0000187c + +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001880 + 0x4*(i0)) +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004 +#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000020 + +#define NVC0_3D_VP_POINT_SIZE_EN 0x00001910 + +#define NVC0_3D_CULL_FACE_ENABLE 0x00001918 + +#define NVC0_3D_FRONT_FACE 0x0000191c +#define NVC0_3D_FRONT_FACE_CW 0x00000900 +#define NVC0_3D_FRONT_FACE_CCW 0x00000901 + +#define NVC0_3D_CULL_FACE 0x00001920 +#define NVC0_3D_CULL_FACE_FRONT 0x00000404 +#define NVC0_3D_CULL_FACE_BACK 0x00000405 +#define NVC0_3D_CULL_FACE_FRONT_AND_BACK 0x00000408 + +#define NVC0_3D_LINE_LAST_PIXEL 0x00001924 + +#define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c + +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1 0x00000001 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 0x00000002 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK2 0x00000004 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000 + +#define NVC0_3D_CLIP_DISTANCE_MODE 0x00001940 +#define NVC0_3D_CLIP_DISTANCE_MODE_0__MASK 0x00000001 +#define 
NVC0_3D_CLIP_DISTANCE_MODE_0__SHIFT 0 +#define NVC0_3D_CLIP_DISTANCE_MODE_0_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_0_CULL 0x00000001 +#define NVC0_3D_CLIP_DISTANCE_MODE_1__MASK 0x00000010 +#define NVC0_3D_CLIP_DISTANCE_MODE_1__SHIFT 4 +#define NVC0_3D_CLIP_DISTANCE_MODE_1_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_1_CULL 0x00000010 +#define NVC0_3D_CLIP_DISTANCE_MODE_2__MASK 0x00000100 +#define NVC0_3D_CLIP_DISTANCE_MODE_2__SHIFT 8 +#define NVC0_3D_CLIP_DISTANCE_MODE_2_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_2_CULL 0x00000100 +#define NVC0_3D_CLIP_DISTANCE_MODE_3__MASK 0x00001000 +#define NVC0_3D_CLIP_DISTANCE_MODE_3__SHIFT 12 +#define NVC0_3D_CLIP_DISTANCE_MODE_3_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_3_CULL 0x00001000 +#define NVC0_3D_CLIP_DISTANCE_MODE_4__MASK 0x00010000 +#define NVC0_3D_CLIP_DISTANCE_MODE_4__SHIFT 16 +#define NVC0_3D_CLIP_DISTANCE_MODE_4_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_4_CULL 0x00010000 +#define NVC0_3D_CLIP_DISTANCE_MODE_5__MASK 0x00100000 +#define NVC0_3D_CLIP_DISTANCE_MODE_5__SHIFT 20 +#define NVC0_3D_CLIP_DISTANCE_MODE_5_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_5_CULL 0x00100000 +#define NVC0_3D_CLIP_DISTANCE_MODE_6__MASK 0x01000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_6__SHIFT 24 +#define NVC0_3D_CLIP_DISTANCE_MODE_6_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_6_CULL 0x01000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_7__MASK 0x10000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_7__SHIFT 28 +#define NVC0_3D_CLIP_DISTANCE_MODE_7_CLIP 0x00000000 +#define NVC0_3D_CLIP_DISTANCE_MODE_7_CULL 0x10000000 + +#define NVC0_3D_CLIP_RECTS_EN 0x0000194c + +#define NVC0_3D_CLIP_RECTS_MODE 0x00001950 +#define NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000 +#define NVC0_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001 +#define NVC0_3D_CLIP_RECTS_MODE_NEVER 0x00000002 + +#define NVC0_3D_ZCULL_INVALIDATE 0x00001958 + +#define NVC0_3D_ZCULL_TEST_MASK 0x0000196c +#define 
NVC0_3D_ZCULL_TEST_MASK_FAIL_GT_PASS_LT 0x00000001 +#define NVC0_3D_ZCULL_TEST_MASK_PASS_GT_FAIL_LT 0x00000010 + +#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c +#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001 +#define NVC0_3D_FP_ZORDER_CTRL_1 0x00000010 + +#define NVC0_3D_CLIPID_ENABLE 0x0000197c + +#define NVC0_3D_CLIPID_WIDTH 0x00001980 +#define NVC0_3D_CLIPID_WIDTH__MAX 0x00002000 +#define NVC0_3D_CLIPID_WIDTH__ALIGN 0x00000040 + +#define NVC0_3D_CLIPID_ID 0x00001984 + +#define NVC0_3D_DEPTH_BOUNDS_EN 0x000019bc + +#define NVC0_3D_LOGIC_OP_ENABLE 0x000019c4 + +#define NVC0_3D_LOGIC_OP 0x000019c8 +#define NVC0_3D_LOGIC_OP_CLEAR 0x00001500 +#define NVC0_3D_LOGIC_OP_AND 0x00001501 +#define NVC0_3D_LOGIC_OP_AND_REVERSE 0x00001502 +#define NVC0_3D_LOGIC_OP_COPY 0x00001503 +#define NVC0_3D_LOGIC_OP_AND_INVERTED 0x00001504 +#define NVC0_3D_LOGIC_OP_NOOP 0x00001505 +#define NVC0_3D_LOGIC_OP_XOR 0x00001506 +#define NVC0_3D_LOGIC_OP_OR 0x00001507 +#define NVC0_3D_LOGIC_OP_NOR 0x00001508 +#define NVC0_3D_LOGIC_OP_EQUIV 0x00001509 +#define NVC0_3D_LOGIC_OP_INVERT 0x0000150a +#define NVC0_3D_LOGIC_OP_OR_REVERSE 0x0000150b +#define NVC0_3D_LOGIC_OP_COPY_INVERTED 0x0000150c +#define NVC0_3D_LOGIC_OP_OR_INVERTED 0x0000150d +#define NVC0_3D_LOGIC_OP_NAND 0x0000150e +#define NVC0_3D_LOGIC_OP_SET 0x0000150f + +#define NVC0_3D_ZETA_COMP_ENABLE 0x000019cc + +#define NVC0_3D_CLEAR_BUFFERS 0x000019d0 +#define NVC0_3D_CLEAR_BUFFERS_Z 0x00000001 +#define NVC0_3D_CLEAR_BUFFERS_S 0x00000002 +#define NVC0_3D_CLEAR_BUFFERS_R 0x00000004 +#define NVC0_3D_CLEAR_BUFFERS_G 0x00000008 +#define NVC0_3D_CLEAR_BUFFERS_B 0x00000010 +#define NVC0_3D_CLEAR_BUFFERS_A 0x00000020 +#define NVC0_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0 +#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT 6 +#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00 +#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10 + +#define NVC0_3D_CLIPID_FILL 0x000019d4 + +#define NVC0_3D_RT_COMP_ENABLE(i0) (0x000019e0 + 0x4*(i0)) +#define NVC0_3D_RT_COMP_ENABLE__ESIZE 
0x00000004 +#define NVC0_3D_RT_COMP_ENABLE__LEN 0x00000008 + +#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0)) +#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004 +#define NVC0_3D_COLOR_MASK__LEN 0x00000008 +#define NVC0_3D_COLOR_MASK_R 0x0000000f +#define NVC0_3D_COLOR_MASK_G 0x000000f0 +#define NVC0_3D_COLOR_MASK_B 0x00000f00 +#define NVC0_3D_COLOR_MASK_A 0x0000f000 + +#define NVC0_3D_QUERY_ADDRESS_HIGH 0x00001b00 + +#define NVC0_3D_QUERY_ADDRESS_LOW 0x00001b04 + +#define NVC0_3D_QUERY_SEQUENCE 0x00001b08 + +#define NVC0_3D_QUERY_GET 0x00001b0c +#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003 +#define NVC0_3D_QUERY_GET_MODE__SHIFT 0 +#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000 +#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001 +#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002 +#define NVC0_3D_QUERY_GET_FENCE 0x00000010 +#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0 +#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5 +#define NVC0_3D_QUERY_GET_UNK8 0x00000100 +#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000 +#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12 +#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000 +#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16 +#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000 +#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000 +#define NVC0_3D_QUERY_GET_INTR 0x00100000 +#define NVC0_3D_QUERY_GET_UNK21 0x00200000 +#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000 +#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23 +#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000 +#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000 +#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000 +#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000 +#define NVC0_3D_QUERY_GET_SHORT 0x10000000 + +#define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN 0x00000020 +#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff +#define 
NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0 +#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE 0x00001000 + +#define NVC0_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00001c04 + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000020 + +#define NVC0_3D_VERTEX_ARRAY_START_LOW(i0) (0x00001c08 + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000020 + +#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0) (0x00001c0c + 0x10*(i0)) +#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010 +#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000020 + +#define NVC0_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0)) +#define NVC0_3D_IBLEND__ESIZE 0x00000020 +#define NVC0_3D_IBLEND__LEN 0x00000008 + +#define NVC0_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0)) +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NVC0_3D_IBLEND_EQUATION_RGB_MIN 0x00008007 +#define NVC0_3D_IBLEND_EQUATION_RGB_MAX 0x00008008 +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0)) + +#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0)) + +#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0)) +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b + +#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0)) + +#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0)) + +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001f00 + 0x8*(i0)) +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000020 + +#define 
NVC0_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001f04 + 0x8*(i0)) +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008 +#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000020 + +#define NVC0_3D_SP(i0) (0x00002000 + 0x40*(i0)) +#define NVC0_3D_SP__ESIZE 0x00000040 +#define NVC0_3D_SP__LEN 0x00000006 + +#define NVC0_3D_SP_SELECT(i0) (0x00002000 + 0x40*(i0)) +#define NVC0_3D_SP_SELECT_ENABLE 0x00000001 +#define NVC0_3D_SP_SELECT_PROGRAM__MASK 0x00000070 +#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT 4 +#define NVC0_3D_SP_SELECT_PROGRAM_VP_A 0x00000000 +#define NVC0_3D_SP_SELECT_PROGRAM_VP_B 0x00000010 +#define NVC0_3D_SP_SELECT_PROGRAM_TCP 0x00000020 +#define NVC0_3D_SP_SELECT_PROGRAM_TEP 0x00000030 +#define NVC0_3D_SP_SELECT_PROGRAM_GP 0x00000040 +#define NVC0_3D_SP_SELECT_PROGRAM_FP 0x00000050 + +#define NVC0_3D_SP_START_ID(i0) (0x00002004 + 0x40*(i0)) + +#define NVC0_3D_SP_GPR_ALLOC(i0) (0x0000200c + 0x40*(i0)) + +#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0)) +#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010 +#define NVC0_3D_TEX_LIMITS__LEN 0x00000005 + +#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0)) +#define NVC0_3D_FIRMWARE__ESIZE 0x00000004 +#define NVC0_3D_FIRMWARE__LEN 0x00000020 + +#define NVC0_3D_CB_SIZE 0x00002380 + +#define NVC0_3D_CB_ADDRESS_HIGH 0x00002384 + +#define NVC0_3D_CB_ADDRESS_LOW 0x00002388 + +#define NVC0_3D_CB_POS 0x0000238c + +#define NVC0_3D_CB_DATA(i0) (0x00002390 + 0x4*(i0)) +#define NVC0_3D_CB_DATA__ESIZE 0x00000004 +#define NVC0_3D_CB_DATA__LEN 0x00000010 + +#define NVC0_3D_BIND_TSC(i0) (0x00002400 + 0x20*(i0)) +#define NVC0_3D_BIND_TSC__ESIZE 0x00000020 +#define NVC0_3D_BIND_TSC__LEN 0x00000005 +#define NVC0_3D_BIND_TSC_ACTIVE 0x00000001 +#define NVC0_3D_BIND_TSC_SAMPLER__MASK 0x00000ff0 +#define NVC0_3D_BIND_TSC_SAMPLER__SHIFT 4 +#define NVC0_3D_BIND_TSC_TSC__MASK 0x01fff000 +#define NVC0_3D_BIND_TSC_TSC__SHIFT 12 + +#define NVC0_3D_BIND_TIC(i0) (0x00002404 + 0x20*(i0)) +#define NVC0_3D_BIND_TIC__ESIZE 0x00000020 +#define 
NVC0_3D_BIND_TIC__LEN 0x00000005 +#define NVC0_3D_BIND_TIC_ACTIVE 0x00000001 +#define NVC0_3D_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NVC0_3D_BIND_TIC_TEXTURE__SHIFT 1 +#define NVC0_3D_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NVC0_3D_BIND_TIC_TIC__SHIFT 9 + +#define NVC0_3D_CB_BIND(i0) (0x00002410 + 0x20*(i0)) +#define NVC0_3D_CB_BIND__ESIZE 0x00000020 +#define NVC0_3D_CB_BIND__LEN 0x00000005 +#define NVC0_3D_CB_BIND_VALID 0x00000001 +#define NVC0_3D_CB_BIND_INDEX__MASK 0x000000f0 +#define NVC0_3D_CB_BIND_INDEX__SHIFT 4 + +#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600 + +#define NVE4_3D_TEX_CB_INDEX 0x00002608 +#define NVE4_3D_TEX_CB_INDEX__MIN 0x00000000 +#define NVE4_3D_TEX_CB_INDEX__MAX 0x00000010 + +#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1)) +#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 +#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020 + +#define NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE 0x00003800 + +#define NVC0_3D_MACRO_VERTEX_ARRAY_SELECT 0x00003808 + +#define NVC0_3D_MACRO_BLEND_ENABLES 0x00003810 + +#define NVC0_3D_MACRO_POLYGON_MODE_FRONT 0x00003818 +#define NVC0_3D_MACRO_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NVC0_3D_MACRO_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NVC0_3D_MACRO_POLYGON_MODE_FRONT_FILL 0x00001b02 + +#define NVC0_3D_MACRO_POLYGON_MODE_BACK 0x00003820 +#define NVC0_3D_MACRO_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NVC0_3D_MACRO_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NVC0_3D_MACRO_POLYGON_MODE_BACK_FILL 0x00001b02 + +#define NVC0_3D_MACRO_GP_SELECT 0x00003828 + +#define NVC0_3D_MACRO_TEP_SELECT 0x00003830 + + +#endif /* NVC0_3D_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h new file mode 100644 index 00000000000..84b152213a2 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h @@ -0,0 +1,98 @@ +#ifndef NV_3DDEFS_XML +#define NV_3DDEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) +- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38) +- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28) +- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20) +- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email 
protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000 +#define NV50_3D_BLEND_FACTOR_ONE 0x00004001 +#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303 +#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305 +#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307 +#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308 +#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002 +#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004 +#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901 +#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902 +#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903 + +#endif /* NV_3DDEFS_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c new file mode 100644 index 00000000000..b49f1aecfec --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -0,0 +1,271 @@ +/* + * Copyright 2013 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christoph Bumiller, Samuel Pitoiset + */ + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_compute.h" + +int +nvc0_screen_compute_setup(struct nvc0_screen *screen, + struct nouveau_pushbuf *push) +{ + struct nouveau_object *chan = screen->base.channel; + struct nouveau_device *dev = screen->base.device; + uint32_t obj_class; + int ret; + int i; + + switch (dev->chipset & 0xf0) { + case 0xc0: + if (dev->chipset == 0xc8) + obj_class = NVC8_COMPUTE_CLASS; + else + obj_class = NVC0_COMPUTE_CLASS; + break; + case 0xd0: + obj_class = NVC0_COMPUTE_CLASS; + break; + default: + NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); + return -1; + } + + ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0, + &screen->compute); + if (ret) { + NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); + return ret; + } + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL, + &screen->parm); + if (ret) + return ret; + + BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->compute->oclass); + + /* hardware limit */ + BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1); + PUSH_DATA (push, screen->mp_count); + BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1); + PUSH_DATA (push, 0xf); + + BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1); + PUSH_DATA (push, 0x8000); + + /* global memory setup */ + BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + PUSH_DATA (push, 0); + BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100); + for (i = 0; i <= 
0xff; i++) + PUSH_DATA (push, (0xc << 28) | (i << 16) | i); + BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + PUSH_DATA (push, 1); + + /* local memory and cstack setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->tls->offset); + PUSH_DATA (push, screen->tls->offset); + BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2); + PUSH_DATAh(push, screen->tls->size); + PUSH_DATA (push, screen->tls->size); + BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1); + PUSH_DATA (push, 1 << 24); + + /* shared memory setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1); + PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); + BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1); + PUSH_DATA (push, 2 << 24); + BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1); + PUSH_DATA (push, 0); + + /* code segment setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->text->offset); + PUSH_DATA (push, screen->text->offset); + + /* bind parameters buffer */ + BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); + PUSH_DATA (push, screen->parm->size); + PUSH_DATAh(push, screen->parm->offset); + PUSH_DATA (push, screen->parm->offset); + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); + PUSH_DATA (push, (0 << 8) | 1); + + /* TODO: textures & samplers */ + + return 0; +} + +boolean +nvc0_compute_validate_program(struct nvc0_context *nvc0) +{ + struct nvc0_program *prog = nvc0->compprog; + + if (prog->mem) + return TRUE; + + if (!prog->translated) { + prog->translated = nvc0_program_translate( + prog, nvc0->screen->base.device->chipset); + if (!prog->translated) + return FALSE; + } + if (unlikely(!prog->code_size)) + return FALSE; + + if (likely(prog->code_size)) { + if (nvc0_program_upload_code(nvc0, prog)) { + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE); + return TRUE; + 
} + } + return FALSE; +} + +static boolean +nvc0_compute_state_validate(struct nvc0_context *nvc0) +{ + if (!nvc0_compute_validate_program(nvc0)) + return FALSE; + + /* TODO: textures, samplers, surfaces, global memory buffers */ + + nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE); + + nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp); + if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf))) + return FALSE; + if (unlikely(nvc0->state.flushed)) + nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE); + + return TRUE; + +} + +static void +nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + struct nvc0_program *cp = nvc0->compprog; + + if (cp->parm_size) { + BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); + PUSH_DATA (push, align(cp->parm_size, 0x100)); + PUSH_DATAh(push, screen->parm->offset); + PUSH_DATA (push, screen->parm->offset); + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); + PUSH_DATA (push, (0 << 8) | 1); + /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */ + BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4); + PUSH_DATA (push, 0); + PUSH_DATAp(push, input, cp->parm_size / 4); + + BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); + } +} + +void +nvc0_launch_grid(struct pipe_context *pipe, + const uint *block_layout, const uint *grid_layout, + uint32_t label, + const void *input) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *cp = nvc0->compprog; + unsigned s, i; + int ret; + + ret = !nvc0_compute_state_validate(nvc0); + if (ret) + goto out; + + nvc0_compute_upload_input(nvc0, input); + + BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1); + PUSH_DATA (push, nvc0_program_symbol_offset(cp, label)); + + BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3); + PUSH_DATA (push, 
align(cp->cp.lmem_size, 0x10)); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */ + + BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3); + PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); + PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]); + PUSH_DATA (push, cp->num_barriers); + BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1); + PUSH_DATA (push, cp->num_gprs); + + /* grid/block setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); + PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]); + PUSH_DATA (push, grid_layout[2]); + BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); + PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]); + PUSH_DATA (push, block_layout[2]); + + /* launch preliminary setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1); + PUSH_DATA (push, 0x1); + BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); + + /* kernel launching */ + BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1); + PUSH_DATA (push, 0x1000); + BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1); + PUSH_DATA (push, 0x1); + + /* rebind all the 3D constant buffers + * (looks like binding a CB on COMPUTE clobbers 3D state) */ + nvc0->dirty |= NVC0_NEW_CONSTBUF; + for (s = 0; s < 6; s++) { + for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++) + if (nvc0->constbuf[s][i].u.buf) + nvc0->constbuf_dirty[s] |= 1 << i; + } + memset(nvc0->state.uniform_buffer_bound, 0, + sizeof(nvc0->state.uniform_buffer_bound)); + +out: + if (ret) + NOUVEAU_ERR("Failed to launch grid !\n"); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h new file mode 100644 index 
00000000000..9a1a71760d7 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h @@ -0,0 +1,10 @@ +#ifndef NVC0_COMPUTE_H +#define NVC0_COMPUTE_H + +#include "nv50/nv50_defs.xml.h" +#include "nvc0/nvc0_compute.xml.h" + +boolean +nvc0_compute_validate_program(struct nvc0_context *nvc0); + +#endif /* NVC0_COMPUTE_H */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h new file mode 100644 index 00000000000..35e6bfdbea2 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h @@ -0,0 +1,410 @@ +#ifndef NVC0_COMPUTE_XML +#define NVC0_COMPUTE_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_compute.xml ( 11145 bytes, from 2013-04-27 14:00:13) +- copyright.xml ( 6452 bytes, from 2013-02-27 22:13:22) +- nvchipsets.xml ( 3954 bytes, from 2013-04-27 14:00:13) +- nv_object.xml ( 14395 bytes, from 2013-04-27 14:00:13) +- nv_defs.xml ( 4437 bytes, from 2013-02-27 22:13:22) +- nv50_defs.xml ( 16652 bytes, from 2013-06-20 13:45:33) + +Copyright (C) 2006-2013 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice 
(including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NVC0_COMPUTE_LOCAL_POS_ALLOC 0x00000204 + +#define NVC0_COMPUTE_LOCAL_NEG_ALLOC 0x00000208 + +#define NVC0_COMPUTE_WARP_CSTACK_SIZE 0x0000020c + +#define NVC0_COMPUTE_TEX_LIMITS 0x00000210 +#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f +#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0 +#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000 +#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004 +#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0 +#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4 +#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000 +#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007 + +#define NVC0_COMPUTE_SHARED_BASE 0x00000214 + +#define NVC0_COMPUTE_MEM_BARRIER 0x0000021c +#define NVC0_COMPUTE_MEM_BARRIER_UNK0 0x00000001 +#define NVC0_COMPUTE_MEM_BARRIER_UNK1 0x00000002 +#define NVC0_COMPUTE_MEM_BARRIER_UNK2 0x00000004 +#define NVC0_COMPUTE_MEM_BARRIER_UNK4 0x00000010 +#define NVC0_COMPUTE_MEM_BARRIER_UNK8 0x00000100 +#define NVC0_COMPUTE_MEM_BARRIER_UNK12 0x00001000 + +#define NVC0_COMPUTE_BIND_TSC 0x00000228 +#define NVC0_COMPUTE_BIND_TSC_ACTIVE 0x00000001 +#define NVC0_COMPUTE_BIND_TSC_SAMPLER__MASK 0x00000ff0 +#define NVC0_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4 +#define NVC0_COMPUTE_BIND_TSC_TSC__MASK 0x01fff000 +#define NVC0_COMPUTE_BIND_TSC_TSC__SHIFT 12 + +#define 
NVC0_COMPUTE_BIND_TIC 0x0000022c +#define NVC0_COMPUTE_BIND_TIC_ACTIVE 0x00000001 +#define NVC0_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NVC0_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1 +#define NVC0_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NVC0_COMPUTE_BIND_TIC_TIC__SHIFT 9 + +#define NVC0_COMPUTE_BIND_TSC2 0x00000230 +#define NVC0_COMPUTE_BIND_TSC2_ACTIVE 0x00000001 +#define NVC0_COMPUTE_BIND_TSC2_SAMPLER__MASK 0x00000010 +#define NVC0_COMPUTE_BIND_TSC2_SAMPLER__SHIFT 4 +#define NVC0_COMPUTE_BIND_TSC2_TSC__MASK 0x01fff000 +#define NVC0_COMPUTE_BIND_TSC2_TSC__SHIFT 12 + +#define NVC0_COMPUTE_BIND_TIC2 0x00000234 +#define NVC0_COMPUTE_BIND_TIC2_ACTIVE 0x00000001 +#define NVC0_COMPUTE_BIND_TIC2_TEXTURE__MASK 0x00000002 +#define NVC0_COMPUTE_BIND_TIC2_TEXTURE__SHIFT 1 +#define NVC0_COMPUTE_BIND_TIC2_TIC__MASK 0x7ffffe00 +#define NVC0_COMPUTE_BIND_TIC2_TIC__SHIFT 9 + +#define NVC0_COMPUTE_GRIDDIM_YX 0x00000238 +#define NVC0_COMPUTE_GRIDDIM_YX_X__MASK 0x0000ffff +#define NVC0_COMPUTE_GRIDDIM_YX_X__SHIFT 0 +#define NVC0_COMPUTE_GRIDDIM_YX_Y__MASK 0xffff0000 +#define NVC0_COMPUTE_GRIDDIM_YX_Y__SHIFT 16 + +#define NVC0_COMPUTE_GRIDDIM_Z 0x0000023c + +#define NVC0_COMPUTE_UNK244_TIC_FLUSH 0x00000244 + +#define NVC0_COMPUTE_SHARED_SIZE 0x0000024c + +#define NVC0_COMPUTE_THREADS_ALLOC 0x00000250 + +#define NVC0_COMPUTE_BARRIER_ALLOC 0x00000254 + +#define NVC0_COMPUTE_UNK028C 0x0000028c + +#define NVC0_COMPUTE_COMPUTE_BEGIN 0x0000029c +#define NVC0_COMPUTE_COMPUTE_BEGIN_UNK0 0x00000001 + +#define NVC0_COMPUTE_UNK02A0 0x000002a0 + +#define NVC0_COMPUTE_CP_GPR_ALLOC 0x000002c0 + +#define NVC0_COMPUTE_UNK02C4 0x000002c4 + +#define NVC0_COMPUTE_GLOBAL_BASE 0x000002c8 +#define NVC0_COMPUTE_GLOBAL_BASE_HIGH__MASK 0x000000ff +#define NVC0_COMPUTE_GLOBAL_BASE_HIGH__SHIFT 0 +#define NVC0_COMPUTE_GLOBAL_BASE_INDEX__MASK 0x00ff0000 +#define NVC0_COMPUTE_GLOBAL_BASE_INDEX__SHIFT 16 +#define NVC0_COMPUTE_GLOBAL_BASE_READ_OK 0x40000000 +#define NVC0_COMPUTE_GLOBAL_BASE_WRITE_OK 
0x80000000 + +#define NVC8_COMPUTE_UNK02E0 0x000002e0 + +#define NVC0_COMPUTE_CACHE_SPLIT 0x00000308 +#define NVC0_COMPUTE_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001 +#define NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003 + +#define NVC0_COMPUTE_UNK030C 0x0000030c + +#define NVC0_COMPUTE_UNK0360 0x00000360 +#define NVC0_COMPUTE_UNK0360_UNK0 0x00000001 +#define NVC0_COMPUTE_UNK0360_UNK8__MASK 0x00000300 +#define NVC0_COMPUTE_UNK0360_UNK8__SHIFT 8 +#define NVC8_COMPUTE_UNK0360_UNK10__MASK 0x00000c00 +#define NVC8_COMPUTE_UNK0360_UNK10__SHIFT 10 + +#define NVC0_COMPUTE_LAUNCH 0x00000368 + +#define NVC0_COMPUTE_UNK036C 0x0000036c +#define NVC0_COMPUTE_UNK036C_UNK0__MASK 0x00000003 +#define NVC0_COMPUTE_UNK036C_UNK0__SHIFT 0 +#define NVC8_COMPUTE_UNK036C_UNK2__MASK 0x0000000c +#define NVC8_COMPUTE_UNK036C_UNK2__SHIFT 2 + +#define NVC0_COMPUTE_BLOCKDIM_YX 0x000003ac +#define NVC0_COMPUTE_BLOCKDIM_YX_X__MASK 0x0000ffff +#define NVC0_COMPUTE_BLOCKDIM_YX_X__SHIFT 0 +#define NVC0_COMPUTE_BLOCKDIM_YX_Y__MASK 0xffff0000 +#define NVC0_COMPUTE_BLOCKDIM_YX_Y__SHIFT 16 + +#define NVC0_COMPUTE_BLOCKDIM_Z 0x000003b0 + +#define NVC0_COMPUTE_CP_START_ID 0x000003b4 + +#define NVC0_COMPUTE_FIRMWARE(i0) (0x00000500 + 0x4*(i0)) +#define NVC0_COMPUTE_FIRMWARE__ESIZE 0x00000004 +#define NVC0_COMPUTE_FIRMWARE__LEN 0x00000020 + +#define NVC0_COMPUTE_MP_LIMIT 0x00000758 + +#define NVC0_COMPUTE_LOCAL_BASE 0x0000077c + +#define NVC0_COMPUTE_GRIDID 0x00000780 + +#define NVC0_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790 + +#define NVC0_COMPUTE_TEMP_ADDRESS_LOW 0x00000794 + +#define NVC0_COMPUTE_TEMP_SIZE_HIGH 0x00000798 + +#define NVC0_COMPUTE_TEMP_SIZE_LOW 0x0000079c + +#define NVC0_COMPUTE_WARP_TEMP_ALLOC 0x000007a0 + +#define NVC0_COMPUTE_COMPUTE_END 0x00000a04 +#define NVC0_COMPUTE_COMPUTE_END_UNK0 0x00000001 + +#define NVC0_COMPUTE_UNK0A08 0x00000a08 + +#define NVC0_COMPUTE_CALL_LIMIT_LOG 0x00000d64 + +#define NVC0_COMPUTE_UNK0D94 0x00000d94 + +#define NVC0_COMPUTE_WATCHDOG_TIMER 0x00000de4 + 
+#define NVC0_COMPUTE_UNK10F4 0x000010f4 +#define NVC0_COMPUTE_UNK10F4_UNK0 0x00000001 +#define NVC0_COMPUTE_UNK10F4_UNK4 0x00000010 +#define NVC0_COMPUTE_UNK10F4_UNK8 0x00000100 + +#define NVC0_COMPUTE_LINKED_TSC 0x00001234 + +#define NVC0_COMPUTE_UNK1288_TIC_FLUSH 0x00001288 + +#define NVC0_COMPUTE_UNK12AC 0x000012ac + +#define NVC0_COMPUTE_TSC_FLUSH 0x00001330 +#define NVC0_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_COMPUTE_TIC_FLUSH 0x00001334 +#define NVC0_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_COMPUTE_TEX_CACHE_CTL 0x00001338 +#define NVC0_COMPUTE_TEX_CACHE_CTL_UNK0__MASK 0x00000007 +#define NVC0_COMPUTE_TEX_CACHE_CTL_UNK0__SHIFT 0 +#define NVC0_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0 +#define NVC0_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4 + +#define NVC0_COMPUTE_UNK1354 0x00001354 + +#define NVC0_COMPUTE_UNK1424_TSC_FLUSH 0x00001424 + +#define NVC0_COMPUTE_COND_ADDRESS_HIGH 0x00001550 + +#define NVC0_COMPUTE_COND_ADDRESS_LOW 0x00001554 + +#define NVC0_COMPUTE_COND_MODE 0x00001558 +#define NVC0_COMPUTE_COND_MODE_NEVER 0x00000000 +#define NVC0_COMPUTE_COND_MODE_ALWAYS 0x00000001 +#define NVC0_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002 +#define NVC0_COMPUTE_COND_MODE_EQUAL 0x00000003 +#define NVC0_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVC0_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c + +#define NVC0_COMPUTE_TSC_ADDRESS_LOW 0x00001560 + +#define NVC0_COMPUTE_TSC_LIMIT 0x00001564 + +#define NVC0_COMPUTE_TIC_ADDRESS_HIGH 0x00001574 + +#define NVC0_COMPUTE_TIC_ADDRESS_LOW 0x00001578 + +#define NVC0_COMPUTE_TIC_LIMIT 0x0000157c + +#define NVC0_COMPUTE_CODE_ADDRESS_HIGH 0x00001608 + +#define NVC0_COMPUTE_CODE_ADDRESS_LOW 0x0000160c + +#define NVC0_COMPUTE_TEX_MISC 0x00001664 +#define NVC0_COMPUTE_TEX_MISC_UNK 0x00000001 +#define 
NVC0_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002 + +#define NVC0_COMPUTE_UNK1690 0x00001690 +#define NVC0_COMPUTE_UNK1690_ALWAYS_DERIV 0x00000001 +#define NVC0_COMPUTE_UNK1690_UNK16 0x00010000 + +#define NVC0_COMPUTE_CB_BIND 0x00001694 +#define NVC0_COMPUTE_CB_BIND_VALID 0x00000001 +#define NVC0_COMPUTE_CB_BIND_INDEX__MASK 0x00001f00 +#define NVC0_COMPUTE_CB_BIND_INDEX__SHIFT 8 + +#define NVC0_COMPUTE_FLUSH 0x00001698 +#define NVC0_COMPUTE_FLUSH_CODE 0x00000001 +#define NVC0_COMPUTE_FLUSH_GLOBAL 0x00000010 +#define NVC0_COMPUTE_FLUSH_UNK8 0x00000100 +#define NVC0_COMPUTE_FLUSH_CB 0x00001000 + +#define NVC0_COMPUTE_UNK1930 0x00001930 + +#define NVC0_COMPUTE_UNK1944 0x00001944 + +#define NVC0_COMPUTE_DELAY 0x00001a24 + +#define NVC0_COMPUTE_UNK1A2C(i0) (0x00001a2c + 0x4*(i0)) +#define NVC0_COMPUTE_UNK1A2C__ESIZE 0x00000004 +#define NVC0_COMPUTE_UNK1A2C__LEN 0x00000005 + +#define NVC0_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00 + +#define NVC0_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04 + +#define NVC0_COMPUTE_QUERY_SEQUENCE 0x00001b08 + +#define NVC0_COMPUTE_QUERY_GET 0x00001b0c +#define NVC0_COMPUTE_QUERY_GET_MODE__MASK 0x00000003 +#define NVC0_COMPUTE_QUERY_GET_MODE__SHIFT 0 +#define NVC0_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000 +#define NVC0_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003 +#define NVC0_COMPUTE_QUERY_GET_INTR 0x00100000 +#define NVC0_COMPUTE_QUERY_GET_SHORT 0x10000000 + +#define NVC0_COMPUTE_CB_SIZE 0x00002380 + +#define NVC0_COMPUTE_CB_ADDRESS_HIGH 0x00002384 + +#define NVC0_COMPUTE_CB_ADDRESS_LOW 0x00002388 + +#define NVC0_COMPUTE_CB_POS 0x0000238c + +#define NVC0_COMPUTE_CB_DATA(i0) (0x00002390 + 0x4*(i0)) +#define NVC0_COMPUTE_CB_DATA__ESIZE 0x00000004 +#define NVC0_COMPUTE_CB_DATA__LEN 0x00000010 + +#define NVC0_COMPUTE_IMAGE(i0) (0x00002700 + 0x20*(i0)) +#define NVC0_COMPUTE_IMAGE__ESIZE 0x00000020 +#define NVC0_COMPUTE_IMAGE__LEN 0x00000008 + +#define NVC0_COMPUTE_IMAGE_ADDRESS_HIGH(i0) (0x00002700 + 0x20*(i0)) + +#define 
NVC0_COMPUTE_IMAGE_ADDRESS_LOW(i0) (0x00002704 + 0x20*(i0)) + +#define NVC0_COMPUTE_IMAGE_WIDTH(i0) (0x00002708 + 0x20*(i0)) + +#define NVC0_COMPUTE_IMAGE_HEIGHT(i0) (0x0000270c + 0x20*(i0)) +#define NVC0_COMPUTE_IMAGE_HEIGHT_HEIGHT__MASK 0x0000ffff +#define NVC0_COMPUTE_IMAGE_HEIGHT_HEIGHT__SHIFT 0 +#define NVC0_COMPUTE_IMAGE_HEIGHT_UNK16 0x00010000 +#define NVC0_COMPUTE_IMAGE_HEIGHT_LINEAR 0x00100000 + +#define NVC0_COMPUTE_IMAGE_FORMAT(i0) (0x00002710 + 0x20*(i0)) +#define NVC0_COMPUTE_IMAGE_FORMAT_UNK0 0x00000001 +#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_COLOR__MASK 0x00000ff0 +#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_COLOR__SHIFT 4 +#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_ZETA__MASK 0x0001f000 +#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_ZETA__SHIFT 12 + +#define NVC0_COMPUTE_IMAGE_TILE_MODE(i0) (0x00002714 + 0x20*(i0)) + +#define NVC0_COMPUTE_MP_PM_SET(i0) (0x0000335c + 0x4*(i0)) +#define NVC0_COMPUTE_MP_PM_SET__ESIZE 0x00000004 +#define NVC0_COMPUTE_MP_PM_SET__LEN 0x00000008 + +#define NVC0_COMPUTE_MP_PM_SIGSEL(i0) (0x0000337c + 0x4*(i0)) +#define NVC0_COMPUTE_MP_PM_SIGSEL__ESIZE 0x00000004 +#define NVC0_COMPUTE_MP_PM_SIGSEL__LEN 0x00000008 + +#define NVC0_COMPUTE_MP_PM_SRCSEL(i0) (0x0000339c + 0x4*(i0)) +#define NVC0_COMPUTE_MP_PM_SRCSEL__ESIZE 0x00000004 +#define NVC0_COMPUTE_MP_PM_SRCSEL__LEN 0x00000008 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP0__MASK 0x00000007 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP0__SHIFT 0 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG0__MASK 0x00000070 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG0__SHIFT 4 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP1__MASK 0x00000700 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP1__SHIFT 8 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG1__MASK 0x00007000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG1__SHIFT 12 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP2__MASK 0x00070000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP2__SHIFT 16 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG2__MASK 0x00700000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG2__SHIFT 20 +#define 
NVC0_COMPUTE_MP_PM_SRCSEL_GRP3__MASK 0x07000000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP3__SHIFT 24 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG3__MASK 0x70000000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG3__SHIFT 28 + +#define NVC0_COMPUTE_MP_PM_OP(i0) (0x000033bc + 0x4*(i0)) +#define NVC0_COMPUTE_MP_PM_OP__ESIZE 0x00000004 +#define NVC0_COMPUTE_MP_PM_OP__LEN 0x00000008 +#define NVC0_COMPUTE_MP_PM_OP_MODE__MASK 0x00000001 +#define NVC0_COMPUTE_MP_PM_OP_MODE__SHIFT 0 +#define NVC0_COMPUTE_MP_PM_OP_MODE_LOGOP 0x00000000 +#define NVC0_COMPUTE_MP_PM_OP_MODE_LOGOP_PULSE 0x00000001 +#define NVC0_COMPUTE_MP_PM_OP_FUNC__MASK 0x000ffff0 +#define NVC0_COMPUTE_MP_PM_OP_FUNC__SHIFT 4 + +#define NVC0_COMPUTE_MP_PM_UNK33DC 0x000033dc + + +#endif /* NVC0_COMPUTE_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c new file mode 100644 index 00000000000..e0c2b74e196 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -0,0 +1,402 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" +#include "util/u_framebuffer.h" + +#ifdef NVC0_WITH_DRAW_MODULE +#include "draw/draw_context.h" +#endif + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_screen.h" +#include "nvc0/nvc0_resource.h" + +static void +nvc0_flush(struct pipe_context *pipe, + struct pipe_fence_handle **fence, + unsigned flags) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_screen *screen = &nvc0->screen->base; + + if (fence) + nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence); + + PUSH_KICK(nvc0->base.pushbuf); /* fencing handled in kick_notify */ + + nouveau_context_update_frame_stats(&nvc0->base); +} + +static void +nvc0_texture_barrier(struct pipe_context *pipe) +{ + struct nouveau_pushbuf *push = nvc0_context(pipe)->base.pushbuf; + + IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); + IMMED_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 0); +} + +static void +nvc0_context_unreference_resources(struct nvc0_context *nvc0) +{ + unsigned s, i; + + nouveau_bufctx_del(&nvc0->bufctx_3d); + nouveau_bufctx_del(&nvc0->bufctx); + nouveau_bufctx_del(&nvc0->bufctx_cp); + + util_unreference_framebuffer_state(&nvc0->framebuffer); + + for (i = 0; i < nvc0->num_vtxbufs; ++i) + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL); + + pipe_resource_reference(&nvc0->idxbuf.buffer, NULL); + + for (s = 0; s < 6; ++s) { + for (i = 0; i < nvc0->num_textures[s]; ++i) + pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); + + for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; ++i) + if (!nvc0->constbuf[s][i].user) + pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, NULL); + } + + for (s = 0; s < 2; ++s) { + for (i = 0; i < NVC0_MAX_SURFACE_SLOTS; ++i) + 
pipe_surface_reference(&nvc0->surfaces[s][i], NULL); + } + + for (i = 0; i < nvc0->num_tfbbufs; ++i) + pipe_so_target_reference(&nvc0->tfbbuf[i], NULL); + + for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource **res = util_dynarray_element( + &nvc0->global_residents, struct pipe_resource *, i); + pipe_resource_reference(res, NULL); + } + util_dynarray_fini(&nvc0->global_residents); +} + +static void +nvc0_destroy(struct pipe_context *pipe) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + if (nvc0->screen->cur_ctx == nvc0) { + nvc0->base.pushbuf->kick_notify = NULL; + nvc0->screen->cur_ctx = NULL; + nouveau_pushbuf_bufctx(nvc0->base.pushbuf, NULL); + } + nouveau_pushbuf_kick(nvc0->base.pushbuf, nvc0->base.pushbuf->channel); + + nvc0_context_unreference_resources(nvc0); + nvc0_blitctx_destroy(nvc0); + +#ifdef NVC0_WITH_DRAW_MODULE + draw_destroy(nvc0->draw); +#endif + + nouveau_context_destroy(&nvc0->base); +} + +void +nvc0_default_kick_notify(struct nouveau_pushbuf *push) +{ + struct nvc0_screen *screen = push->user_priv; + + if (screen) { + nouveau_fence_next(&screen->base); + nouveau_fence_update(&screen->base, TRUE); + if (screen->cur_ctx) + screen->cur_ctx->state.flushed = TRUE; + } + NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1); +} + +static int +nvc0_invalidate_resource_storage(struct nouveau_context *ctx, + struct pipe_resource *res, + int ref) +{ + struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe); + unsigned s, i; + + if (res->bind & PIPE_BIND_RENDER_TARGET) { + for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) { + if (nvc0->framebuffer.cbufs[i] && + nvc0->framebuffer.cbufs[i]->texture == res) { + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + if (!--ref) + return ref; + } + } + } + if (res->bind & PIPE_BIND_DEPTH_STENCIL) { + if (nvc0->framebuffer.zsbuf && + nvc0->framebuffer.zsbuf->texture == res) { + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; 
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + if (!--ref) + return ref; + } + } + + if (res->bind & PIPE_BIND_VERTEX_BUFFER) { + for (i = 0; i < nvc0->num_vtxbufs; ++i) { + if (nvc0->vtxbuf[i].buffer == res) { + nvc0->dirty |= NVC0_NEW_ARRAYS; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); + if (!--ref) + return ref; + } + } + } + if (res->bind & PIPE_BIND_INDEX_BUFFER) { + if (nvc0->idxbuf.buffer == res) { + nvc0->dirty |= NVC0_NEW_IDXBUF; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX); + if (!--ref) + return ref; + } + } + + if (res->bind & PIPE_BIND_SAMPLER_VIEW) { + for (s = 0; s < 5; ++s) { + for (i = 0; i < nvc0->num_textures[s]; ++i) { + if (nvc0->textures[s][i] && + nvc0->textures[s][i]->texture == res) { + nvc0->textures_dirty[s] |= 1 << i; + nvc0->dirty |= NVC0_NEW_TEXTURES; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); + if (!--ref) + return ref; + } + } + } + } + + if (res->bind & PIPE_BIND_CONSTANT_BUFFER) { + for (s = 0; s < 5; ++s) { + for (i = 0; i < nvc0->num_vtxbufs; ++i) { + if (!nvc0->constbuf[s][i].user && + nvc0->constbuf[s][i].u.buf == res) { + nvc0->dirty |= NVC0_NEW_CONSTBUF; + nvc0->constbuf_dirty[s] |= 1 << i; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i)); + if (!--ref) + return ref; + } + } + } + } + + return ref; +} + +static void +nvc0_context_get_sample_position(struct pipe_context *, unsigned, unsigned, + float *); + +struct pipe_context * +nvc0_create(struct pipe_screen *pscreen, void *priv) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + struct nvc0_context *nvc0; + struct pipe_context *pipe; + int ret; + uint32_t flags; + + nvc0 = CALLOC_STRUCT(nvc0_context); + if (!nvc0) + return NULL; + pipe = &nvc0->base.pipe; + + if (!nvc0_blitctx_create(nvc0)) + goto out_err; + + nvc0->base.pushbuf = screen->base.pushbuf; + nvc0->base.client = screen->base.client; + + ret = nouveau_bufctx_new(screen->base.client, 2, &nvc0->bufctx); + if (!ret) + ret = 
nouveau_bufctx_new(screen->base.client, NVC0_BIND_3D_COUNT, + &nvc0->bufctx_3d); + if (!ret) + ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_CP_COUNT, + &nvc0->bufctx_cp); + if (ret) + goto out_err; + + nvc0->screen = screen; + nvc0->base.screen = &screen->base; + + pipe->screen = pscreen; + pipe->priv = priv; + + pipe->destroy = nvc0_destroy; + + pipe->draw_vbo = nvc0_draw_vbo; + pipe->clear = nvc0_clear; + pipe->launch_grid = (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) ? + nve4_launch_grid : nvc0_launch_grid; + + pipe->flush = nvc0_flush; + pipe->texture_barrier = nvc0_texture_barrier; + pipe->get_sample_position = nvc0_context_get_sample_position; + + if (!screen->cur_ctx) { + screen->cur_ctx = nvc0; + nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx); + } + screen->base.pushbuf->kick_notify = nvc0_default_kick_notify; + + nvc0_init_query_functions(nvc0); + nvc0_init_surface_functions(nvc0); + nvc0_init_state_functions(nvc0); + nvc0_init_transfer_functions(nvc0); + nvc0_init_resource_functions(pipe); + + nvc0->base.invalidate_resource_storage = nvc0_invalidate_resource_storage; + +#ifdef NVC0_WITH_DRAW_MODULE + /* no software fallbacks implemented */ + nvc0->draw = draw_create(pipe); + assert(nvc0->draw); + draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0)); +#endif + + pipe->create_video_codec = nvc0_create_decoder; + pipe->create_video_buffer = nvc0_video_buffer_create; + + /* shader builtin library is per-screen, but we need a context for m2mf */ + nvc0_program_library_upload(nvc0); + + /* add permanently resident buffers to bufctxts */ + + flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + + BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text); + BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo); + BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc); + if (screen->compute) { + BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->text); + BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc); 
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm); + } + + flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR; + + BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache); + if (screen->compute) + BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->tls); + + flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; + + BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->fence.bo); + BCTX_REFN_bo(nvc0->bufctx, FENCE, flags, screen->fence.bo); + if (screen->compute) + BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->fence.bo); + + nvc0->base.scratch.bo_size = 2 << 20; + + memset(nvc0->tex_handles, ~0, sizeof(nvc0->tex_handles)); + + util_dynarray_init(&nvc0->global_residents); + + return pipe; + +out_err: + if (nvc0) { + if (nvc0->bufctx_3d) + nouveau_bufctx_del(&nvc0->bufctx_3d); + if (nvc0->bufctx_cp) + nouveau_bufctx_del(&nvc0->bufctx_cp); + if (nvc0->bufctx) + nouveau_bufctx_del(&nvc0->bufctx); + if (nvc0->blit) + FREE(nvc0->blit); + FREE(nvc0); + } + return NULL; +} + +void +nvc0_bufctx_fence(struct nvc0_context *nvc0, struct nouveau_bufctx *bufctx, + boolean on_flush) +{ + struct nouveau_list *list = on_flush ? 
&bufctx->current : &bufctx->pending; + struct nouveau_list *it; + NOUVEAU_DRV_STAT_IFD(unsigned count = 0); + + for (it = list->next; it != list; it = it->next) { + struct nouveau_bufref *ref = (struct nouveau_bufref *)it; + struct nv04_resource *res = ref->priv; + if (res) + nvc0_resource_validate(res, (unsigned)ref->priv_data); + NOUVEAU_DRV_STAT_IFD(count++); + } + NOUVEAU_DRV_STAT(&nvc0->screen->base, resource_validate_count, count); +} + +static void +nvc0_context_get_sample_position(struct pipe_context *pipe, + unsigned sample_count, unsigned sample_index, + float *xy) +{ + static const uint8_t ms1[1][2] = { { 0x8, 0x8 } }; + static const uint8_t ms2[2][2] = { + { 0x4, 0x4 }, { 0xc, 0xc } }; /* surface coords (0,0), (1,0) */ + static const uint8_t ms4[4][2] = { + { 0x6, 0x2 }, { 0xe, 0x6 }, /* (0,0), (1,0) */ + { 0x2, 0xa }, { 0xa, 0xe } }; /* (0,1), (1,1) */ + static const uint8_t ms8[8][2] = { + { 0x1, 0x7 }, { 0x5, 0x3 }, /* (0,0), (1,0) */ + { 0x3, 0xd }, { 0x7, 0xb }, /* (0,1), (1,1) */ + { 0x9, 0x5 }, { 0xf, 0x1 }, /* (2,0), (3,0) */ + { 0xb, 0xf }, { 0xd, 0x9 } }; /* (2,1), (3,1) */ +#if 0 + /* NOTE: there are alternative modes for MS2 and MS8, currently not used */ + static const uint8_t ms8_alt[8][2] = { + { 0x9, 0x5 }, { 0x7, 0xb }, /* (2,0), (1,1) */ + { 0xd, 0x9 }, { 0x5, 0x3 }, /* (3,1), (1,0) */ + { 0x3, 0xd }, { 0x1, 0x7 }, /* (0,1), (0,0) */ + { 0xb, 0xf }, { 0xf, 0x1 } }; /* (2,1), (3,0) */ +#endif + + const uint8_t (*ptr)[2]; + + switch (sample_count) { + case 0: + case 1: ptr = ms1; break; + case 2: ptr = ms2; break; + case 4: ptr = ms4; break; + case 8: ptr = ms8; break; + default: + assert(0); + return; /* bad sample count -> undefined locations */ + } + xy[0] = ptr[sample_index][0] * 0.0625f; + xy[1] = ptr[sample_index][1] * 0.0625f; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h new file mode 100644 index 00000000000..3fbecdc1391 --- /dev/null +++ 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -0,0 +1,357 @@
+#ifndef __NVC0_CONTEXT_H__
+#define __NVC0_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_inlines.h"
+#include "util/u_dynarray.h"
+
+#ifdef NVC0_WITH_DRAW_MODULE
+#include "draw/draw_vertex.h"
+#endif
+
+#include "nv50/nv50_debug.h"
+#include "nvc0/nvc0_winsys.h"
+#include "nvc0/nvc0_stateobj.h"
+#include "nvc0/nvc0_screen.h"
+#include "nvc0/nvc0_program.h"
+#include "nvc0/nvc0_resource.h"
+
+#include "nv50/nv50_transfer.h"
+
+#include "nouveau_context.h"
+
+#include "nvc0/nvc0_3ddefs.xml.h"
+#include "nvc0/nvc0_3d.xml.h"
+#include "nvc0/nvc0_2d.xml.h"
+#include "nvc0/nvc0_m2mf.xml.h"
+#include "nvc0/nve4_p2mf.xml.h"
+
+/* Dirty flags for the 3D state, stored in nvc0_context::dirty. */
+/* NOTE: must keep NVC0_NEW_...PROG in consecutive bits in this order */
+#define NVC0_NEW_BLEND        (1 << 0)
+#define NVC0_NEW_RASTERIZER   (1 << 1)
+#define NVC0_NEW_ZSA          (1 << 2)
+#define NVC0_NEW_VERTPROG     (1 << 3)
+#define NVC0_NEW_TCTLPROG     (1 << 4)
+#define NVC0_NEW_TEVLPROG     (1 << 5)
+#define NVC0_NEW_GMTYPROG     (1 << 6)
+#define NVC0_NEW_FRAGPROG     (1 << 7)
+#define NVC0_NEW_BLEND_COLOUR (1 << 8)
+#define NVC0_NEW_STENCIL_REF  (1 << 9)
+#define NVC0_NEW_CLIP         (1 << 10)
+#define NVC0_NEW_SAMPLE_MASK  (1 << 11)
+#define NVC0_NEW_FRAMEBUFFER  (1 << 12)
+#define NVC0_NEW_STIPPLE      (1 << 13)
+#define NVC0_NEW_SCISSOR      (1 << 14)
+#define NVC0_NEW_VIEWPORT     (1 << 15)
+#define NVC0_NEW_ARRAYS       (1 << 16)
+#define NVC0_NEW_VERTEX       (1 << 17)
+#define NVC0_NEW_CONSTBUF     (1 << 18)
+#define NVC0_NEW_TEXTURES     (1 << 19)
+#define NVC0_NEW_SAMPLERS     (1 << 20)
+#define NVC0_NEW_TFB_TARGETS  (1 << 21)
+#define NVC0_NEW_IDXBUF       (1 << 22)
+#define NVC0_NEW_SURFACES     (1 << 23)
+
+/* Dirty flags for the compute state, stored in nvc0_context::dirty_cp. */
+#define NVC0_NEW_CP_PROGRAM   (1 << 0)
+#define NVC0_NEW_CP_SURFACES  (1 << 1)
+#define NVC0_NEW_CP_TEXTURES  (1 << 2)
+#define NVC0_NEW_CP_SAMPLERS  (1 << 3)
+#define NVC0_NEW_CP_CONSTBUF  (1 << 4)
+#define NVC0_NEW_CP_GLOBALS   (1 << 5)
+
+/* 3d bufctx (during draw_vbo, blit_3d) */
+/* Bin layout: TEX reserves 32 bins per stage starting at 4 and CB reserves
+ * 16 bins per stage starting at 164 (= 4 + 32 * 5), so with 5 stages the CB
+ * range ends at 243 and TFB follows at 244.
+ */
+#define NVC0_BIND_FB            0
+#define NVC0_BIND_VTX           1
+#define NVC0_BIND_VTX_TMP       2
+#define NVC0_BIND_IDX           3
+#define NVC0_BIND_TEX(s, i)  (  4 + 32 * (s) + (i))
+#define NVC0_BIND_CB(s, i)   (164 + 16 * (s) + (i))
+#define NVC0_BIND_TFB         244
+#define NVC0_BIND_SUF         245
+#define NVC0_BIND_SCREEN      246
+#define NVC0_BIND_TLS         247
+#define NVC0_BIND_3D_COUNT    248
+
+/* compute bufctx (during launch_grid) */
+#define NVC0_BIND_CP_CB(i)     (  0 + (i))
+#define NVC0_BIND_CP_TEX(i)    ( 16 + (i))
+#define NVC0_BIND_CP_SUF        48
+#define NVC0_BIND_CP_GLOBAL     49
+#define NVC0_BIND_CP_DESC       50
+#define NVC0_BIND_CP_SCREEN     51
+#define NVC0_BIND_CP_QUERY      52
+#define NVC0_BIND_CP_COUNT      53
+
+/* bufctx for other operations */
+#define NVC0_BIND_2D            0
+#define NVC0_BIND_M2MF          0
+#define NVC0_BIND_FENCE         1
+
+
+struct nvc0_blitctx;
+
+boolean nvc0_blitctx_create(struct nvc0_context *);
+void nvc0_blitctx_destroy(struct nvc0_context *);
+
+/* Per-context driver state.  The generic nouveau context (which embeds the
+ * pipe_context handed out by nvc0_create) is the first member.
+ */
+struct nvc0_context {
+   struct nouveau_context base;
+
+   struct nouveau_bufctx *bufctx_3d; /* bins: NVC0_BIND_* (3d) */
+   struct nouveau_bufctx *bufctx;    /* bins: NVC0_BIND_2D/M2MF/FENCE */
+   struct nouveau_bufctx *bufctx_cp; /* bins: NVC0_BIND_CP_* */
+
+   struct nvc0_screen *screen;
+
+   void (*m2mf_copy_rect)(struct nvc0_context *,
+                          const struct nv50_m2mf_rect *dst,
+                          const struct nv50_m2mf_rect *src,
+                          uint32_t nblocksx, uint32_t nblocksy);
+
+   uint32_t dirty;    /* NVC0_NEW_* flags */
+   uint32_t dirty_cp; /* dirty flags for compute state */
+
+   struct {
+      boolean flushed;
+      boolean rasterizer_discard;
+      boolean early_z_forced;
+      boolean prim_restart;
+      uint32_t instance_elts; /* bitmask of per-instance elements */
+      uint32_t instance_base;
+      uint32_t constant_vbos;
+      uint32_t constant_elts;
+      int32_t index_bias;
+      uint16_t scissor;
+      uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
+      uint8_t num_vtxbufs;
+      uint8_t num_vtxelts;
+      uint8_t num_textures[6];
+      uint8_t num_samplers[6];
+      uint8_t tls_required; /* bitmask of shader types using l[] */
+      uint8_t c14_bound; /* whether immediate array constbuf is bound */
+      uint8_t clip_enable;
+      uint32_t clip_mode;
+      uint32_t uniform_buffer_bound[5];
+      struct nvc0_transform_feedback_state *tfb;
+   } state;
+
+   struct nvc0_blend_stateobj *blend;
+   struct nvc0_rasterizer_stateobj *rast;
+   struct nvc0_zsa_stateobj *zsa;
+   struct nvc0_vertex_stateobj *vertex;
+
+   struct nvc0_program *vertprog;
+   struct nvc0_program *tctlprog;
+   struct nvc0_program *tevlprog;
+   struct nvc0_program *gmtyprog;
+   struct nvc0_program *fragprog;
+   struct nvc0_program *compprog;
+
+   /* first index: shader stage as returned by nvc0_shader_stage() */
+   struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
+   uint16_t constbuf_dirty[6];
+
+   struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+   unsigned num_vtxbufs;
+   struct pipe_index_buffer idxbuf;
+   uint32_t constant_vbos;
+   uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
+   uint32_t vb_elt_first; /* from pipe_draw_info, for vertex upload */
+   uint32_t vb_elt_limit; /* max - min element (count - 1) */
+   uint32_t instance_off; /* current base vertex for instanced arrays */
+   uint32_t instance_max; /* last instance for current draw call */
+
+   struct pipe_sampler_view *textures[6][PIPE_MAX_SAMPLERS];
+   unsigned num_textures[6];
+   uint32_t textures_dirty[6];
+   struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS];
+   unsigned num_samplers[6];
+   uint16_t samplers_dirty[6];
+
+   uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
+
+   struct pipe_framebuffer_state framebuffer;
+   struct pipe_blend_color blend_colour;
+   struct pipe_stencil_ref stencil_ref;
+   struct pipe_poly_stipple stipple;
+   struct pipe_scissor_state scissor;
+   struct pipe_viewport_state viewport;
+   struct pipe_clip_state clip;
+
+   unsigned sample_mask;
+
+   boolean vbo_push_hint;
+
+   uint8_t tfbbuf_dirty;
+   struct pipe_stream_output_target *tfbbuf[4];
+   unsigned num_tfbbufs;
+
+   struct pipe_query *cond_query;
+   boolean cond_cond;
+   uint cond_mode;
+
+   struct nvc0_blitctx *blit;
+
+   struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS];
+   uint16_t surfaces_dirty[2];
+   uint16_t surfaces_valid[2];
+   uint32_t vport_int[2];
+
+   /* array of struct pipe_resource * */
+   struct util_dynarray global_residents;
+
+#ifdef NVC0_WITH_DRAW_MODULE
+   struct draw_context *draw;
+#endif
+};
+
+/* Downcast a pipe_context to the nvc0 context that embeds it.
+ * NOTE(review): a plain pointer cast, so it presumably relies on the
+ * pipe_context sitting at offset 0 of nouveau_context -- confirm there.
+ */
+static INLINE struct nvc0_context *
+nvc0_context(struct pipe_context *pipe)
+{
+   return (struct nvc0_context *)pipe;
+}
+
+/* Map a PIPE_SHADER_* type to the driver's stage index (0..5).  Indices 1
+ * and 2 are reserved for the tessellation stages, whose cases are disabled.
+ * Unknown types assert and fall back to stage 0.
+ */
+static INLINE unsigned
+nvc0_shader_stage(unsigned pipe)
+{
+   switch (pipe) {
+   case PIPE_SHADER_VERTEX: return 0;
+/* case PIPE_SHADER_TESSELLATION_CONTROL: return 1; */
+/* case PIPE_SHADER_TESSELLATION_EVALUATION: return 2; */
+   case PIPE_SHADER_GEOMETRY: return 3;
+   case PIPE_SHADER_FRAGMENT: return 4;
+   case PIPE_SHADER_COMPUTE: return 5;
+   default:
+      assert(!"invalid PIPE_SHADER type");
+      return 0;
+   }
+}
+
+
+/* nvc0_context.c */
+struct pipe_context *nvc0_create(struct pipe_screen *, void *);
+void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *,
+                       boolean on_flush);
+void nvc0_default_kick_notify(struct nouveau_pushbuf *);
+
+/* nvc0_draw.c */
+extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
+
+/* nvc0_program.c */
+boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
+boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
+void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
+void nvc0_program_library_upload(struct nvc0_context *);
+uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
+                                    uint32_t label);
+
+/* nvc0_query.c */
+void nvc0_init_query_functions(struct nvc0_context *);
+void nvc0_query_pushbuf_submit(struct nouveau_pushbuf *,
+                               struct pipe_query *, unsigned result_offset);
+void nvc0_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
+void nvc0_so_target_save_offset(struct pipe_context *,
+                                struct pipe_stream_output_target *, unsigned i,
+                                boolean *serialize);
+
+/* driver-private query type, numbered after the generic PIPE_QUERY types */
+#define NVC0_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
+
+/* nvc0_shader_state.c */
+void nvc0_vertprog_validate(struct nvc0_context *);
+void nvc0_tctlprog_validate(struct nvc0_context *);
+void nvc0_tevlprog_validate(struct nvc0_context *);
+void nvc0_gmtyprog_validate(struct nvc0_context *);
+void nvc0_fragprog_validate(struct nvc0_context *);
+
+void nvc0_tfb_validate(struct nvc0_context *);
+
+/* nvc0_state.c */
+extern void nvc0_init_state_functions(struct nvc0_context *);
+
+/* nvc0_state_validate.c */
+void nvc0_validate_global_residents(struct nvc0_context *,
+                                    struct nouveau_bufctx *, int bin);
+extern boolean nvc0_state_validate(struct nvc0_context *, uint32_t state_mask,
+                                   unsigned space_words);
+
+/* nvc0_surface.c */
+extern void nvc0_clear(struct pipe_context *, unsigned buffers,
+                       const union pipe_color_union *color,
+                       double depth, unsigned stencil);
+extern void nvc0_init_surface_functions(struct nvc0_context *);
+
+/* nvc0_tex.c */
+boolean nve4_validate_tsc(struct nvc0_context *nvc0, int s);
+void nvc0_validate_textures(struct nvc0_context *);
+void nvc0_validate_samplers(struct nvc0_context *);
+void nve4_set_tex_handles(struct nvc0_context *);
+void nvc0_validate_surfaces(struct nvc0_context *);
+void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_surface *,
+                           struct nvc0_screen *);
+
+struct pipe_sampler_view *
+nvc0_create_texture_view(struct pipe_context *,
+                         struct pipe_resource *,
+                         const struct pipe_sampler_view *,
+                         uint32_t flags,
+                         enum pipe_texture_target);
+struct pipe_sampler_view *
+nvc0_create_sampler_view(struct pipe_context *,
+                         struct pipe_resource *,
+                         const struct pipe_sampler_view *);
+
+/* nvc0_transfer.c */
+void
+nvc0_init_transfer_functions(struct nvc0_context *);
+
+void
+nvc0_m2mf_push_linear(struct nouveau_context *nv,
+                      struct nouveau_bo *dst, unsigned offset, unsigned domain,
+                      unsigned size, const void *data);
+void
+nve4_p2mf_push_linear(struct nouveau_context *nv,
+                      struct nouveau_bo *dst, unsigned offset, unsigned domain,
+                      unsigned size, const void *data);
+void
+nvc0_cb_push(struct nouveau_context *,
+             struct nouveau_bo *bo, unsigned domain,
+             unsigned base, unsigned size,
+             unsigned offset, unsigned words, const uint32_t *data);
+
+/* nvc0_vbo.c */
+void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
+
+void *
+nvc0_vertex_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements);
+void
+nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
+
+void nvc0_vertex_arrays_validate(struct nvc0_context *);
+
+void nvc0_idxbuf_validate(struct nvc0_context *);
+
+/* nvc0_video.c */
+struct pipe_video_codec *
+nvc0_create_decoder(struct pipe_context *context,
+                    const struct pipe_video_codec *templ);
+
+struct pipe_video_buffer *
+nvc0_video_buffer_create(struct pipe_context *pipe,
+                         const struct pipe_video_buffer *templat);
+
+/* nvc0_push.c */
+void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
+
+/* nve4_compute.c */
+void nve4_launch_grid(struct pipe_context *,
+                      const uint *, const uint *, uint32_t, const void *);
+
+/* nvc0_compute.c */
+void nvc0_launch_grid(struct pipe_context *,
+                      const uint *, const uint *, uint32_t, const void *);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_draw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_draw.c
new file mode 100644
index 00000000000..e261d5058fc
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_draw.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of 
the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "draw/draw_pipe.h" + +#include "nvc0/nvc0_context.h" + +struct nvc0_render_stage { + struct draw_stage stage; + struct nvc0_context *nvc0; +}; + +static INLINE struct nvc0_render_stage * +nvc0_render_stage(struct draw_stage *stage) +{ + return (struct nvc0_render_stage *)stage; +} + +static void +nvc0_render_point(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_line(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_flush(struct draw_stage *stage, unsigned flags) +{ +} + +static void +nvc0_render_reset_stipple_counter(struct draw_stage *stage) +{ + NOUVEAU_ERR("\n"); +} + +static void +nvc0_render_destroy(struct draw_stage *stage) +{ + FREE(stage); +} + +struct draw_stage * +nvc0_draw_render_stage(struct nvc0_context *nvc0) +{ + struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage); + + rs->nvc0 = nvc0; + rs->stage.draw = nvc0->draw; + rs->stage.destroy = nvc0_render_destroy; + rs->stage.point = nvc0_render_point; + rs->stage.line = nvc0_render_line; + rs->stage.tri = nvc0_render_tri; + rs->stage.flush = nvc0_render_flush; + rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter; + + return &rs->stage; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_formats.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_formats.c new file mode 100644 index 00000000000..2bfdb0e076c --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_formats.c @@ -0,0 +1,25 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define NOUVEAU_DRIVER 0xc0 + +#include "nv50/nv50_formats.c" diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h new file mode 100644 index 00000000000..f009980c629 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h @@ -0,0 +1,236 @@ + +#ifndef __NVC0_PGRAPH_MACROS_H__ +#define __NVC0_PGRAPH_MACROS_H__ + +/* extrinsrt r1, r2, src, size, dst: replace bits [dst:dst+size) in r1 + * with bits [src:src+size) in r2 + * + * bra(n)z annul: no delay slot + */ + +/* Bitfield version of NVC0_3D_VERTEX_ARRAY_PER_INSTANCE[]. 
+ * Args: size, bitfield
+ */
+/* Per the disassembly below, $r1 (the first argument, "size") is counted
+ * down as the loop counter while $r2 counts up as the bit index into $r3.
+ */
+static const uint32_t nvc0_9097_per_instance_bf[] =
+{
+   0x00000301, /* parm $r3 (the bitfield) */
+   0x00000211, /* mov $r2 0 */
+   0x05880021, /* maddr [NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(0), increment = 4] */
+   0xffffc911, /* mov $r1 (add $r1 -0x1) */
+   0x0040d043, /* send (extrshl $r3 $r2 0x1 0) */
+   0xffff8897, /* exit branz $r1 0x3 */
+   0x00005211  /* mov $r2 (add $r2 0x1) */
+};
+
+/* The comments above the macros describe what they *should* be doing,
+ * but we use less functionality for now.
+ */
+
+/*
+ * for (i = 0; i < 8; ++i)
+ *    [NVC0_3D_BLEND_ENABLE(i)] = BIT(i of arg);
+ *
+ * [3428] = arg;
+ *
+ * if (arg == 0 || [NVC0_3D_MULTISAMPLE_ENABLE] == 0)
+ *    [0d9c] = 0;
+ * else
+ *    [0d9c] = [342c];
+ */
+/* Only the first step is implemented: the code is one maddr followed by the
+ * eight per-bit sends.  The last send sits after the exit-flagged word.
+ */
+static const uint32_t nvc0_9097_blend_enables[] =
+{
+   0x05360021, /* 0x00: maddr [NVC0_3D_BLEND_ENABLE(0), increment = 4] */
+   0x00404042, /* 0x01: send extrinsrt 0 $r1 0 0x1 0 */
+   0x00424042, /* 0x02: send extrinsrt 0 $r1 0x1 0x1 0 */
+   0x00444042, /* 0x03: send extrinsrt 0 $r1 0x2 0x1 0 */
+   0x00464042, /* 0x04: send extrinsrt 0 $r1 0x3 0x1 0 */
+   0x00484042, /* 0x05: send extrinsrt 0 $r1 0x4 0x1 0 */
+   0x004a4042, /* 0x06: send extrinsrt 0 $r1 0x5 0x1 0 */
+   0x004c40c2, /* 0x07: exit send extrinsrt 0 $r1 0x6 0x1 0 */
+   0x004e4042, /* 0x08: send extrinsrt 0 $r1 0x7 0x1 0 */
+};
+
+/*
+ * uint64 limit = (parm(0) << 32) | parm(1);
+ * uint64 start = (parm(2) << 32);
+ *
+ * if (limit) {
+ *    start |= parm(3);
+ *    --limit;
+ * } else {
+ *    start |= 1;
+ * }
+ *
+ * [0x1c04 + (arg & 0xf) * 16 + 0] = (start >> 32) & 0xff;
+ * [0x1c04 + (arg & 0xf) * 16 + 4] = start & 0xffffffff;
+ * [0x1f00 + (arg & 0xf) * 8 + 0] = (limit >> 32) & 0xff;
+ * [0x1f00 + (arg & 0xf) * 8 + 4] = limit & 0xffffffff;
+ */
+/* The code below forwards the four parameters unmodified ($r4, $r5 to the
+ * first address, $r2, $r3 to the second); the limit/start adjustment from
+ * the description above is not performed.
+ */
+static const uint32_t nvc0_9097_vertex_array_select[] =
+{
+   0x00000201, /* 0x00: parm $r2 */
+   0x00000301, /* 0x01: parm $r3 */
+   0x00000401, /* 0x02: parm $r4 */
+   0x00000501, /* 0x03: parm $r5 */
+   0x11004612, /* 0x04: mov $r6 extrinsrt 0 $r1 0 4 2 */
+   0x09004712, /* 0x05: mov $r7 extrinsrt 0 $r1 0 4 1 */
+   0x05c07621, /* 0x06: maddr $r6 add $6 0x1701 */
+   0x00002041, /* 0x07: send $r4 */
+   0x00002841, /* 0x08: send $r5 */
+   0x05f03f21, /* 0x09: maddr $r7 add $7 0x17c0 */
+   0x000010c1, /* 0x0a: exit send $r2 */
+   0x00001841, /* 0x0b: send $r3 */
+};
+
+/*
+ * [GL_POLYGON_MODE_FRONT] = arg;
+ *
+ * if (BIT(31 of [0x3410]))
+ *    [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
+ *    [02ec] = 0;
+ * else
+ * if ([GL_POLYGON_MODE_BACK] == GL_LINE || arg == GL_LINE)
+ *    [02ec] = BYTE(1 of [0x3410]) << 4;
+ * else
+ *    [02ec] = BYTE(0 of [0x3410]) << 4;
+ */
+/* The word at 0x08 (maddr 0x36b) selects the register that receives arg via
+ * "send $r1"; 0x36b is the address read as POLYGON_MODE_FRONT by
+ * nvc0_9097_poly_mode_back.
+ */
+static const uint32_t nvc0_9097_poly_mode_front[] =
+{
+   0x00db0215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_BACK] */
+   0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
+   0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
+   0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
+   0x00004211, /* 0x04: mov $r2 0x1 */
+   0x00180611, /* 0x05: mov $r6 0x60 */
+   0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+   0x0000f807, /* 0x07: braz $r7 0xa */
+   0x00dac021, /* 0x08: maddr 0x36b */
+   0x00800611, /* 0x09: mov $r6 0x200 */
+   0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
+   0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+   0x0000f807, /* 0x0c: braz $r7 0xf */
+   0x00000841, /* 0x0d: send $r1 */
+   0x00000611, /* 0x0e: mov $r6 0 */
+   0x002ec0a1, /* 0x0f: exit maddr [02ec] */
+   0x00003041  /* 0x10: send $r6 */
+};
+
+/*
+ * [GL_POLYGON_MODE_BACK] = arg;
+ *
+ * if (BIT(31 of [0x3410]))
+ *    [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
+ *    [02ec] = 0;
+ * else
+ * if ([GL_POLYGON_MODE_FRONT] == GL_LINE || arg == GL_LINE)
+ *    [02ec] = BYTE(1 of [0x3410]) << 4;
+ * else
+ *    [02ec] = BYTE(0 of [0x3410]) << 4;
+ */
+/* NOTE: 0x3410 = 0x80002006 by default,
+ *  POLYGON_MODE == GL_LINE check replaced by (MODE & 1)
+ *  SP_SELECT(i) == (i << 4) | 1 check replaced by SP_SELECT(i) & 1
+ */
+/* Same instruction sequence as nvc0_9097_poly_mode_front with the FRONT and
+ * BACK registers swapped (words 0x00 and 0x08).
+ */
+static const uint32_t nvc0_9097_poly_mode_back[] =
+{
+   0x00dac215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_FRONT] */
+   0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
+   0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
+   0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
+   0x00004211, /* 0x04: mov $r2 0x1 */
+   0x00180611, /* 0x05: mov $r6 0x60 */
+   0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+   0x0000f807, /* 0x07: braz $r7 0xa */
+   0x00db0021, /* 0x08: maddr 0x36c */
+   0x00800611, /* 0x09: mov $r6 0x200 */
+   0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
+   0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+   0x0000f807, /* 0x0c: braz $r7 0xf */
+   0x00000841, /* 0x0d: send $r1 */
+   0x00000611, /* 0x0e: mov $r6 0 */
+   0x002ec0a1, /* 0x0f: exit maddr [02ec] */
+   0x00003041  /* 0x10: send $r6 */
+};
+
+/*
+ * [NVC0_3D_SP_SELECT(4)] = arg
+ *
+ * if BIT(31 of [0x3410]) == 0
+ *    [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || arg == 0x41)
+ *    [02ec] = 0
+ * else
+ * if (any POLYGON MODE == LINE)
+ *    [02ec] = BYTE(1 of [3410]) << 4;
+ * else
+ *    [02ec] = BYTE(0 of [3410]) << 4; // 02ec valid bits are 0xff1
+ */
+/* The raw addresses in the disassembly use the same code words as the named
+ * registers in the poly_mode macros: 0x36b/0x36c are POLYGON_MODE_FRONT/BACK
+ * and 0x830/0x840 are SP_SELECT(3)/SP_SELECT(4).
+ * NOTE(review): the BIT(31) test above is written "== 0" here but without
+ * the comparison in the poly_mode descriptions -- confirm which is intended.
+ */
+static const uint32_t nvc0_9097_gp_select[] = /* 0x0f */
+{
+   0x00dac215, /* 0x00: read $r2 0x36b */
+   0x00db0315, /* 0x01: read $r3 0x36c */
+   0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
+   0x020c0415, /* 0x03: read $r4 0x830 */
+   0x00004211, /* 0x04: mov $r2 0x1 */
+   0x00180611, /* 0x05: mov $r6 0x60 */
+   0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+   0x0000f807, /* 0x07: braz $r7 0xa */
+   0x02100021, /* 0x08: maddr 0x840 */
+   0x00800611, /* 0x09: mov $r6 0x200 */
+   0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
+   0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+   0x0000f807, /* 0x0c: braz $r7 0xf */
+   0x00000841, /* 0x0d: send $r1 */
+   0x00000611, /* 0x0e: mov $r6 0 */
+   0x002ec0a1, /* 0x0f: exit maddr 0xbb */
+   0x00003041, /* 0x10: send $r6 */
+};
+
+/*
+ * [NVC0_3D_SP_SELECT(3)] = arg
+ *
+ * if BIT(31 of [0x3410]) == 0
+ *    [1a24] = 0x7353;
+ *
+ * if (arg == 0x31) {
+ *    if (BIT(2 of [0x3430])) {
+ *       int i = 15; do { --i; } while(i);
+ *       [0x1a2c] = 0;
+ *    }
+ * }
+ *
+ * if ([NVC0_3D_SP_SELECT(4)] == 0x41 || arg == 0x31)
+ *    [02ec] = 0
+ * else
+ * if ([any POLYGON_MODE] == GL_LINE)
+ *    [02ec] = BYTE(1 of [3410]) << 4;
+ * else
+ *    [02ec] = BYTE(0 of [3410]) << 4;
+ */
+/* Same instruction sequence as nvc0_9097_gp_select with the two SP_SELECT
+ * registers swapped (words 0x03 and 0x08); the delay loop from the
+ * description above does not appear in the code.
+ */
+static const uint32_t nvc0_9097_tep_select[] = /* 0x10 */
+{
+   0x00dac215, /* 0x00: read $r2 0x36b */
+   0x00db0315, /* 0x01: read $r3 0x36c */
+   0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
+   0x02100415, /* 0x03: read $r4 0x840 */
+   0x00004211, /* 0x04: mov $r2 0x1 */
+   0x00180611, /* 0x05: mov $r6 0x60 */
+   0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+   0x0000f807, /* 0x07: braz $r7 0xa */
+   0x020c0021, /* 0x08: maddr 0x830 */
+   0x00800611, /* 0x09: mov $r6 0x200 */
+   0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
+   0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+   0x0000f807, /* 0x0c: braz $r7 0xf */
+   0x00000841, /* 0x0d: send $r1 */
+   0x00000611, /* 0x0e: mov $r6 0 */
+   0x002ec0a1, /* 0x0f: exit maddr 0xbb */
+   0x00003041, /* 0x10: send $r6 */
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h
new file mode 100644
index 00000000000..3bf628d425e
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h
@@ -0,0 +1,138 @@
+#ifndef NVC0_M2MF_XML
+#define NVC0_M2MF_XML
+
+/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nvc0_m2mf.xml ( 2227 bytes, from 2010-10-16 16:10:29) +- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37) +- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24) +- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21) +- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58) + +Copyright (C) 2006-2010 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro, curro_, currojerez) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) 
+- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NVC0_M2MF_TILING_MODE_IN 0x00000204 + +#define NVC0_M2MF_TILING_PITCH_IN 0x00000208 + +#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c + +#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210 + +#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214 + +#define NVC0_M2MF_TILING_MODE_OUT 0x00000220 + +#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224 + +#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228 + +#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c + +#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230 + +#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238 + +#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c + +#define NVC0_M2MF_EXEC 0x00000300 +#define NVC0_M2MF_EXEC_PUSH 0x00000001 +#define NVC0_M2MF_EXEC_LINEAR_IN 0x00000010 +#define NVC0_M2MF_EXEC_LINEAR_OUT 0x00000100 +#define NVC0_M2MF_EXEC_NOTIFY 0x00002000 +#define NVC0_M2MF_EXEC_INC__MASK 0x00f00000 +#define NVC0_M2MF_EXEC_INC__SHIFT 20 + +#define NVC0_M2MF_DATA 0x00000304 + +#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c + +#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310 + +#define NVC0_M2MF_PITCH_IN 0x00000314 + +#define NVC0_M2MF_PITCH_OUT 0x00000318 + +#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c + +#define NVC0_M2MF_LINE_COUNT 0x00000320 + +#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c + +#define NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330 + +#define NVC0_M2MF_NOTIFY 0x00000334 + +#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344 + +#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348 + +#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c + +#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350 + + +#endif /* NVC0_M2MF_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c new file mode 100644 index 00000000000..79c9390b78f --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c @@ -0,0 +1,358 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation 
files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_resource.h" + +static uint32_t +nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz) +{ + return nv50_tex_choose_tile_dims_helper(nx, ny, nz); +} + +static uint32_t +nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed) +{ + const unsigned ms = util_logbase2(mt->base.base.nr_samples); + + uint32_t tile_flags; + + if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR)) + return 0; + if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR)) + return 0; + + switch (mt->base.base.format) { + case PIPE_FORMAT_Z16_UNORM: + if (compressed) + tile_flags = 0x02 + ms; + else + tile_flags = 0x01; + break; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + if (compressed) + tile_flags = 0x51 + ms; + else + tile_flags = 0x46; + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + if (compressed) + tile_flags = 0x17 + 
ms; + else + tile_flags = 0x11; + break; + case PIPE_FORMAT_Z32_FLOAT: + if (compressed) + tile_flags = 0x86 + ms; + else + tile_flags = 0x7b; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + if (compressed) + tile_flags = 0xce + ms; + else + tile_flags = 0xc3; + break; + default: + switch (util_format_get_blocksizebits(mt->base.base.format)) { + case 128: + if (compressed) + tile_flags = 0xf4 + ms * 2; + else + tile_flags = 0xfe; + break; + case 64: + if (compressed) { + switch (ms) { + case 0: tile_flags = 0xe6; break; + case 1: tile_flags = 0xeb; break; + case 2: tile_flags = 0xed; break; + case 3: tile_flags = 0xf2; break; + default: + return 0; + } + } else { + tile_flags = 0xfe; + } + break; + case 32: + if (compressed && ms) { + switch (ms) { + /* This one makes things blurry: + case 0: tile_flags = 0xdb; break; + */ + case 1: tile_flags = 0xdd; break; + case 2: tile_flags = 0xdf; break; + case 3: tile_flags = 0xe4; break; + default: + return 0; + } + } else { + tile_flags = 0xfe; + } + break; + case 16: + case 8: + tile_flags = 0xfe; + break; + default: + return 0; + } + break; + } + + return tile_flags; +} + +static INLINE boolean +nvc0_miptree_init_ms_mode(struct nv50_miptree *mt) +{ + switch (mt->base.base.nr_samples) { + case 8: + mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS8; + mt->ms_x = 2; + mt->ms_y = 1; + break; + case 4: + mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS4; + mt->ms_x = 1; + mt->ms_y = 1; + break; + case 2: + mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS2; + mt->ms_x = 1; + break; + case 1: + case 0: + mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; + break; + default: + NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples); + return FALSE; + } + return TRUE; +} + +static void +nvc0_miptree_init_layout_video(struct nv50_miptree *mt) +{ + const struct pipe_resource *pt = &mt->base.base; + const unsigned blocksize = util_format_get_blocksize(pt->format); + + assert(pt->last_level == 0); + assert(mt->ms_x == 0 && mt->ms_y == 0); + 
assert(!util_format_is_compressed(pt->format)); + + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + + mt->level[0].tile_mode = 0x10; + mt->level[0].pitch = align(pt->width0 * blocksize, 64); + mt->total_size = align(pt->height0, 16) * mt->level[0].pitch * (mt->layout_3d ? pt->depth0 : 1); + + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, NVC0_TILE_SIZE(0x10)); + mt->total_size = mt->layer_stride * pt->array_size; + } +} + +static void +nvc0_miptree_init_layout_tiled(struct nv50_miptree *mt) +{ + struct pipe_resource *pt = &mt->base.base; + unsigned w, h, d, l; + const unsigned blocksize = util_format_get_blocksize(pt->format); + + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + + w = pt->width0 << mt->ms_x; + h = pt->height0 << mt->ms_y; + + /* For 3D textures, a mipmap is spanned by all the layers, for array + * textures and cube maps, each layer contains its own mipmaps. + */ + d = mt->layout_3d ? pt->depth0 : 1; + + assert(!mt->ms_mode || !pt->last_level); + + for (l = 0; l <= pt->last_level; ++l) { + struct nv50_miptree_level *lvl = &mt->level[l]; + unsigned tsx, tsy, tsz; + unsigned nbx = util_format_get_nblocksx(pt->format, w); + unsigned nby = util_format_get_nblocksy(pt->format, h); + + lvl->offset = mt->total_size; + + lvl->tile_mode = nvc0_tex_choose_tile_dims(nbx, nby, d); + + tsx = NVC0_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */ + tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode); + tsz = NVC0_TILE_SIZE_Z(lvl->tile_mode); + + lvl->pitch = align(nbx * blocksize, tsx); + + mt->total_size += lvl->pitch * align(nby, tsy) * align(d, tsz); + + w = u_minify(w, 1); + h = u_minify(h, 1); + d = u_minify(d, 1); + } + + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, + NVC0_TILE_SIZE(mt->level[0].tile_mode)); + mt->total_size = mt->layer_stride * pt->array_size; + } +} + +const struct u_resource_vtbl nvc0_miptree_vtbl = +{ + nv50_miptree_get_handle, /* get_handle */ + nv50_miptree_destroy, /* resource_destroy 
*/ + nvc0_miptree_transfer_map, /* transfer_map */ + u_default_transfer_flush_region, /* transfer_flush_region */ + nvc0_miptree_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; + +struct pipe_resource * +nvc0_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ) +{ + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); + struct pipe_resource *pt = &mt->base.base; + boolean compressed = dev->drm_version >= 0x01000101; + int ret; + union nouveau_bo_config bo_config; + uint32_t bo_flags; + + if (!mt) + return NULL; + + mt->base.vtbl = &nvc0_miptree_vtbl; + *pt = *templ; + pipe_reference_init(&pt->reference, 1); + pt->screen = pscreen; + + if (pt->usage == PIPE_USAGE_STAGING) { + switch (pt->target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + if (pt->last_level == 0 && + !util_format_is_depth_or_stencil(pt->format) && + pt->nr_samples <= 1) + pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR; + break; + default: + break; + } + } + + if (pt->bind & PIPE_BIND_LINEAR) + pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR; + + bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed); + + if (!nvc0_miptree_init_ms_mode(mt)) { + FREE(mt); + return NULL; + } + + if (unlikely(pt->flags & NVC0_RESOURCE_FLAG_VIDEO)) { + nvc0_miptree_init_layout_video(mt); + } else + if (likely(bo_config.nvc0.memtype)) { + nvc0_miptree_init_layout_tiled(mt); + } else + if (!nv50_miptree_init_layout_linear(mt, 128)) { + FREE(mt); + return NULL; + } + bo_config.nvc0.tile_mode = mt->level[0].tile_mode; + + if (!bo_config.nvc0.memtype && pt->usage == PIPE_USAGE_STAGING) + mt->base.domain = NOUVEAU_BO_GART; + else + mt->base.domain = NOUVEAU_BO_VRAM; + + bo_flags = mt->base.domain | NOUVEAU_BO_NOSNOOP; + + if (mt->base.base.bind & (PIPE_BIND_CURSOR | PIPE_BIND_DISPLAY_TARGET)) + bo_flags |= NOUVEAU_BO_CONTIG; + + ret = 
nouveau_bo_new(dev, bo_flags, 4096, mt->total_size, &bo_config, + &mt->base.bo); + if (ret) { + FREE(mt); + return NULL; + } + mt->base.address = mt->base.bo->offset; + + NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_count, 1); + NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_bytes, + mt->total_size); + + return pt; +} + +/* Offset of zslice @z from start of level @l. */ +INLINE unsigned +nvc0_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z) +{ + const struct pipe_resource *pt = &mt->base.base; + + unsigned tds = NVC0_TILE_SHIFT_Z(mt->level[l].tile_mode); + unsigned ths = NVC0_TILE_SHIFT_Y(mt->level[l].tile_mode); + + unsigned nby = util_format_get_nblocksy(pt->format, + u_minify(pt->height0, l)); + + /* to next 2D tile slice within a 3D tile */ + unsigned stride_2d = NVC0_TILE_SIZE_2D(mt->level[l].tile_mode); + + /* to slice in the next (in z direction) 3D tile */ + unsigned stride_3d = (align(nby, (1 << ths)) * mt->level[l].pitch) << tds; + + return (z & (1 << (tds - 1))) * stride_2d + (z >> tds) * stride_3d; +} + +/* Surface functions. 
+ */ + +struct pipe_surface * +nvc0_miptree_surface_new(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *templ) +{ + struct nv50_surface *ns = nv50_surface_from_miptree(nv50_miptree(pt), templ); + if (!ns) + return NULL; + ns->base.context = pipe; + return &ns->base; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c new file mode 100644 index 00000000000..71deb3485d5 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -0,0 +1,811 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" + +#include "nvc0/nvc0_context.h" + +#include "codegen/nv50_ir_driver.h" +#include "nvc0/nve4_compute.h" + +/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than + * 124 scalar varying values. 
+ */ +static uint32_t +nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase) +{ + switch (sn) { + case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; + case TGSI_SEMANTIC_PRIMID: return 0x060; + case TGSI_SEMANTIC_PSIZE: return 0x06c; + case TGSI_SEMANTIC_POSITION: return 0x070; + case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; + case TGSI_SEMANTIC_FOG: return 0x2e8; + case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; + case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; + case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4; + case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; + case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; + case TGSI_SEMANTIC_PCOORD: return 0x2e0; + case NV50_SEMANTIC_TESSCOORD: return 0x2f0; + case TGSI_SEMANTIC_INSTANCEID: return 0x2f8; + case TGSI_SEMANTIC_VERTEXID: return 0x2fc; + case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; + case TGSI_SEMANTIC_FACE: return 0x3fc; + case NV50_SEMANTIC_INVOCATIONID: return ~0; + default: + assert(!"invalid TGSI input semantic"); + return ~0; + } +} + +static uint32_t +nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase) +{ + switch (sn) { + case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; + case TGSI_SEMANTIC_PRIMID: return 0x060; + case NV50_SEMANTIC_LAYER: return 0x064; + case NV50_SEMANTIC_VIEWPORTINDEX: return 0x068; + case TGSI_SEMANTIC_PSIZE: return 0x06c; + case TGSI_SEMANTIC_POSITION: return 0x070; + case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; + case TGSI_SEMANTIC_FOG: return 0x2e8; + case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; + case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; + case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4; + case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; + case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; + case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; + case TGSI_SEMANTIC_EDGEFLAG: return ~0; + default: + assert(!"invalid TGSI output semantic"); + return ~0; + } +} + 
+static int +nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info) +{ + unsigned i, c, n; + + for (n = 0, i = 0; i < info->numInputs; ++i) { + switch (info->in[i].sn) { + case TGSI_SEMANTIC_INSTANCEID: /* for SM4 only, in TGSI they're SVs */ + case TGSI_SEMANTIC_VERTEXID: + info->in[i].mask = 0x1; + info->in[i].slot[0] = + nvc0_shader_input_address(info->in[i].sn, 0, 0) / 4; + continue; + default: + break; + } + for (c = 0; c < 4; ++c) + info->in[i].slot[c] = (0x80 + n * 0x10 + c * 0x4) / 4; + ++n; + } + + return 0; +} + +static int +nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info) +{ + unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10); + unsigned offset; + unsigned i, c; + + for (i = 0; i < info->numInputs; ++i) { + offset = nvc0_shader_input_address(info->in[i].sn, + info->in[i].si, ubase); + if (info->in[i].patch && offset >= 0x20) + offset = 0x20 + info->in[i].si * 0x10; + + if (info->in[i].sn == NV50_SEMANTIC_TESSCOORD) + info->in[i].mask &= 3; + + for (c = 0; c < 4; ++c) + info->in[i].slot[c] = (offset + c * 0x4) / 4; + } + + return 0; +} + +static int +nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) +{ + unsigned count = info->prop.fp.numColourResults * 4; + unsigned i, c; + + for (i = 0; i < info->numOutputs; ++i) + if (info->out[i].sn == TGSI_SEMANTIC_COLOR) + for (c = 0; c < 4; ++c) + info->out[i].slot[c] = info->out[i].si * 4 + c; + + if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) + info->out[info->io.sampleMask].slot[0] = count++; + else + if (info->target >= 0xe0) + count++; /* on Kepler, depth is always last colour reg + 2 */ + + if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) + info->out[info->io.fragDepth].slot[2] = count; + + return 0; +} + +static int +nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info) +{ + unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10); + unsigned offset; + unsigned i, c; + + for (i = 0; i < info->numOutputs; ++i) { + offset = 
nvc0_shader_output_address(info->out[i].sn, + info->out[i].si, ubase); + if (info->out[i].patch && offset >= 0x20) + offset = 0x20 + info->out[i].si * 0x10; + + for (c = 0; c < 4; ++c) + info->out[i].slot[c] = (offset + c * 0x4) / 4; + } + + return 0; +} + +static int +nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info) +{ + int ret; + + if (info->type == PIPE_SHADER_VERTEX) + ret = nvc0_vp_assign_input_slots(info); + else + ret = nvc0_sp_assign_input_slots(info); + if (ret) + return ret; + + if (info->type == PIPE_SHADER_FRAGMENT) + ret = nvc0_fp_assign_output_slots(info); + else + ret = nvc0_sp_assign_output_slots(info); + return ret; +} + +static INLINE void +nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot) +{ + uint8_t min = (vp->hdr[4] >> 12) & 0xff; + uint8_t max = (vp->hdr[4] >> 24); + + min = MIN2(min, slot); + max = MAX2(max, slot); + + vp->hdr[4] = (max << 24) | (min << 12); +} + +/* Common part of header generation for VP, TCP, TEP and GP. */ +static int +nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) +{ + unsigned i, c, a; + + for (i = 0; i < info->numInputs; ++i) { + if (info->in[i].patch) + continue; + for (c = 0; c < 4; ++c) { + a = info->in[i].slot[c]; + if (info->in[i].mask & (1 << c)) { + if (info->in[i].sn != NV50_SEMANTIC_TESSCOORD) + vp->hdr[5 + a / 32] |= 1 << (a % 32); + else + nvc0_vtgp_hdr_update_oread(vp, info->in[i].slot[c]); + } + } + } + + for (i = 0; i < info->numOutputs; ++i) { + if (info->out[i].patch) + continue; + for (c = 0; c < 4; ++c) { + if (!(info->out[i].mask & (1 << c))) + continue; + assert(info->out[i].slot[c] >= 0x40 / 4); + a = info->out[i].slot[c] - 0x40 / 4; + vp->hdr[13 + a / 32] |= 1 << (a % 32); + if (info->out[i].oread) + nvc0_vtgp_hdr_update_oread(vp, info->out[i].slot[c]); + } + } + + for (i = 0; i < info->numSysVals; ++i) { + switch (info->sv[i].sn) { + case TGSI_SEMANTIC_PRIMID: + vp->hdr[5] |= 1 << 24; + break; + case TGSI_SEMANTIC_INSTANCEID: + 
vp->hdr[10] |= 1 << 30; + break; + case TGSI_SEMANTIC_VERTEXID: + vp->hdr[10] |= 1 << 31; + break; + default: + break; + } + } + + vp->vp.clip_enable = info->io.clipDistanceMask; + for (i = 0; i < 8; ++i) + if (info->io.cullDistanceMask & (1 << i)) + vp->vp.clip_mode |= 1 << (i * 4); + + if (info->io.genUserClip < 0) + vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */ + + return 0; +} + +static int +nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) +{ + vp->hdr[0] = 0x20061 | (1 << 10); + vp->hdr[4] = 0xff000; + + vp->hdr[18] = info->io.clipDistanceMask; + + return nvc0_vtgp_gen_header(vp, info); +} + +#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN) +static void +nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info) +{ + if (info->prop.tp.outputPrim == PIPE_PRIM_MAX) { + tp->tp.tess_mode = ~0; + return; + } + switch (info->prop.tp.domain) { + case PIPE_PRIM_LINES: + tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_ISOLINES; + break; + case PIPE_PRIM_TRIANGLES: + tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES; + if (info->prop.tp.winding > 0) + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW; + break; + case PIPE_PRIM_QUADS: + tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS; + break; + default: + tp->tp.tess_mode = ~0; + return; + } + if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS) + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED; + + switch (info->prop.tp.partitioning) { + case PIPE_TESS_PART_INTEGER: + case PIPE_TESS_PART_POW2: + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL; + break; + case PIPE_TESS_PART_FRACT_ODD: + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD; + break; + case PIPE_TESS_PART_FRACT_EVEN: + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN; + break; + default: + assert(!"invalid tessellator partitioning"); + break; + } +} +#endif + +#ifdef PIPE_SHADER_HULL +static int +nvc0_tcp_gen_header(struct nvc0_program *tcp, struct 
nv50_ir_prog_info *info) +{ + unsigned opcs = 6; /* output patch constants (at least the TessFactors) */ + + tcp->tp.input_patch_size = info->prop.tp.inputPatchSize; + + if (info->numPatchConstants) + opcs = 8 + info->numPatchConstants * 4; + + tcp->hdr[0] = 0x20061 | (2 << 10); + + tcp->hdr[1] = opcs << 24; + tcp->hdr[2] = info->prop.tp.outputPatchSize << 24; + + tcp->hdr[4] = 0xff000; /* initial min/max parallel output read address */ + + nvc0_vtgp_gen_header(tcp, info); + + nvc0_tp_get_tess_mode(tcp, info); + + return 0; +} +#endif + +#ifdef PIPE_SHADER_DOMAIN +static int +nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info) +{ + tep->tp.input_patch_size = ~0; + + tep->hdr[0] = 0x20061 | (3 << 10); + tep->hdr[4] = 0xff000; + + nvc0_vtgp_gen_header(tep, info); + + nvc0_tp_get_tess_mode(tep, info); + + tep->hdr[18] |= 0x3 << 12; /* ? */ + + return 0; +} +#endif + +static int +nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info) +{ + gp->hdr[0] = 0x20061 | (4 << 10); + + gp->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24; + + switch (info->prop.gp.outputPrim) { + case PIPE_PRIM_POINTS: + gp->hdr[3] = 0x01000000; + gp->hdr[0] |= 0xf0000000; + break; + case PIPE_PRIM_LINE_STRIP: + gp->hdr[3] = 0x06000000; + gp->hdr[0] |= 0x10000000; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + gp->hdr[3] = 0x07000000; + gp->hdr[0] |= 0x10000000; + break; + default: + assert(0); + break; + } + + gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff; + + return nvc0_vtgp_gen_header(gp, info); +} + +#define NVC0_INTERP_FLAT (1 << 0) +#define NVC0_INTERP_PERSPECTIVE (2 << 0) +#define NVC0_INTERP_LINEAR (3 << 0) +#define NVC0_INTERP_CENTROID (1 << 2) + +static uint8_t +nvc0_hdr_interp_mode(const struct nv50_ir_varying *var) +{ + if (var->linear) + return NVC0_INTERP_LINEAR; + if (var->flat) + return NVC0_INTERP_FLAT; + return NVC0_INTERP_PERSPECTIVE; +} + +static int +nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) 
+{ + unsigned i, c, a, m; + + /* just 00062 on Kepler */ + fp->hdr[0] = 0x20062 | (5 << 10); + fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ + + if (info->prop.fp.usesDiscard) + fp->hdr[0] |= 0x8000; + if (info->prop.fp.numColourResults > 1) + fp->hdr[0] |= 0x4000; + if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) + fp->hdr[19] |= 0x1; + if (info->prop.fp.writesDepth) { + fp->hdr[19] |= 0x2; + fp->flags[0] = 0x11; /* deactivate ZCULL */ + } + + for (i = 0; i < info->numInputs; ++i) { + m = nvc0_hdr_interp_mode(&info->in[i]); + for (c = 0; c < 4; ++c) { + if (!(info->in[i].mask & (1 << c))) + continue; + a = info->in[i].slot[c]; + if (info->in[i].slot[0] >= (0x060 / 4) && + info->in[i].slot[0] <= (0x07c / 4)) { + fp->hdr[5] |= 1 << (24 + (a - 0x060 / 4)); + } else + if (info->in[i].slot[0] >= (0x2c0 / 4) && + info->in[i].slot[0] <= (0x2fc / 4)) { + fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000; + } else { + if (info->in[i].slot[c] < (0x040 / 4) || + info->in[i].slot[c] > (0x380 / 4)) + continue; + a *= 2; + if (info->in[i].slot[0] >= (0x300 / 4)) + a -= 32; + fp->hdr[4 + a / 32] |= m << (a % 32); + } + } + } + + for (i = 0; i < info->numOutputs; ++i) { + if (info->out[i].sn == TGSI_SEMANTIC_COLOR) + fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0]; + } + + fp->fp.early_z = info->prop.fp.earlyFragTests; + + return 0; +} + +static struct nvc0_transform_feedback_state * +nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info, + const struct pipe_stream_output_info *pso) +{ + struct nvc0_transform_feedback_state *tfb; + unsigned b, i, c; + + tfb = MALLOC_STRUCT(nvc0_transform_feedback_state); + if (!tfb) + return NULL; + for (b = 0; b < 4; ++b) { + tfb->stride[b] = pso->stride[b] * 4; + tfb->varying_count[b] = 0; + } + memset(tfb->varying_index, 0xff, sizeof(tfb->varying_index)); /* = skip */ + + for (i = 0; i < pso->num_outputs; ++i) { + unsigned s = pso->output[i].start_component; + unsigned p = 
pso->output[i].dst_offset; + b = pso->output[i].output_buffer; + + for (c = 0; c < pso->output[i].num_components; ++c) + tfb->varying_index[b][p++] = + info->out[pso->output[i].register_index].slot[s + c]; + + tfb->varying_count[b] = MAX2(tfb->varying_count[b], p); + } + for (b = 0; b < 4; ++b) // zero unused indices (looks nicer) + for (c = tfb->varying_count[b]; c & 3; ++c) + tfb->varying_index[b][c] = 0; + + return tfb; +} + +#ifdef DEBUG +static void +nvc0_program_dump(struct nvc0_program *prog) +{ + unsigned pos; + + if (prog->type != PIPE_SHADER_COMPUTE) { + for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos) + debug_printf("HDR[%02lx] = 0x%08x\n", + pos * sizeof(prog->hdr[0]), prog->hdr[pos]); + } + debug_printf("shader binary code (0x%x bytes):", prog->code_size); + for (pos = 0; pos < prog->code_size / 4; ++pos) { + if ((pos % 8) == 0) + debug_printf("\n"); + debug_printf("%08x ", prog->code[pos]); + } + debug_printf("\n"); +} +#endif + +boolean +nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) +{ + struct nv50_ir_prog_info *info; + int ret; + + info = CALLOC_STRUCT(nv50_ir_prog_info); + if (!info) + return FALSE; + + info->type = prog->type; + info->target = chipset; + info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; + info->bin.source = (void *)prog->pipe.tokens; + + info->io.genUserClip = prog->vp.num_ucps; + info->io.ucpBase = 256; + info->io.ucpCBSlot = 15; + + if (prog->type == PIPE_SHADER_COMPUTE) { + if (chipset >= NVISA_GK104_CHIPSET) { + info->io.resInfoCBSlot = 0; + info->io.texBindBase = NVE4_CP_INPUT_TEX(0); + info->io.suInfoBase = NVE4_CP_INPUT_SUF(0); + info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0); + } + info->io.msInfoCBSlot = 0; + info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS; + } else { + if (chipset >= NVISA_GK104_CHIPSET) { + info->io.resInfoCBSlot = 15; + info->io.texBindBase = 0x20; + info->io.suInfoBase = 0; /* TODO */ + } + info->io.msInfoCBSlot = 15; + info->io.msInfoBase = 0; /* TODO */ + 
} + + info->assignSlots = nvc0_program_assign_varying_slots; + +#ifdef DEBUG + info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); + info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); +#else + info->optLevel = 3; +#endif + + ret = nv50_ir_generate_code(info); + if (ret) { + NOUVEAU_ERR("shader translation failed: %i\n", ret); + goto out; + } + if (prog->type != PIPE_SHADER_COMPUTE) + FREE(info->bin.syms); + + prog->code = info->bin.code; + prog->code_size = info->bin.codeSize; + prog->immd_data = info->immd.buf; + prog->immd_size = info->immd.bufSize; + prog->relocs = info->bin.relocData; + prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); + prog->num_barriers = info->numBarriers; + + prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; + + if (info->io.edgeFlagOut < PIPE_MAX_ATTRIBS) + info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */ + prog->vp.edgeflag = info->io.edgeFlagIn; + + switch (prog->type) { + case PIPE_SHADER_VERTEX: + ret = nvc0_vp_gen_header(prog, info); + break; +#ifdef PIPE_SHADER_HULL + case PIPE_SHADER_HULL: + ret = nvc0_tcp_gen_header(prog, info); + break; +#endif +#ifdef PIPE_SHADER_DOMAIN + case PIPE_SHADER_DOMAIN: + ret = nvc0_tep_gen_header(prog, info); + break; +#endif + case PIPE_SHADER_GEOMETRY: + ret = nvc0_gp_gen_header(prog, info); + break; + case PIPE_SHADER_FRAGMENT: + ret = nvc0_fp_gen_header(prog, info); + break; + case PIPE_SHADER_COMPUTE: + prog->cp.syms = info->bin.syms; + prog->cp.num_syms = info->bin.numSyms; + break; + default: + ret = -1; + NOUVEAU_ERR("unknown program type: %u\n", prog->type); + break; + } + if (ret) + goto out; + + if (info->bin.tlsSpace) { + assert(info->bin.tlsSpace < (1 << 24)); + prog->hdr[0] |= 1 << 26; + prog->hdr[1] |= info->bin.tlsSpace; /* l[] size */ + prog->need_tls = TRUE; + } + /* TODO: factor 2 only needed where joinat/precont is used, + * and we only have to count non-uniform branches + */ + /* + if ((info->maxCFDepth * 2) > 16) { + 
prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200; + prog->need_tls = TRUE; + } + */ + if (info->io.globalAccess) + prog->hdr[0] |= 1 << 16; + + if (prog->pipe.stream_output.num_outputs) + prog->tfb = nvc0_program_create_tfb_state(info, + &prog->pipe.stream_output); + +out: + FREE(info); + return !ret; +} + +boolean +nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + struct nvc0_screen *screen = nvc0->screen; + const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE; + int ret; + uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); + uint32_t lib_pos = screen->lib_code->start; + uint32_t code_pos; + + /* c[] bindings need to be aligned to 0x100, but we could use relocations + * to save space. */ + if (prog->immd_size) { + prog->immd_base = size; + size = align(size, 0x40); + size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */ + } + /* On Fermi, SP_START_ID must be aligned to 0x40. + * On Kepler, the first instruction must be aligned to 0x80 because + * latency information is expected only at certain positions. + */ + if (screen->base.class_3d >= NVE4_3D_CLASS) + size = size + (is_cp ? 
0x40 : 0x70); + size = align(size, 0x40); + + ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem); + if (ret) { + struct nouveau_heap *heap = screen->text_heap; + struct nouveau_heap *iter; + for (iter = heap; iter && iter->next != heap; iter = iter->next) { + struct nvc0_program *evict = iter->priv; + if (evict) + nouveau_heap_free(&evict->mem); + } + debug_printf("WARNING: out of code space, evicting all shaders.\n"); + ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); + if (ret) { + NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size); + return FALSE; + } + IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0); + } + prog->code_base = prog->mem->start; + prog->immd_base = align(prog->mem->start + prog->immd_base, 0x100); + assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <= + prog->mem->start + prog->mem->size)); + + if (!is_cp) { + if (screen->base.class_3d >= NVE4_3D_CLASS) { + switch (prog->mem->start & 0xff) { + case 0x40: prog->code_base += 0x70; break; + case 0x80: prog->code_base += 0x30; break; + case 0xc0: prog->code_base += 0x70; break; + default: + prog->code_base += 0x30; + assert((prog->mem->start & 0xff) == 0x00); + break; + } + } + code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE; + } else { + if (screen->base.class_3d >= NVE4_3D_CLASS) { + if (prog->mem->start & 0x40) + prog->code_base += 0x40; + assert((prog->code_base & 0x7f) == 0x00); + } + code_pos = prog->code_base; + } + + if (prog->relocs) + nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0); + +#ifdef DEBUG + if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE)) + nvc0_program_dump(prog); +#endif + + if (!is_cp) + nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base, + NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); + nvc0->base.push_data(&nvc0->base, screen->text, code_pos, + NOUVEAU_BO_VRAM, prog->code_size, prog->code); + if (prog->immd_size) + nvc0->base.push_data(&nvc0->base, + 
screen->text, prog->immd_base, NOUVEAU_BO_VRAM, + prog->immd_size, prog->immd_data); + + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1); + PUSH_DATA (nvc0->base.pushbuf, 0x1011); + + return TRUE; +} + +/* Upload code for builtin functions like integer division emulation. */ +void +nvc0_program_library_upload(struct nvc0_context *nvc0) +{ + struct nvc0_screen *screen = nvc0->screen; + int ret; + uint32_t size; + const uint32_t *code; + + if (screen->lib_code) + return; + + nv50_ir_get_target_library(screen->base.device->chipset, &code, &size); + if (!size) + return; + + ret = nouveau_heap_alloc(screen->text_heap, align(size, 0x100), NULL, + &screen->lib_code); + if (ret) + return; + + nvc0->base.push_data(&nvc0->base, + screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, + size, code); + /* no need for a memory barrier, will be emitted with first program */ +} + +void +nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + const struct pipe_shader_state pipe = prog->pipe; + const ubyte type = prog->type; + + if (prog->mem) + nouveau_heap_free(&prog->mem); + if (prog->code) + FREE(prog->code); /* may be 0 for hardcoded shaders */ + FREE(prog->immd_data); + FREE(prog->relocs); + if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms) + FREE(prog->cp.syms); + if (prog->tfb) { + if (nvc0->state.tfb == prog->tfb) + nvc0->state.tfb = NULL; + FREE(prog->tfb); + } + + memset(prog, 0, sizeof(*prog)); + + prog->pipe = pipe; + prog->type = type; +} + +uint32_t +nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label) +{ + const struct nv50_ir_prog_symbol *syms = + (const struct nv50_ir_prog_symbol *)prog->cp.syms; + unsigned base = 0; + unsigned i; + if (prog->type != PIPE_SHADER_COMPUTE) + base = NVC0_SHADER_HEADER_SIZE; + for (i = 0; i < prog->cp.num_syms; ++i) + if (syms[i].label == label) + return prog->code_base + base + syms[i].offset; + return prog->code_base; /* no symbols or symbol not found */ +} diff --git 

#ifndef __NVC0_PROGRAM_H__
#define __NVC0_PROGRAM_H__

#include "pipe/p_state.h"

#define NVC0_CAP_MAX_PROGRAM_TEMPS 128


/* Transform feedback layout of a program, one entry per output buffer
 * (up to 4).  Built by nvc0_program_create_tfb_state().
 */
struct nvc0_transform_feedback_state {
   uint32_t stride[4];
   uint8_t varying_count[4];      /* used entries of varying_index[b] */
   uint8_t varying_index[4][128]; /* output slots captured into buffer b;
                                   * unused entries up to the next multiple
                                   * of 4 are zeroed */
};


/* size in bytes of the shader header (nvc0_program::hdr) that is uploaded in
 * front of the code of every non-compute program */
#define NVC0_SHADER_HEADER_SIZE (20 * 4)

/* Driver-side state of one shader program: the gallium shader state it was
 * created from, the generated binary and its placement in the code segment.
 */
struct nvc0_program {
   struct pipe_shader_state pipe;

   ubyte type; /* PIPE_SHADER_x */
   boolean translated;
   boolean need_tls; /* set by translation when the code uses l[] space */
   uint8_t num_gprs; /* MAX2(4, highest GPR used + 1) */

   uint32_t *code;      /* generated code, code_size bytes */
   uint32_t *immd_data; /* immediate array data, immd_size bytes */
   unsigned code_base;  /* set at upload: start of the header (of the code
                         * for compute) inside the code segment */
   unsigned code_size;  /* in bytes */
   unsigned immd_base;  /* set at upload, aligned to 0x100 */
   unsigned immd_size; /* size of immediate array data */
   unsigned parm_size; /* size of non-bindable uniforms (c0[]) */

   uint32_t hdr[20]; /* shader header, NVC0_SHADER_HEADER_SIZE bytes */
   uint32_t flags[2];

   /* vertex program specific */
   struct {
      uint32_t clip_mode; /* clip/cull selection */
      uint8_t clip_enable; /* mask of defined clip planes */
      uint8_t num_ucps; /* also set to max if ClipDistance is used */
      uint8_t edgeflag; /* attribute index of edgeflag input */
      boolean need_vertex_id;
   } vp;
   /* fragment program specific */
   struct {
      uint8_t early_z;
      uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
   } fp;
   /* tessellation program specific */
   struct {
      uint32_t tess_mode; /* ~0 if defined by the other stage */
      uint32_t input_patch_size;
   } tp;
   /* compute program specific */
   struct {
      uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
      uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
      void *syms;        /* symbol table from the code generator, searched by
                          * nvc0_program_symbol_offset() */
      unsigned num_syms; /* number of entries in syms */
   } cp;
   uint8_t num_barriers;

   void *relocs; /* relocation data from the code generator, applied when the
                  * code is uploaded */

   /* only created when pipe.stream_output has outputs */
   struct nvc0_transform_feedback_state *tfb;

   struct nouveau_heap *mem; /* allocation in the screen's code heap */
};

#endif
"pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_resource.h" + +#include "nvc0/nvc0_3d.xml.h" + +struct push_context { + struct nouveau_pushbuf *push; + + void *idxbuf; + + uint32_t vertex_words; + uint32_t packet_vertex_limit; + + struct translate *translate; + + boolean primitive_restart; + boolean need_vertex_id; + uint32_t prim; + uint32_t restart_index; + uint32_t instance_id; + + struct { + int buffer; + float value; + uint8_t *data; + unsigned offset; + unsigned stride; + } edgeflag; +}; + +static void +init_push_context(struct nvc0_context *nvc0, struct push_context *ctx) +{ + struct pipe_vertex_element *ve; + + ctx->push = nvc0->base.pushbuf; + ctx->translate = nvc0->vertex->translate; + + if (likely(nvc0->vertex->num_elements < 32)) + ctx->need_vertex_id = nvc0->vertprog->vp.need_vertex_id; + else + ctx->need_vertex_id = FALSE; + + ctx->edgeflag.buffer = -1; + ctx->edgeflag.value = 0.5f; + + if (unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) { + ve = &nvc0->vertex->element[nvc0->vertprog->vp.edgeflag].pipe; + ctx->edgeflag.buffer = ve->vertex_buffer_index; + ctx->edgeflag.offset = ve->src_offset; + ctx->packet_vertex_limit = 1; + } else { + ctx->packet_vertex_limit = nvc0->vertex->vtx_per_packet_max; + if (unlikely(ctx->need_vertex_id)) + ctx->packet_vertex_limit = 1; + } + + ctx->vertex_words = nvc0->vertex->vtx_size; +} + +static INLINE void +set_edgeflag(struct push_context *ctx, unsigned vtx_id) +{ + float f = *(float *)(ctx->edgeflag.data + vtx_id * ctx->edgeflag.stride); + + if (ctx->edgeflag.value != f) { + ctx->edgeflag.value = f; + IMMED_NVC0(ctx->push, NVC0_3D(EDGEFLAG), f ? 
1 : 0); + } +} + +static INLINE void +set_vertexid(struct push_context *ctx, uint32_t vtx_id) +{ +#if 0 + BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_ID), 1); /* broken on nvc0 */ +#else + BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_DATA), 1); /* as last attribute */ +#endif + PUSH_DATA (ctx->push, vtx_id); +} + +static INLINE unsigned +prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static INLINE unsigned +prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static INLINE unsigned +prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index) +{ + unsigned i; + for (i = 0; i < push; ++i) + if (elts[i] == index) + break; + return i; +} + +static void +emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) +{ + uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i08(elts, push, ctx->restart_index); + + if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr)) + set_edgeflag(ctx, elts[0]); + + size = ctx->vertex_words * nr; + + BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size); + + ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id, + ctx->push->cur); + ctx->push->cur += size; + + if (unlikely(ctx->need_vertex_id) && likely(size)) + set_vertexid(ctx, elts[0]); + + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_END_GL), 2); + PUSH_DATA (ctx->push, 0); + PUSH_DATA (ctx->push, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); + } + } +} + +static void +emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) +{ 
+ uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i16(elts, push, ctx->restart_index); + + if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr)) + set_edgeflag(ctx, elts[0]); + + size = ctx->vertex_words * nr; + + BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size); + + ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id, + ctx->push->cur); + ctx->push->cur += size; + + if (unlikely(ctx->need_vertex_id)) + set_vertexid(ctx, elts[0]); + + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_END_GL), 2); + PUSH_DATA (ctx->push, 0); + PUSH_DATA (ctx->push, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); + } + } +} + +static void +emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) +{ + uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start; + + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size, nr; + + nr = push; + if (ctx->primitive_restart) + nr = prim_restart_search_i32(elts, push, ctx->restart_index); + + if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr)) + set_edgeflag(ctx, elts[0]); + + size = ctx->vertex_words * nr; + + BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size); + + ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id, + ctx->push->cur); + ctx->push->cur += size; + + if (unlikely(ctx->need_vertex_id)) + set_vertexid(ctx, elts[0]); + + count -= nr; + elts += nr; + + if (nr != push) { + count--; + elts++; + BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_END_GL), 2); + PUSH_DATA (ctx->push, 0); + PUSH_DATA (ctx->push, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT | + (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT)); + } + } +} + +static void +emit_vertices_seq(struct push_context 
*ctx, unsigned start, unsigned count) +{ + while (count) { + unsigned push = MIN2(count, ctx->packet_vertex_limit); + unsigned size = ctx->vertex_words * push; + + if (unlikely(ctx->edgeflag.buffer >= 0)) + set_edgeflag(ctx, start); + + BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size); + + ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id, + ctx->push->cur); + ctx->push->cur += size; + + if (unlikely(ctx->need_vertex_id)) + set_vertexid(ctx, start); + + count -= push; + start += push; + } +} + + +#define NVC0_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ + switch (prim) { + NVC0_PRIM_GL_CASE(POINTS); + NVC0_PRIM_GL_CASE(LINES); + NVC0_PRIM_GL_CASE(LINE_LOOP); + NVC0_PRIM_GL_CASE(LINE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLES); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLE_FAN); + NVC0_PRIM_GL_CASE(QUADS); + NVC0_PRIM_GL_CASE(QUAD_STRIP); + NVC0_PRIM_GL_CASE(POLYGON); + NVC0_PRIM_GL_CASE(LINES_ADJACENCY); + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NVC0_PRIM_GL_CASE(PATCHES); */ + default: + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + break; + } +} + +void +nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) +{ + struct push_context ctx; + unsigned i, index_size; + unsigned inst_count = info->instance_count; + unsigned vert_count = info->count; + boolean apply_bias = info->indexed && info->index_bias; + + init_push_context(nvc0, &ctx); + + for (i = 0; i < nvc0->num_vtxbufs; ++i) { + uint8_t *data; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; + struct nv04_resource *res = nv04_resource(vb->buffer); + + data = nouveau_resource_map_offset(&nvc0->base, res, + vb->buffer_offset, NOUVEAU_BO_RD); + + if (apply_bias && likely(!(nvc0->vertex->instance_bufs & (1 << i)))) + data += info->index_bias * vb->stride; + + 
ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); + + if (unlikely(i == ctx.edgeflag.buffer)) { + ctx.edgeflag.data = data + ctx.edgeflag.offset; + ctx.edgeflag.stride = vb->stride; + } + } + + if (info->indexed) { + ctx.idxbuf = + nouveau_resource_map_offset(&nvc0->base, + nv04_resource(nvc0->idxbuf.buffer), + nvc0->idxbuf.offset, NOUVEAU_BO_RD); + if (!ctx.idxbuf) + return; + index_size = nvc0->idxbuf.index_size; + ctx.primitive_restart = info->primitive_restart; + ctx.restart_index = info->restart_index; + } else { + ctx.idxbuf = NULL; + index_size = 0; + ctx.primitive_restart = FALSE; + ctx.restart_index = 0; + + if (info->count_from_stream_output) { + struct pipe_context *pipe = &nvc0->base.pipe; + struct nvc0_so_target *targ; + targ = nvc0_so_target(info->count_from_stream_output); + pipe->get_query_result(pipe, targ->pq, TRUE, (void*)&vert_count); + vert_count /= targ->stride; + } + } + + ctx.instance_id = info->start_instance; + ctx.prim = nvc0_prim_gl(info->mode); + + if (unlikely(ctx.need_vertex_id)) { + const unsigned a = nvc0->vertex->num_elements; + BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1); + PUSH_DATA (ctx.push, (a << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) | + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32); + BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 1); + PUSH_DATA (ctx.push, (((0x80 + a * 0x10) / 4) << 4) | 1); + } + + while (inst_count--) { + BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (ctx.push, ctx.prim); + switch (index_size) { + case 0: + emit_vertices_seq(&ctx, info->start, vert_count); + break; + case 1: + emit_vertices_i08(&ctx, info->start, vert_count); + break; + case 2: + emit_vertices_i16(&ctx, info->start, vert_count); + break; + case 4: + emit_vertices_i32(&ctx, info->start, vert_count); + break; + default: + assert(0); + break; + } + IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0); + + ctx.instance_id++; + ctx.prim |= 
NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + + if (unlikely(ctx.edgeflag.value == 0.0f)) + IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1); + + if (unlikely(ctx.need_vertex_id)) { + const unsigned a = nvc0->vertex->num_elements; + IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0); + BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1); + PUSH_DATA (ctx.push, + NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST | + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32); + } + + if (info->indexed) + nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer)); + + for (i = 0; i < nvc0->num_vtxbufs; ++i) + nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer)); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c new file mode 100644 index 00000000000..21aa3580e7c --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -0,0 +1,1448 @@ +/* + * Copyright 2011 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christoph Bumiller + */ + +#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING + +#include "nvc0/nvc0_context.h" +#include "nv_object.xml.h" +#include "nvc0/nve4_compute.xml.h" +#include "nvc0/nvc0_compute.xml.h" + +#define NVC0_QUERY_STATE_READY 0 +#define NVC0_QUERY_STATE_ACTIVE 1 +#define NVC0_QUERY_STATE_ENDED 2 +#define NVC0_QUERY_STATE_FLUSHED 3 + +struct nvc0_query { + uint32_t *data; + uint16_t type; + uint16_t index; + int8_t ctr[4]; + uint32_t sequence; + struct nouveau_bo *bo; + uint32_t base; + uint32_t offset; /* base + i * rotate */ + uint8_t state; + boolean is64bit; + uint8_t rotate; + int nesting; /* only used for occlusion queries */ + union { + struct nouveau_mm_allocation *mm; + uint64_t value; + } u; + struct nouveau_fence *fence; +}; + +#define NVC0_QUERY_ALLOC_SPACE 256 + +static void nvc0_mp_pm_query_begin(struct nvc0_context *, struct nvc0_query *); +static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *); +static boolean nvc0_mp_pm_query_result(struct nvc0_context *, + struct nvc0_query *, void *, boolean); + +static INLINE struct nvc0_query * +nvc0_query(struct pipe_query *pipe) +{ + return (struct nvc0_query *)pipe; +} + +static boolean +nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size) +{ + struct nvc0_screen *screen = nvc0->screen; + int ret; + + if (q->bo) { + nouveau_bo_ref(NULL, &q->bo); + if (q->u.mm) { + if (q->state == NVC0_QUERY_STATE_READY) + nouveau_mm_free(q->u.mm); + else + nouveau_fence_work(screen->base.fence.current, + nouveau_mm_free_work, q->u.mm); + } + } + if (size) { + q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base); + if (!q->bo) + return FALSE; + q->offset = 
q->base; + + ret = nouveau_bo_map(q->bo, 0, screen->base.client); + if (ret) { + nvc0_query_allocate(nvc0, q, 0); + return FALSE; + } + q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base); + } + return TRUE; +} + +static void +nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0); + nouveau_fence_ref(NULL, &nvc0_query(pq)->fence); + FREE(nvc0_query(pq)); +} + +static struct pipe_query * +nvc0_query_create(struct pipe_context *pipe, unsigned type) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_query *q; + unsigned space = NVC0_QUERY_ALLOC_SPACE; + + q = CALLOC_STRUCT(nvc0_query); + if (!q) + return NULL; + + switch (type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + q->rotate = 32; + space = NVC0_QUERY_ALLOC_SPACE; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + q->is64bit = TRUE; + space = 512; + break; + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + q->is64bit = TRUE; + space = 64; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + q->is64bit = TRUE; + space = 32; + break; + case PIPE_QUERY_TIME_ELAPSED: + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_GPU_FINISHED: + space = 32; + break; + case NVC0_QUERY_TFB_BUFFER_OFFSET: + space = 16; + break; + default: +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + if (type >= NVC0_QUERY_DRV_STAT(0) && type <= NVC0_QUERY_DRV_STAT_LAST) { + space = 0; + q->is64bit = true; + q->index = type - NVC0_QUERY_DRV_STAT(0); + break; + } else +#endif + if (nvc0->screen->base.device->drm_version >= 0x01000101) { + if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) { + /* for each MP: + * [00] = WS0.C0 + * [04] = WS0.C1 + * [08] = WS0.C2 + * [0c] = WS0.C3 + * [10] = WS1.C0 + * [14] = WS1.C1 + * [18] = WS1.C2 + * [1c] = WS1.C3 + * [20] = WS2.C0 + * [24] = WS2.C1 + * [28] = WS2.C2 + * [2c] = 
WS2.C3 + * [30] = WS3.C0 + * [34] = WS3.C1 + * [38] = WS3.C2 + * [3c] = WS3.C3 + * [40] = MP.C4 + * [44] = MP.C5 + * [48] = MP.C6 + * [4c] = MP.C7 + * [50] = WS0.sequence + * [54] = WS1.sequence + * [58] = WS2.sequence + * [5c] = WS3.sequence + */ + space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t); + break; + } else + if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) { + /* for each MP: + * [00] = MP.C0 + * [04] = MP.C1 + * [08] = MP.C2 + * [0c] = MP.C3 + * [10] = MP.C4 + * [14] = MP.C5 + * [18] = MP.C6 + * [1c] = MP.C7 + * [20] = MP.sequence + */ + space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t); + break; + } + } + debug_printf("invalid query type: %u\n", type); + FREE(q); + return NULL; + } + if (!nvc0_query_allocate(nvc0, q, space)) { + FREE(q); + return NULL; + } + + q->type = type; + + if (q->rotate) { + /* we advance before query_begin ! */ + q->offset -= q->rotate; + q->data -= q->rotate / sizeof(*q->data); + } else + if (!q->is64bit) + q->data[0] = 0; /* initialize sequence */ + + return (struct pipe_query *)q; +} + +static void +nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q, + unsigned offset, uint32_t get) +{ + offset += q->offset; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4); + PUSH_DATAh(push, q->bo->offset + offset); + PUSH_DATA (push, q->bo->offset + offset); + PUSH_DATA (push, q->sequence); + PUSH_DATA (push, get); +} + +static void +nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q) +{ + q->offset += q->rotate; + q->data += q->rotate / sizeof(*q->data); + if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE) + nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE); +} + +static void +nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_query *q = 
nvc0_query(pq); + + /* For occlusion queries we have to change the storage, because a previous + * query might set the initial render conition to FALSE even *after* we re- + * initialized it to TRUE. + */ + if (q->rotate) { + nvc0_query_rotate(nvc0, q); + + /* XXX: can we do this with the GPU, and sync with respect to a previous + * query ? + */ + q->data[0] = q->sequence; /* initialize sequence */ + q->data[1] = 1; /* initial render condition = TRUE */ + q->data[4] = q->sequence + 1; /* for comparison COND_MODE */ + q->data[5] = 0; + } + q->sequence++; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + q->nesting = nvc0->screen->num_occlusion_queries_active++; + if (q->nesting) { + nvc0_query_get(push, q, 0x10, 0x0100f002); + } else { + PUSH_SPACE(push, 3); + BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1); + PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT); + IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1); + } + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5)); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5)); + break; + case PIPE_QUERY_SO_STATISTICS: + nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5)); + nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5)); + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5)); + break; + case PIPE_QUERY_TIME_ELAPSED: + nvc0_query_get(push, q, 0x10, 0x00005002); + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */ + nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */ + nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */ + nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */ + nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */ + nvc0_query_get(push, q, 
0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */ + nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */ + nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */ + nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */ + nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */ + break; + default: +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + if (q->type >= NVC0_QUERY_DRV_STAT(0) && + q->type <= NVC0_QUERY_DRV_STAT_LAST) { + if (q->index >= 5) + q->u.value = nvc0->screen->base.stats.v[q->index]; + else + q->u.value = 0; + } else +#endif + if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) || + (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) { + nvc0_mp_pm_query_begin(nvc0, q); + } + break; + } + q->state = NVC0_QUERY_STATE_ACTIVE; +} + +static void +nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_query *q = nvc0_query(pq); + + if (q->state != NVC0_QUERY_STATE_ACTIVE) { + /* some queries don't require 'begin' to be called (e.g. 
GPU_FINISHED) */ + if (q->rotate) + nvc0_query_rotate(nvc0, q); + q->sequence++; + } + q->state = NVC0_QUERY_STATE_ENDED; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + nvc0_query_get(push, q, 0, 0x0100f002); + if (--nvc0->screen->num_occlusion_queries_active == 0) { + PUSH_SPACE(push, 1); + IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0); + } + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5)); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5)); + break; + case PIPE_QUERY_SO_STATISTICS: + nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5)); + nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5)); + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + /* TODO: How do we sum over all streams for render condition ? */ + /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */ + nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5)); + nvc0_query_get(push, q, 0x20, 0x00005002); + break; + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIME_ELAPSED: + nvc0_query_get(push, q, 0, 0x00005002); + break; + case PIPE_QUERY_GPU_FINISHED: + nvc0_query_get(push, q, 0, 0x1000f010); + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */ + nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */ + nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */ + nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */ + nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */ + nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */ + nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */ + nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ + nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */ + nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */ + break; + case 
NVC0_QUERY_TFB_BUFFER_OFFSET: + /* indexed by TFB buffer instead of by vertex stream */ + nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5)); + break; + default: +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + if (q->type >= NVC0_QUERY_DRV_STAT(0) && + q->type <= NVC0_QUERY_DRV_STAT_LAST) { + q->u.value = nvc0->screen->base.stats.v[q->index] - q->u.value; + return; + } else +#endif + if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) || + (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) { + nvc0_mp_pm_query_end(nvc0, q); + } + break; + } + if (q->is64bit) + nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence); +} + +static INLINE void +nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q) +{ + if (q->is64bit) { + if (nouveau_fence_signalled(q->fence)) + q->state = NVC0_QUERY_STATE_READY; + } else { + if (q->data[0] == q->sequence) + q->state = NVC0_QUERY_STATE_READY; + } +} + +static boolean +nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, union pipe_query_result *result) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_query *q = nvc0_query(pq); + uint64_t *res64 = (uint64_t*)result; + uint32_t *res32 = (uint32_t*)result; + boolean *res8 = (boolean*)result; + uint64_t *data64 = (uint64_t *)q->data; + unsigned i; + +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + if (q->type >= NVC0_QUERY_DRV_STAT(0) && + q->type <= NVC0_QUERY_DRV_STAT_LAST) { + res64[0] = q->u.value; + return TRUE; + } else +#endif + if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) || + (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) { + return nvc0_mp_pm_query_result(nvc0, q, result, wait); + } + + if (q->state != NVC0_QUERY_STATE_READY) + nvc0_query_update(nvc0->screen->base.client, q); + + if (q->state != NVC0_QUERY_STATE_READY) { + if (!wait) { + if (q->state != NVC0_QUERY_STATE_FLUSHED) { + q->state = NVC0_QUERY_STATE_FLUSHED; + /* flush for silly apps 
that spin on GL_QUERY_RESULT_AVAILABLE */ + PUSH_KICK(nvc0->base.pushbuf); + } + return FALSE; + } + if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client)) + return FALSE; + NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1); + } + q->state = NVC0_QUERY_STATE_READY; + + switch (q->type) { + case PIPE_QUERY_GPU_FINISHED: + res8[0] = TRUE; + break; + case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ + res64[0] = q->data[1] - q->data[5]; + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + res8[0] = q->data[1] != q->data[5]; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ + case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ + res64[0] = data64[0] - data64[2]; + break; + case PIPE_QUERY_SO_STATISTICS: + res64[0] = data64[0] - data64[4]; + res64[1] = data64[2] - data64[6]; + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + res8[0] = data64[0] != data64[2]; + break; + case PIPE_QUERY_TIMESTAMP: + res64[0] = data64[1]; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + res64[0] = 1000000000; + res8[8] = FALSE; + break; + case PIPE_QUERY_TIME_ELAPSED: + res64[0] = data64[1] - data64[3]; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + for (i = 0; i < 10; ++i) + res64[i] = data64[i * 2] - data64[24 + i * 2]; + break; + case NVC0_QUERY_TFB_BUFFER_OFFSET: + res32[0] = q->data[1]; + break; + default: + assert(0); /* can't happen, we don't create queries with invalid type */ + return FALSE; + } + + return TRUE; +} + +void +nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq) +{ + struct nvc0_query *q = nvc0_query(pq); + unsigned offset = q->offset; + + if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); + PUSH_DATAh(push, q->bo->offset + offset); + PUSH_DATA (push, q->bo->offset + offset); + PUSH_DATA (push, 
q->sequence); + PUSH_DATA (push, (1 << 12) | + NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); +} + +static void +nvc0_render_condition(struct pipe_context *pipe, + struct pipe_query *pq, + boolean condition, uint mode) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_query *q; + uint32_t cond; + boolean negated = FALSE; + boolean wait = + mode != PIPE_RENDER_COND_NO_WAIT && + mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; + + nvc0->cond_query = pq; + nvc0->cond_cond = condition; + nvc0->cond_mode = mode; + + if (!pq) { + PUSH_SPACE(push, 1); + IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + return; + } + q = nvc0_query(pq); + + /* NOTE: comparison of 2 queries only works if both have completed */ + switch (q->type) { + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + cond = negated ? NVC0_3D_COND_MODE_EQUAL : + NVC0_3D_COND_MODE_NOT_EQUAL; + wait = TRUE; + break; + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + if (likely(!negated)) { + if (unlikely(q->nesting)) + cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL : + NVC0_3D_COND_MODE_ALWAYS; + else + cond = NVC0_3D_COND_MODE_RES_NON_ZERO; + } else { + cond = wait ? 
NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS; + } + break; + default: + assert(!"render condition query not a predicate"); + mode = NVC0_3D_COND_MODE_ALWAYS; + break; + } + + if (wait) + nvc0_query_fifo_wait(push, pq); + + PUSH_SPACE(push, 4); + PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3); + PUSH_DATAh(push, q->bo->offset + q->offset); + PUSH_DATA (push, q->bo->offset + q->offset); + PUSH_DATA (push, cond); +} + +void +nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push, + struct pipe_query *pq, unsigned result_offset) +{ + struct nvc0_query *q = nvc0_query(pq); + +#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8)) + + nouveau_pushbuf_space(push, 0, 0, 1); + nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 | + NVC0_IB_ENTRY_1_NO_PREFETCH); +} + +void +nvc0_so_target_save_offset(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg, + unsigned index, boolean *serialize) +{ + struct nvc0_so_target *targ = nvc0_so_target(ptarg); + + if (*serialize) { + *serialize = FALSE; + PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1); + IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0); + + NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1); + } + + nvc0_query(targ->pq)->index = index; + + nvc0_query_end(pipe, targ->pq); +} + + +/* === DRIVER STATISTICS === */ + +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + +static const char *nvc0_drv_stat_names[] = +{ + "drv-tex_obj_current_count", + "drv-tex_obj_current_bytes", + "drv-buf_obj_current_count", + "drv-buf_obj_current_bytes_vid", + "drv-buf_obj_current_bytes_sys", + "drv-tex_transfers_rd", + "drv-tex_transfers_wr", + "drv-tex_copy_count", + "drv-tex_blit_count", + "drv-tex_cache_flush_count", + "drv-buf_transfers_rd", + "drv-buf_transfers_wr", + "drv-buf_read_bytes_staging_vid", + "drv-buf_write_bytes_direct", + "drv-buf_write_bytes_staging_vid", + "drv-buf_write_bytes_staging_sys", + 
"drv-buf_copy_bytes", + "drv-buf_non_kernel_fence_sync_count", + "drv-any_non_kernel_fence_sync_count", + "drv-query_sync_count", + "drv-gpu_serialize_count", + "drv-draw_calls_array", + "drv-draw_calls_indexed", + "drv-draw_calls_fallback_count", + "drv-user_buffer_upload_bytes", + "drv-constbuf_upload_count", + "drv-constbuf_upload_bytes", + "drv-pushbuf_count", + "drv-resource_validate_count" +}; + +#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */ + + +/* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */ + +/* Code to read out MP counters: They are accessible via mmio, too, but let's + * just avoid mapping registers in userspace. We'd have to know which MPs are + * enabled/present, too, and that information is not presently exposed. + * We could add a kernel interface for it, but reading the counters like this + * has the advantage of being async (if get_result isn't called immediately). + */ +static const uint64_t nve4_read_mp_pm_counters_code[] = +{ + /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20 + * mov b32 $r8 $tidx + * mov b32 $r12 $physid + * mov b32 $r0 $pm0 + * mov b32 $r1 $pm1 + * mov b32 $r2 $pm2 + * mov b32 $r3 $pm3 + * mov b32 $r4 $pm4 + * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b + * mov b32 $r5 $pm5 + * mov b32 $r6 $pm6 + * mov b32 $r7 $pm7 + * set $p0 0x1 eq u32 $r8 0x0 + * mov b32 $r10 c0[0x0] + * ext u32 $r8 $r12 0x414 + * mov b32 $r11 c0[0x4] + * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04 + * ext u32 $r9 $r12 0x208 + * (not $p0) exit + * set $p1 0x1 eq u32 $r9 0x0 + * mul $r8 u32 $r8 u32 96 + * mul $r12 u32 $r9 u32 16 + * mul $r13 u32 $r9 u32 4 + * add b32 $r9 $r8 $r13 + * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c + * add b32 $r8 $r8 $r12 + * mov b32 $r12 $r10 + * add b32 $r10 $c $r10 $r8 + * mov b32 $r13 $r11 + * add b32 $r11 $r11 0x0 $c + * add b32 $r12 $c $r12 $r9 + * st b128 wt g[$r10d] $r0q + * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00 + * mov b32 $r0 c0[0x8] + * add b32 $r13 $r13 0x0 $c + * $p1 st b128 wt g[$r12d+0x40] $r4q + * st b32 wt 
g[$r12d+0x50] $r0 + * exit */ + 0x2202020202020207ULL, + 0x2c00000084021c04ULL, + 0x2c0000000c031c04ULL, + 0x2c00000010001c04ULL, + 0x2c00000014005c04ULL, + 0x2c00000018009c04ULL, + 0x2c0000001c00dc04ULL, + 0x2c00000020011c04ULL, + 0x22b0420042320207ULL, + 0x2c00000024015c04ULL, + 0x2c00000028019c04ULL, + 0x2c0000002c01dc04ULL, + 0x190e0000fc81dc03ULL, + 0x2800400000029de4ULL, + 0x7000c01050c21c03ULL, + 0x280040001002dde4ULL, + 0x204282020042e047ULL, + 0x7000c00820c25c03ULL, + 0x80000000000021e7ULL, + 0x190e0000fc93dc03ULL, + 0x1000000180821c02ULL, + 0x1000000040931c02ULL, + 0x1000000010935c02ULL, + 0x4800000034825c03ULL, + 0x22c042c042c04287ULL, + 0x4800000030821c03ULL, + 0x2800000028031de4ULL, + 0x4801000020a29c03ULL, + 0x280000002c035de4ULL, + 0x0800000000b2dc42ULL, + 0x4801000024c31c03ULL, + 0x9400000000a01fc5ULL, + 0x200002e04202c047ULL, + 0x2800400020001de4ULL, + 0x0800000000d35c42ULL, + 0x9400000100c107c5ULL, + 0x9400000140c01f85ULL, + 0x8000000000001de7ULL +}; + +/* NOTE: intentionally using the same names as NV */ +static const char *nve4_pm_query_names[] = +{ + /* MP counters */ + "prof_trigger_00", + "prof_trigger_01", + "prof_trigger_02", + "prof_trigger_03", + "prof_trigger_04", + "prof_trigger_05", + "prof_trigger_06", + "prof_trigger_07", + "warps_launched", + "threads_launched", + "sm_cta_launched", + "inst_issued1", + "inst_issued2", + "inst_executed", + "local_load", + "local_store", + "shared_load", + "shared_store", + "l1_local_load_hit", + "l1_local_load_miss", + "l1_local_store_hit", + "l1_local_store_miss", + "gld_request", + "gst_request", + "l1_global_load_hit", + "l1_global_load_miss", + "uncached_global_load_transaction", + "global_store_transaction", + "branch", + "divergent_branch", + "active_warps", + "active_cycles", + "inst_issued", + "atom_count", + "gred_count", + "shared_load_replay", + "shared_store_replay", + "local_load_transactions", + "local_store_transactions", + "l1_shared_load_transactions", + 
"l1_shared_store_transactions", + "global_ld_mem_divergence_replays", + "global_st_mem_divergence_replays", + /* metrics, i.e. functions of the MP counters */ + "metric-ipc", /* inst_executed, clock */ + "metric-ipac", /* inst_executed, active_cycles */ + "metric-ipec", /* inst_executed, (bool)inst_executed */ + "metric-achieved_occupancy", /* active_warps, active_cycles */ + "metric-sm_efficiency", /* active_cycles, clock */ + "metric-inst_replay_overhead" /* inst_issued, inst_executed */ +}; + +/* For simplicity, we will allocate as many group slots as we allocate counter + * slots. This means that a single counter which wants to source from 2 groups + * will have to be declared as using 2 counter slots. This shouldn't really be + * a problem because such queries don't make much sense ... (unless someone is + * really creative). + */ +struct nvc0_mp_counter_cfg +{ + uint32_t func : 16; /* mask or 4-bit logic op (depending on mode) */ + uint32_t mode : 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */ + uint32_t num_src : 3; /* number of sources (1 - 6, only for NVC0:NVE4) */ + uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */ + uint32_t sig_sel : 8; /* signal group */ + uint64_t src_sel; /* signal selection for up to 6 sources (48 bit) */ +}; + +#define NVC0_COUNTER_OPn_SUM 0 +#define NVC0_COUNTER_OPn_OR 1 +#define NVC0_COUNTER_OPn_AND 2 +#define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */ +#define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */ +#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */ +#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */ + +struct nvc0_mp_pm_query_cfg +{ + struct nvc0_mp_counter_cfg ctr[4]; + uint8_t num_counters; + uint8_t op; + uint8_t norm[2]; /* normalization num,denom */ +}; + +#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, 
NVC0_COUNTER_OPn_SUM, { nu, dn } } +#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } } +#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \ + { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \ + { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \ + {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } } +#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \ + { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \ + { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \ + {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } } +#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \ + { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \ + { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \ + {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } } + +/* NOTES: + * active_warps: bit 0 alternates btw 0 and 1 for odd nr of warps + * inst_executed etc.: we only count a single warp scheduler + * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers; + * this is inaccurate ! 
+ */ +static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] = +{ + _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1), + _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1), + _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1), + _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1), + _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1), + _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1), + _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1), + _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1), + _Q1A(LAUNCHED_WARPS, 0x0001, B6, LAUNCH, 0x00000004, 1, 1), + _Q1A(LAUNCHED_THREADS, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1), + _Q1B(LAUNCHED_CTA, 0x0001, B6, WARP, 0x0000001c, 1, 1), + _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1), + _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1), + _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1), + _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1), + _Q1A(LD_SHARED, 0x0001, B6, LDST, 0x00000000, 1, 1), + _Q1A(ST_SHARED, 0x0001, B6, LDST, 0x00000004, 1, 1), + _Q1A(LD_LOCAL, 0x0001, B6, LDST, 0x00000008, 1, 1), + _Q1A(ST_LOCAL, 0x0001, B6, LDST, 0x0000000c, 1, 1), + _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1), + _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1), + _Q1B(L1_LOCAL_LOAD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1), + _Q1B(L1_LOCAL_LOAD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1), + _Q1B(L1_LOCAL_STORE_HIT, 0x0001, B6, L1, 0x00000008, 1, 1), + _Q1B(L1_LOCAL_STORE_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1), + _Q1B(L1_GLOBAL_LOAD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1), + _Q1B(L1_GLOBAL_LOAD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1), + _Q1B(GLD_TRANSACTIONS_UNCACHED, 0x0001, B6, MEM, 0x00000000, 1, 1), + _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1), + _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1), + _Q1A(BRANCH_DIVERGENT, 0x0001, B6, BRANCH, 0x00000010, 1, 1), + _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1), + 
/* The table-building helper macros are only meant for nve4_mp_pm_queries;
 * drop all of them, including _M2AB, so none leaks into the rest of the file.
 */
#undef _Q1A
#undef _Q1B
#undef _M2A
#undef _M2B
#undef _M2AB
0x2c00000084021c04ULL, + 0x2c0000000c025c04ULL, + 0x2c00000010001c04ULL, + 0x2c00000014005c04ULL, + 0x2c00000018009c04ULL, + 0x2c0000001c00dc04ULL, + 0x2c00000020011c04ULL, + 0x2c00000024015c04ULL, + 0x2c00000028019c04ULL, + 0x2c0000002c01dc04ULL, + 0x190e0000fc81dc03ULL, + 0x2800400000029de4ULL, + 0x280040001002dde4ULL, + 0x7000c01050921c03ULL, + 0x80000000000021e7ULL, + 0x1000000090821c02ULL, + 0x4801000020a29c03ULL, + 0x0800000000b2dc42ULL, + 0x2800400020021de4ULL, + 0x9400000000a01fc5ULL, + 0x9400000040a11fc5ULL, + 0x9400000080a21f85ULL, + 0x8000000000001de7ULL +}; + +static const char *nvc0_pm_query_names[] = +{ + /* MP counters */ + "inst_executed", + "branch", + "divergent_branch", + "active_warps", + "active_cycles", + "warps_launched", + "threads_launched", + "shared_load", + "shared_store", + "local_load", + "local_store", + "gred_count", + "atom_count", + "gld_request", + "gst_request", + "inst_issued1_0", + "inst_issued1_1", + "inst_issued2_0", + "inst_issued2_1", + "thread_inst_executed_0", + "thread_inst_executed_1", + "thread_inst_executed_2", + "thread_inst_executed_3", + "prof_trigger_00", + "prof_trigger_01", + "prof_trigger_02", + "prof_trigger_03", + "prof_trigger_04", + "prof_trigger_05", + "prof_trigger_06", + "prof_trigger_07", +}; + +#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } } + +static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] = +{ + _Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00), + _Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00), + _Q(BRANCH_DIVERGENT, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00), + _Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), + _Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + 
_Q(LAUNCHED_WARPS, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(LAUNCHED_THREADS, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), + _Q(LD_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(ST_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(LD_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(ST_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(ATOM_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(GLD_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(GST_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(INST_ISSUED1_0, 0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(INST_ISSUED1_1, 0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(INST_ISSUED2_0, 0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(INST_ISSUED2_1, 0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), + _Q(PROF_TRIGGER_0, 0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_1, 0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_2, 0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_3, 0xaaaa, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_4, 0xaaaa, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_5, 0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_6, 0xaaaa, LOGOP, 0x01, 1, 0x60, 
0x00, 0x00, 0x00, 0x00, 0x00), + _Q(PROF_TRIGGER_7, 0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00), +}; + +#undef _Q + +static const struct nvc0_mp_pm_query_cfg * +nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q) +{ + struct nvc0_screen *screen = nvc0->screen; + + if (screen->base.class_3d >= NVE4_3D_CLASS) + return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC]; + return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)]; +} + +void +nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q) +{ + struct nvc0_screen *screen = nvc0->screen; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const struct nvc0_mp_pm_query_cfg *cfg; + unsigned i, c; + unsigned num_ab[2] = { 0, 0 }; + + cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + + /* check if we have enough free counter slots */ + for (i = 0; i < cfg->num_counters; ++i) + num_ab[cfg->ctr[i].sig_dom]++; + + if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 || + screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) { + NOUVEAU_ERR("Not enough free MP counter slots !\n"); + return; + } + + assert(cfg->num_counters <= 4); + PUSH_SPACE(push, 4 * 8 + 6); + + if (!screen->pm.mp_counters_enabled) { + screen->pm.mp_counters_enabled = TRUE; + BEGIN_NVC0(push, SUBC_SW(0x06ac), 1); + PUSH_DATA (push, 0x1fcb); + } + + /* set sequence field to 0 (used to check if result is available) */ + for (i = 0; i < screen->mp_count; ++i) + q->data[i * 10 + 10] = 0; + + for (i = 0; i < cfg->num_counters; ++i) { + const unsigned d = cfg->ctr[i].sig_dom; + + if (!screen->pm.num_mp_pm_active[d]) { + uint32_t m = (1 << 22) | (1 << (7 + (8 * !d))); + if (screen->pm.num_mp_pm_active[!d]) + m |= 1 << (7 + (8 * d)); + BEGIN_NVC0(push, SUBC_SW(0x0600), 1); + PUSH_DATA (push, m); + } + screen->pm.num_mp_pm_active[d]++; + + for (c = d * 4; c < (d * 4 + 4); ++c) { + if (!screen->pm.mp_counter[c]) { + q->ctr[i] = c; + screen->pm.mp_counter[c] = (struct pipe_query *)q; + break; + } + } + assert(c <= (d 
* 4 + 3)); /* must succeed, already checked for space */ + + /* configure and reset the counter(s) */ + if (screen->base.class_3d >= NVE4_3D_CLASS) { + if (d == 0) + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1); + else + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1); + PUSH_DATA (push, cfg->ctr[i].sig_sel); + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1); + PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3)); + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1); + PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1); + PUSH_DATA (push, 0); + } else { + unsigned s; + + for (s = 0; s < cfg->ctr[i].num_src; s++) { + BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(s)), 1); + PUSH_DATA (push, cfg->ctr[i].sig_sel); + BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(s)), 1); + PUSH_DATA (push, (cfg->ctr[i].src_sel >> (s * 8)) & 0xff); + BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(s)), 1); + PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); + BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(s)), 1); + PUSH_DATA (push, 0); + } + } + } +} + +static void +nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) +{ + struct nvc0_screen *screen = nvc0->screen; + struct pipe_context *pipe = &nvc0->base.pipe; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS; + uint32_t mask; + uint32_t input[3]; + const uint block[3] = { 32, is_nve4 ? 
4 : 1, 1 }; + const uint grid[3] = { screen->mp_count, 1, 1 }; + unsigned c; + const struct nvc0_mp_pm_query_cfg *cfg; + + cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + + if (unlikely(!screen->pm.prog)) { + struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program); + prog->type = PIPE_SHADER_COMPUTE; + prog->translated = TRUE; + prog->num_gprs = 14; + prog->parm_size = 12; + if (is_nve4) { + prog->code = (uint32_t *)nve4_read_mp_pm_counters_code; + prog->code_size = sizeof(nve4_read_mp_pm_counters_code); + } else { + prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code; + prog->code_size = sizeof(nvc0_read_mp_pm_counters_code); + } + screen->pm.prog = prog; + } + + /* disable all counting */ + PUSH_SPACE(push, 8); + for (c = 0; c < 8; ++c) + if (screen->pm.mp_counter[c]) { + if (is_nve4) { + IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0); + } else { + IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0); + } + } + /* release counters for this query */ + for (c = 0; c < 8; ++c) { + if (nvc0_query(screen->pm.mp_counter[c]) == q) { + screen->pm.num_mp_pm_active[c / 4]--; + screen->pm.mp_counter[c] = NULL; + } + } + + BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR, + q->bo); + + PUSH_SPACE(push, 1); + IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0); + + pipe->bind_compute_state(pipe, screen->pm.prog); + input[0] = (q->bo->offset + q->base); + input[1] = (q->bo->offset + q->base) >> 32; + input[2] = q->sequence; + pipe->launch_grid(pipe, block, grid, 0, input); + + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY); + + /* re-activate other counters */ + PUSH_SPACE(push, 16); + mask = 0; + for (c = 0; c < 8; ++c) { + unsigned i; + q = nvc0_query(screen->pm.mp_counter[c]); + if (!q) + continue; + cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + for (i = 0; i < cfg->num_counters; ++i) { + if (mask & (1 << q->ctr[i])) + break; + mask |= 1 << q->ctr[i]; + if (is_nve4) { + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1); + } else { + 
BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(q->ctr[i])), 1); + } + PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); + } + } +} + +static INLINE boolean +nvc0_mp_pm_query_read_data(uint32_t count[32][4], + struct nvc0_context *nvc0, boolean wait, + struct nvc0_query *q, + const struct nvc0_mp_pm_query_cfg *cfg, + unsigned mp_count) +{ + unsigned p, c; + + for (p = 0; p < mp_count; ++p) { + const unsigned b = (0x24 / 4) * p; + + for (c = 0; c < cfg->num_counters; ++c) { + if (q->data[b + 8] != q->sequence) { + if (!wait) + return FALSE; + if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client)) + return FALSE; + } + count[p][c] = q->data[b + q->ctr[c]]; + } + } + return TRUE; +} + +static INLINE boolean +nve4_mp_pm_query_read_data(uint32_t count[32][4], + struct nvc0_context *nvc0, boolean wait, + struct nvc0_query *q, + const struct nvc0_mp_pm_query_cfg *cfg, + unsigned mp_count) +{ + unsigned p, c, d; + + for (p = 0; p < mp_count; ++p) { + const unsigned b = (0x60 / 4) * p; + + for (c = 0; c < cfg->num_counters; ++c) { + count[p][c] = 0; + for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) { + if (q->data[b + 20 + d] != q->sequence) { + if (!wait) + return FALSE; + if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client)) + return FALSE; + } + if (q->ctr[c] & ~0x3) + count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)]; + else + count[p][c] += q->data[b + d * 4 + q->ctr[c]]; + } + } + } + return TRUE; +} + +/* Metric calculations: + * sum(x) ... sum of x over all MPs + * avg(x) ... average of x over all MPs + * + * IPC : sum(inst_executed) / clock + * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued) + * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles) + * MP_EFFICIENCY : avg(active_cycles / clock) + * + * NOTE: Interpretation of IPC requires knowledge of MP count. 
+ */ +static boolean +nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q, + void *result, boolean wait) +{ + uint32_t count[32][4]; + uint64_t value = 0; + unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32); + unsigned p, c; + const struct nvc0_mp_pm_query_cfg *cfg; + boolean ret; + + cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + + if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) + ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count); + else + ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count); + if (!ret) + return FALSE; + + if (cfg->op == NVC0_COUNTER_OPn_SUM) { + for (c = 0; c < cfg->num_counters; ++c) + for (p = 0; p < mp_count; ++p) + value += count[p][c]; + value = (value * cfg->norm[0]) / cfg->norm[1]; + } else + if (cfg->op == NVC0_COUNTER_OPn_OR) { + uint32_t v = 0; + for (c = 0; c < cfg->num_counters; ++c) + for (p = 0; p < mp_count; ++p) + v |= count[p][c]; + value = (v * cfg->norm[0]) / cfg->norm[1]; + } else + if (cfg->op == NVC0_COUNTER_OPn_AND) { + uint32_t v = ~0; + for (c = 0; c < cfg->num_counters; ++c) + for (p = 0; p < mp_count; ++p) + v &= count[p][c]; + value = (v * cfg->norm[0]) / cfg->norm[1]; + } else + if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) { + uint64_t v[2] = { 0, 0 }; + for (p = 0; p < mp_count; ++p) { + v[0] += count[p][0]; + v[1] += count[p][1]; + } + if (v[0]) + value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]); + } else + if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) { + for (p = 0; p < mp_count; ++p) + value += count[p][0]; + if (count[0][1]) + value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]); + else + value = 0; + } else + if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) { + unsigned mp_used = 0; + for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0]) + if (count[p][1]) + value += (count[p][0] * cfg->norm[0]) / count[p][1]; + if (mp_used) + value /= mp_used * cfg->norm[1]; + } else + if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) { + unsigned 
mp_used = 0; + for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0]) + value += count[p][0]; + if (count[0][1] && mp_used) { + value *= cfg->norm[0]; + value /= count[0][1] * mp_used * cfg->norm[1]; + } else { + value = 0; + } + } + + *(uint64_t *)result = value; + return TRUE; +} + +int +nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, + unsigned id, + struct pipe_driver_query_info *info) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + int count = 0; + + count += NVC0_QUERY_DRV_STAT_COUNT; + + if (screen->base.device->drm_version >= 0x01000101) { + if (screen->base.class_3d >= NVE4_3D_CLASS) { + count += NVE4_PM_QUERY_COUNT; + } else + if (screen->compute) { + count += NVC0_PM_QUERY_COUNT; /* NVC0_COMPUTE is not always enabled */ + } + } + + if (!info) + return count; + +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + if (id < NVC0_QUERY_DRV_STAT_COUNT) { + info->name = nvc0_drv_stat_names[id]; + info->query_type = NVC0_QUERY_DRV_STAT(id); + info->max_value = ~0ULL; + info->uses_byte_units = !!strstr(info->name, "bytes"); + return 1; + } else +#endif + if (id < count) { + if (screen->base.class_3d >= NVE4_3D_CLASS) { + info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT]; + info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); + info->max_value = (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 
+ ~0ULL : 100; + info->uses_byte_units = FALSE; + return 1; + } else + if (screen->compute) { + info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT]; + info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); + info->max_value = ~0ULL; + info->uses_byte_units = FALSE; + return 1; + } + } + /* user asked for info about non-existing query */ + info->name = "this_is_not_the_query_you_are_looking_for"; + info->query_type = 0xdeadd01d; + info->max_value = 0; + info->uses_byte_units = FALSE; + return 0; +} + +void +nvc0_init_query_functions(struct nvc0_context *nvc0) +{ + struct pipe_context *pipe = &nvc0->base.pipe; + + pipe->create_query = nvc0_query_create; + pipe->destroy_query = nvc0_query_destroy; + pipe->begin_query = nvc0_query_begin; + pipe->end_query = nvc0_query_end; + pipe->get_query_result = nvc0_query_result; + pipe->render_condition = nvc0_render_condition; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c new file mode 100644 index 00000000000..4e70903b538 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c @@ -0,0 +1,62 @@ + +#include "pipe/p_context.h" +#include "nvc0/nvc0_resource.h" +#include "nouveau_screen.h" + + +static struct pipe_resource * +nvc0_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + switch (templ->target) { + case PIPE_BUFFER: + return nouveau_buffer_create(screen, templ); + default: + return nvc0_miptree_create(screen, templ); + } +} + +static struct pipe_resource * +nvc0_resource_from_handle(struct pipe_screen * screen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) +{ + if (templ->target == PIPE_BUFFER) { + return NULL; + } else { + struct pipe_resource *res = nv50_miptree_from_handle(screen, + templ, whandle); + nv04_resource(res)->vtbl = &nvc0_miptree_vtbl; + return res; + } +} + +static struct pipe_surface * +nvc0_surface_create(struct pipe_context *pipe, + struct 
/* Install the resource-related entry points on a pipe context: the transfer
 * hooks are set to the u_transfer_*_vtbl functions, surfaces are created by
 * nvc0_surface_create and destroyed by nv50_surface_destroy.
 */
void
nvc0_init_resource_functions(struct pipe_context *pcontext)
{
   pcontext->transfer_map = u_transfer_map_vtbl;
   pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
   pcontext->transfer_unmap = u_transfer_unmap_vtbl;
   pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
   pcontext->create_surface = nvc0_surface_create;
   pcontext->surface_destroy = nv50_surface_destroy;
}
+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen); + +/* Internal functions: + */ +struct pipe_resource * +nvc0_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmp); + +/* Declaration only: without "extern" every file including this header would + * carry its own tentative definition of the object. + */ +extern const struct u_resource_vtbl nvc0_miptree_vtbl; + +struct pipe_surface * +nvc0_miptree_surface_new(struct pipe_context *, + struct pipe_resource *, + const struct pipe_surface *templ); + +unsigned +nvc0_mt_zslice_offset(const struct nv50_miptree *, unsigned l, unsigned z); + +void * +nvc0_miptree_transfer_map(struct pipe_context *pctx, + struct pipe_resource *res, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer); +void +nvc0_miptree_transfer_unmap(struct pipe_context *pcontext, + struct pipe_transfer *ptx); + +#endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c new file mode 100644 index 00000000000..ff7890bbac4 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -0,0 +1,967 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "util/u_format_s3tc.h" +#include "pipe/p_screen.h" + +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" + +#include "nouveau_vp3_video.h" + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_screen.h" + +#include "nvc0/nvc0_graph_macros.h" + +#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS +# define NOUVEAU_GETPARAM_GRAPH_UNITS 13 +#endif + +static boolean +nvc0_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bindings) +{ + if (sample_count > 8) + return FALSE; + if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */ + return FALSE; + + if (!util_format_is_supported(format, bindings)) + return FALSE; + + if ((bindings & PIPE_BIND_SAMPLER_VIEW) && (target != PIPE_BUFFER)) + if (util_format_get_blocksizebits(format) == 3 * 32) + return FALSE; + + /* transfers & shared are always supported */ + bindings &= ~(PIPE_BIND_TRANSFER_READ | + PIPE_BIND_TRANSFER_WRITE | + PIPE_BIND_SHARED); + + return (nvc0_format_table[format].usage & bindings) == bindings; +} + +static int +nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; + + switch (param) { + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 16 * 5; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 15; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return (class_3d >= NVE4_3D_CLASS) ? 
13 : 12; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 2048; + case PIPE_CAP_MIN_TEXEL_OFFSET: + return -8; + case PIPE_CAP_MAX_TEXEL_OFFSET: + return 7; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_TEXTURE_SHADOW_MAP: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_CUBE_MAP_ARRAY: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return 1; + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return 65536; + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_TGSI_TEXCOORD: + return 1; + case PIPE_CAP_SM3: + return 1; + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 150; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 1; + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_VERTEX_COLOR_CLAMPED: + return 1; + case PIPE_CAP_QUERY_TIMESTAMP: + case PIPE_CAP_QUERY_TIME_ELAPSED: + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + return 1; + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return 4; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return 128; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 0; + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_TGSI_INSTANCEID: + case 
PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_TEXTURE_BARRIER: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_START_INSTANCE: + return 1; + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + return 0; /* state trackers will know better */ + case PIPE_CAP_USER_CONSTANT_BUFFERS: + case PIPE_CAP_USER_INDEX_BUFFERS: + case PIPE_CAP_USER_VERTEX_BUFFERS: + return 1; + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 256; + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return 1; /* 256 for binding as RT, but that's not possible in GL */ + case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + return NOUVEAU_MIN_BUFFER_MAP_ALIGN; + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + return 0; + case PIPE_CAP_COMPUTE: + return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 1; + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50; + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static int +nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, + enum pipe_shader_cap param) +{ + const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; + + switch (shader) { + case PIPE_SHADER_VERTEX: + /* + case PIPE_SHADER_TESSELLATION_CONTROL: + case PIPE_SHADER_TESSELLATION_EVALUATION: + */ + case PIPE_SHADER_GEOMETRY: + case PIPE_SHADER_FRAGMENT: + break; + case PIPE_SHADER_COMPUTE: + if (class_3d < NVE4_3D_CLASS) + return 0; + break; + default: + return 0; + } + + switch (param) { + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case 
PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 16; + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_VERTEX) + return 32; + /* NOTE: These only count our slots for GENERIC varyings. + * The address space may be larger, but the actual hard limit seems to be + * less than what the address space layout permits, so don't add TEXCOORD, + * COLOR, etc. here. + */ + if (shader == PIPE_SHADER_FRAGMENT) + return 0x1f0 / 16; + /* Actually this counts CLIPVERTEX, which occupies the last generic slot, + * and excludes 0x60 per-patch inputs. + */ + return 0x200 / 16; + case PIPE_SHADER_CAP_MAX_CONSTS: + return 65536 / 16; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + if (shader == PIPE_SHADER_COMPUTE && class_3d >= NVE4_3D_CLASS) + return NVE4_MAX_PIPE_CONSTBUFS_COMPUTE; + return NVC0_MAX_PIPE_CONSTBUFS; + case PIPE_SHADER_CAP_MAX_ADDRS: + return 1; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + return shader != PIPE_SHADER_FRAGMENT; + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; + case PIPE_SHADER_CAP_MAX_TEMPS: + return NVC0_CAP_MAX_PROGRAM_TEMPS; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 1; + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + return 0; + case PIPE_SHADER_CAP_SUBROUTINES: + return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 1; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + return 16; /* would be 32 in linked (OpenGL-style) mode */ + /* + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLER_VIEWS: + return 32; + */ + default: + NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); + return 0; + } +} + +static float +nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 10.0f; + case 
PIPE_CAPF_MAX_POINT_WIDTH: + return 63.0f; + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 63.375f; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 15.0f; + default: + NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); + return 0.0f; + } +} + +static int +nvc0_screen_get_compute_param(struct pipe_screen *pscreen, + enum pipe_compute_cap param, void *data) +{ + uint64_t *data64 = (uint64_t *)data; + const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass; + + switch (param) { + case PIPE_COMPUTE_CAP_GRID_DIMENSION: + data64[0] = 3; + return 8; + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fffffff : 65535; + data64[1] = 65535; + data64[2] = 65535; + return 24; + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + data64[0] = 1024; + data64[1] = 1024; + data64[2] = 64; + return 24; + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + data64[0] = 1024; + return 8; + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */ + data64[0] = (uint64_t)1 << 40; + return 8; + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ + data64[0] = 48 << 10; + return 8; + case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ + data64[0] = 512 << 10; + return 8; + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ + data64[0] = 4096; + return 8; + default: + return 0; + } +} + +static void +nvc0_screen_destroy(struct pipe_screen *pscreen) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + + if (screen->base.fence.current) { + nouveau_fence_wait(screen->base.fence.current); + nouveau_fence_ref(NULL, &screen->base.fence.current); + } + if (screen->base.pushbuf) + screen->base.pushbuf->user_priv = NULL; + + if (screen->blitter) + nvc0_blitter_destroy(screen); + if (screen->pm.prog) { + screen->pm.prog->code = NULL; /* hardcoded, don't FREE */ + nvc0_program_destroy(NULL, screen->pm.prog); + } + + nouveau_bo_ref(NULL, &screen->text); + nouveau_bo_ref(NULL, &screen->uniform_bo); + 
nouveau_bo_ref(NULL, &screen->tls); + nouveau_bo_ref(NULL, &screen->txc); + nouveau_bo_ref(NULL, &screen->fence.bo); + nouveau_bo_ref(NULL, &screen->poly_cache); + nouveau_bo_ref(NULL, &screen->parm); + + nouveau_heap_destroy(&screen->lib_code); + nouveau_heap_destroy(&screen->text_heap); + + FREE(screen->tic.entries); + + nouveau_mm_destroy(screen->mm_VRAM_fe0); + + nouveau_object_del(&screen->eng3d); + nouveau_object_del(&screen->eng2d); + nouveau_object_del(&screen->m2mf); + nouveau_object_del(&screen->compute); + + nouveau_screen_fini(&screen->base); + + FREE(screen); +} + +static int +nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, + unsigned size, const uint32_t *data) +{ + struct nouveau_pushbuf *push = screen->base.pushbuf; + + size /= 4; + + BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2); + PUSH_DATA (push, (m - 0x3800) / 8); + PUSH_DATA (push, pos); + BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); + PUSH_DATA (push, pos); + PUSH_DATAp(push, data, size); + + return pos + size; +} + +static void +nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) +{ + BEGIN_NVC0(push, SUBC_3D(0x10cc), 1); + PUSH_DATA (push, 0xff); + BEGIN_NVC0(push, SUBC_3D(0x10e0), 2); + PUSH_DATA (push, 0xff); + PUSH_DATA (push, 0xff); + BEGIN_NVC0(push, SUBC_3D(0x10ec), 2); + PUSH_DATA (push, 0xff); + PUSH_DATA (push, 0xff); + BEGIN_NVC0(push, SUBC_3D(0x074c), 1); + PUSH_DATA (push, 0x3f); + + BEGIN_NVC0(push, SUBC_3D(0x16a8), 1); + PUSH_DATA (push, (3 << 16) | 3); + BEGIN_NVC0(push, SUBC_3D(0x1794), 1); + PUSH_DATA (push, (2 << 16) | 2); + BEGIN_NVC0(push, SUBC_3D(0x0de8), 1); + PUSH_DATA (push, 1); + + BEGIN_NVC0(push, SUBC_3D(0x12ac), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_3D(0x0218), 1); + PUSH_DATA (push, 0x10); + BEGIN_NVC0(push, SUBC_3D(0x10fc), 1); + PUSH_DATA (push, 0x10); + BEGIN_NVC0(push, SUBC_3D(0x1290), 1); + PUSH_DATA (push, 0x10); + BEGIN_NVC0(push, SUBC_3D(0x12d8), 2); + PUSH_DATA (push, 
0x10); + PUSH_DATA (push, 0x10); + BEGIN_NVC0(push, SUBC_3D(0x1140), 1); + PUSH_DATA (push, 0x10); + BEGIN_NVC0(push, SUBC_3D(0x1610), 1); + PUSH_DATA (push, 0xe); + + BEGIN_NVC0(push, SUBC_3D(0x164c), 1); + PUSH_DATA (push, 1 << 12); + BEGIN_NVC0(push, SUBC_3D(0x030c), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_3D(0x0300), 1); + PUSH_DATA (push, 3); + + BEGIN_NVC0(push, SUBC_3D(0x02d0), 1); + PUSH_DATA (push, 0x3fffff); + BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, SUBC_3D(0x19c0), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, SUBC_3D(0x075c), 1); + PUSH_DATA (push, 3); + + if (obj_class >= NVE4_3D_CLASS) { + BEGIN_NVC0(push, SUBC_3D(0x07fc), 1); + PUSH_DATA (push, 1); + } + + /* TODO: find out what software methods 0x1528, 0x1280 and (on nve4) 0x02dc + * are supposed to do */ +} + +static void +nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + struct nouveau_pushbuf *push = screen->base.pushbuf; + + /* we need to do it after possible flush in MARK_RING */ + *sequence = ++screen->base.fence.sequence; + + BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4); + PUSH_DATAh(push, screen->fence.bo->offset); + PUSH_DATA (push, screen->fence.bo->offset); + PUSH_DATA (push, *sequence); + PUSH_DATA (push, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT | + (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT)); +} + +static u32 +nvc0_screen_fence_update(struct pipe_screen *pscreen) +{ + struct nvc0_screen *screen = nvc0_screen(pscreen); + return screen->fence.map[0]; +} + +static int +nvc0_screen_init_compute(struct nvc0_screen *screen) +{ + screen->base.base.get_compute_param = nvc0_screen_get_compute_param; + + switch (screen->base.device->chipset & 0xf0) { + case 0xc0: + case 0xd0: + /* Using COMPUTE has weird effects on 3D state, we need to + * investigate this further before enabling it by default. 
+ */ + if (debug_get_bool_option("NVC0_COMPUTE", FALSE)) + return nvc0_screen_compute_setup(screen, screen->base.pushbuf); + return 0; + case 0xe0: + case 0xf0: + return nve4_screen_compute_setup(screen, screen->base.pushbuf); + default: + return -1; + } +} + +boolean +nvc0_screen_resize_tls_area(struct nvc0_screen *screen, + uint32_t lpos, uint32_t lneg, uint32_t cstack) +{ + struct nouveau_bo *bo = NULL; + int ret; + uint64_t size = (lpos + lneg) * 32 + cstack; + + if (size >= (1 << 20)) { + NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size); + return FALSE; + } + + size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */ + size = align(size, 0x8000); + size *= screen->mp_count; + + size = align(size, 1 << 17); + + ret = nouveau_bo_new(screen->base.device, NOUVEAU_BO_VRAM, 1 << 17, size, + NULL, &bo); + if (ret) { + NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size); + return FALSE; + } + nouveau_bo_ref(NULL, &screen->tls); + screen->tls = bo; + return TRUE; +} + +#define FAIL_SCREEN_INIT(str, err) \ + do { \ + NOUVEAU_ERR(str, err); \ + nvc0_screen_destroy(pscreen); \ + return NULL; \ + } while(0) + +struct pipe_screen * +nvc0_screen_create(struct nouveau_device *dev) +{ + struct nvc0_screen *screen; + struct pipe_screen *pscreen; + struct nouveau_object *chan; + struct nouveau_pushbuf *push; + uint64_t value; + uint32_t obj_class; + int ret; + unsigned i; + union nouveau_bo_config mm_config; + + switch (dev->chipset & ~0xf) { + case 0xc0: + case 0xd0: + case 0xe0: + case 0xf0: + break; + default: + return NULL; + } + + screen = CALLOC_STRUCT(nvc0_screen); + if (!screen) + return NULL; + pscreen = &screen->base.base; + + ret = nouveau_screen_init(&screen->base, dev); + if (ret) { + nvc0_screen_destroy(pscreen); + return NULL; + } + chan = screen->base.channel; + push = screen->base.pushbuf; + push->user_priv = screen; + push->rsvd_kick = 5; + + screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER | + 
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER; + screen->base.sysmem_bindings |= + PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER; + + pscreen->destroy = nvc0_screen_destroy; + pscreen->context_create = nvc0_create; + pscreen->is_format_supported = nvc0_screen_is_format_supported; + pscreen->get_param = nvc0_screen_get_param; + pscreen->get_shader_param = nvc0_screen_get_shader_param; + pscreen->get_paramf = nvc0_screen_get_paramf; + pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info; + + nvc0_screen_init_resource_functions(pscreen); + + screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param; + screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, NULL, + &screen->fence.bo); + if (ret) + goto fail; + nouveau_bo_map(screen->fence.bo, 0, NULL); + screen->fence.map = screen->fence.bo->map; + screen->base.fence.emit = nvc0_screen_fence_emit; + screen->base.fence.update = nvc0_screen_fence_update; + + switch (dev->chipset & 0xf0) { + case 0xf0: + obj_class = NVF0_P2MF_CLASS; + break; + case 0xe0: + obj_class = NVE4_P2MF_CLASS; + break; + default: + obj_class = NVC0_M2MF_CLASS; + break; + } + ret = nouveau_object_new(chan, 0xbeef323f, obj_class, NULL, 0, + &screen->m2mf); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret); + + BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->m2mf->oclass); + if (screen->m2mf->oclass == NVE4_P2MF_CLASS) { + BEGIN_NVC0(push, SUBC_COPY(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, 0xa0b5); + } + + ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0, + &screen->eng2d); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret); + + BEGIN_NVC0(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->eng2d->oclass); + BEGIN_NVC0(push, NVC0_2D(SINGLE_GPC), 1); + PUSH_DATA (push, 0); + 
BEGIN_NVC0(push, NVC0_2D(OPERATION), 1); + PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY); + BEGIN_NVC0(push, NVC0_2D(CLIP_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_2D(COLOR_KEY_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_2D(0x0884), 1); + PUSH_DATA (push, 0x3f); + BEGIN_NVC0(push, SUBC_2D(0x0888), 1); + PUSH_DATA (push, 1); + + BEGIN_NVC0(push, SUBC_2D(NVC0_GRAPH_NOTIFY_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->fence.bo->offset + 16); + PUSH_DATA (push, screen->fence.bo->offset + 16); + + switch (dev->chipset & 0xf0) { + case 0xf0: + obj_class = NVF0_3D_CLASS; + break; + case 0xe0: + obj_class = NVE4_3D_CLASS; + break; + case 0xd0: + case 0xc0: + default: + switch (dev->chipset) { + case 0xd9: + case 0xc8: + obj_class = NVC8_3D_CLASS; + break; + case 0xc1: + obj_class = NVC1_3D_CLASS; + break; + default: + obj_class = NVC0_3D_CLASS; + break; + } + break; + } + ret = nouveau_object_new(chan, 0xbeef003d, obj_class, NULL, 0, + &screen->eng3d); + if (ret) + FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret); + screen->base.class_3d = obj_class; + + BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->eng3d->oclass); + + BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1); + PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS); + + if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) { + /* kill shaders after about 1 second (at 100 MHz) */ + BEGIN_NVC0(push, NVC0_3D(WATCHDOG_TIMER), 1); + PUSH_DATA (push, 0x17); + } + + IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE), dev->drm_version >= 0x01000101); + BEGIN_NVC0(push, NVC0_3D(RT_COMP_ENABLE(0)), 8); + for (i = 0; i < 8; ++i) + PUSH_DATA(push, dev->drm_version >= 0x01000101); + + BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); + PUSH_DATA (push, 1); + + BEGIN_NVC0(push, NVC0_3D(CSAA_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 1); + PUSH_DATA (push, 
NVC0_3D_MULTISAMPLE_MODE_MS1); + BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_CTRL), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(LINE_WIDTH_SEPARATE), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(LINE_LAST_PIXEL), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(BLEND_SEPARATE_ALPHA), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1); + PUSH_DATA (push, 0); + if (screen->eng3d->oclass < NVE4_3D_CLASS) { + BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1); + PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); + } else { + BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1); + PUSH_DATA (push, 15); + } + BEGIN_NVC0(push, NVC0_3D(CALL_LIMIT_LOG), 1); + PUSH_DATA (push, 8); /* 128 */ + BEGIN_NVC0(push, NVC0_3D(ZCULL_STATCTRS_ENABLE), 1); + PUSH_DATA (push, 1); + if (screen->eng3d->oclass >= NVC1_3D_CLASS) { + BEGIN_NVC0(push, NVC0_3D(CACHE_SPLIT), 1); + PUSH_DATA (push, NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1); + } + + nvc0_magic_3d_init(push, screen->eng3d->oclass); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL, + &screen->text); + if (ret) + goto fail; + + /* XXX: getting a page fault at the end of the code buffer every few + * launches, don't use the last 256 bytes to work around them - prefetch ? 
+ */ + nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, NULL, + &screen->uniform_bo); + if (ret) + goto fail; + + for (i = 0; i < 5; ++i) { + /* TIC and TSC entries for each unit (nve4+ only) */ + /* auxiliary constants (6 user clip planes, base instance id) */ + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 512); + PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9)); + PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9)); + BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); + PUSH_DATA (push, (15 << 4) | 1); + if (screen->eng3d->oclass >= NVE4_3D_CLASS) { + unsigned j; + BEGIN_1IC0(push, NVC0_3D(CB_POS), 9); + PUSH_DATA (push, 0); + for (j = 0; j < 8; ++j) + PUSH_DATA(push, j); + } else { + BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1); + PUSH_DATA (push, 0x54); + } + } + BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1); + PUSH_DATA (push, 0); + + /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */ + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 256); + PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); + PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 5); + PUSH_DATA (push, 0); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 0.0f); + BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); + PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); + + if (dev->drm_version >= 0x01000101) { + ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); + if (ret) { + NOUVEAU_ERR("NOUVEAU_GETPARAM_GRAPH_UNITS failed.\n"); + goto fail; + } + } else { + if (dev->chipset >= 0xe0 && dev->chipset < 0xf0) + value = (8 << 8) | 4; + else + value = (16 << 8) | 4; + } + screen->mp_count = value >> 8; + screen->mp_count_compute = 
screen->mp_count; + + /* screen->tls is dereferenced right below, so a failed TLS allocation + * must abort screen creation instead of being ignored. + */ + if (!nvc0_screen_resize_tls_area(screen, 128 * 16, 0, 0x200)) + goto fail; + + BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->text->offset); + PUSH_DATA (push, screen->text->offset); + BEGIN_NVC0(push, NVC0_3D(TEMP_ADDRESS_HIGH), 4); + PUSH_DATAh(push, screen->tls->offset); + PUSH_DATA (push, screen->tls->offset); + PUSH_DATA (push, screen->tls->size >> 32); + PUSH_DATA (push, screen->tls->size); + BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1); + PUSH_DATA (push, 0); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL, + &screen->poly_cache); + if (ret) + goto fail; + + BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->poly_cache->offset); + PUSH_DATA (push, screen->poly_cache->offset); + PUSH_DATA (push, 3); + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, NULL, + &screen->txc); + if (ret) + goto fail; + + BEGIN_NVC0(push, NVC0_3D(TIC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset); + PUSH_DATA (push, screen->txc->offset); + PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); + + BEGIN_NVC0(push, NVC0_3D(TSC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset + 65536); + PUSH_DATA (push, screen->txc->offset + 65536); + PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); + + BEGIN_NVC0(push, NVC0_3D(SCREEN_Y_CONTROL), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(WINDOW_OFFSET_X), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1); /* deactivate ZCULL */ + PUSH_DATA (push, 0x3f); + + BEGIN_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), 1); + PUSH_DATA (push, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY); + BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), 8 * 2); + for (i = 0; i < 8 * 2; ++i) + PUSH_DATA(push, 0); + BEGIN_NVC0(push, NVC0_3D(CLIP_RECTS_EN), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(CLIPID_ENABLE), 1); + PUSH_DATA (push, 0); + + /* neither
scissors, viewport nor stencil mask should affect clears */ + BEGIN_NVC0(push, NVC0_3D(CLEAR_FLAGS), 1); + PUSH_DATA (push, 0); + + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(0)), 2); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 1.0f); + BEGIN_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 1); + PUSH_DATA (push, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1); + + /* We use scissors instead of exact view volume clipping, + * so they're always enabled. + */ + BEGIN_NVC0(push, NVC0_3D(SCISSOR_ENABLE(0)), 3); + PUSH_DATA (push, 1); + PUSH_DATA (push, 8192 << 16); + PUSH_DATA (push, 8192 << 16); + +#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); + + i = 0; + MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, nvc0_9097_per_instance_bf); + MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, nvc0_9097_blend_enables); + MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select); + MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, nvc0_9097_tep_select); + MK_MACRO(NVC0_3D_MACRO_GP_SELECT, nvc0_9097_gp_select); + MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front); + MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back); + + BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(RT_SEPARATE_FRAG_DATA), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); + PUSH_DATA (push, 0x40); + BEGIN_NVC0(push, NVC0_3D(LAYER), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1); + PUSH_DATA (push, 0x30); + BEGIN_NVC0(push, NVC0_3D(PATCH_VERTICES), 1); + PUSH_DATA (push, 3); + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); + PUSH_DATA (push, 0x20); + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(0)), 1); + PUSH_DATA (push, 0x00); + + BEGIN_NVC0(push, NVC0_3D(POINT_COORD_REPLACE), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(POINT_RASTER_RULES), 1); + PUSH_DATA (push, 
NVC0_3D_POINT_RASTER_RULES_OGL); + + IMMED_NVC0(push, NVC0_3D(EDGEFLAG), 1); + + if (nvc0_screen_init_compute(screen)) + goto fail; + + PUSH_KICK (push); + + /* One allocation backs both tables: TIC entries in the first 2048 slots, + * TSC entries in the second 2048. Fail cleanly if it cannot be obtained, + * since the alloc helpers index into it unconditionally. + */ + screen->tic.entries = CALLOC(4096, sizeof(void *)); + if (!screen->tic.entries) + goto fail; + screen->tsc.entries = screen->tic.entries + 2048; + + mm_config.nvc0.tile_mode = 0; + mm_config.nvc0.memtype = 0xfe0; + screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config); + + if (!nvc0_blitter_create(screen)) + goto fail; + + nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE); + + return pscreen; + +fail: + nvc0_screen_destroy(pscreen); + return NULL; +} + +int +nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry) +{ + int i = screen->tic.next; + + while (screen->tic.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); + + screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1); + + if (screen->tic.entries[i]) + nv50_tic_entry(screen->tic.entries[i])->id = -1; + + screen->tic.entries[i] = entry; + return i; +} + +int +nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry) +{ + int i = screen->tsc.next; + + while (screen->tsc.lock[i / 32] & (1 << (i % 32))) + i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); + + screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1); + + if (screen->tsc.entries[i]) + nv50_tsc_entry(screen->tsc.entries[i])->id = -1; + + screen->tsc.entries[i] = entry; + return i; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h new file mode 100644 index 00000000000..27a0c5f784d --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -0,0 +1,325 @@ +#ifndef __NVC0_SCREEN_H__ +#define __NVC0_SCREEN_H__ + +#include "nouveau_screen.h" +#include "nouveau_mm.h" +#include "nouveau_fence.h" +#include "nouveau_heap.h" + +#include "nv_object.xml.h" + +#include "nvc0/nvc0_winsys.h" +#include "nvc0/nvc0_stateobj.h" + +#define NVC0_TIC_MAX_ENTRIES 2048 +#define NVC0_TSC_MAX_ENTRIES 2048 + +/* doesn't
count reserved slots (for auxiliary constants, immediates, etc.) */ +#define NVC0_MAX_PIPE_CONSTBUFS 14 +#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE 7 + +#define NVC0_MAX_SURFACE_SLOTS 16 + +struct nvc0_context; + +struct nvc0_blitter; + +struct nvc0_screen { + struct nouveau_screen base; + + struct nvc0_context *cur_ctx; + + int num_occlusion_queries_active; + + struct nouveau_bo *text; + struct nouveau_bo *parm; /* for COMPUTE */ + struct nouveau_bo *uniform_bo; /* for 3D */ + struct nouveau_bo *tls; + struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ + struct nouveau_bo *poly_cache; + + uint16_t mp_count; + uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */ + + struct nouveau_heap *text_heap; + struct nouveau_heap *lib_code; /* allocated from text_heap */ + + struct nvc0_blitter *blitter; + + struct { + void **entries; + int next; + uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32]; + } tic; + + struct { + void **entries; + int next; + uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32]; + } tsc; + + struct { + struct nouveau_bo *bo; + uint32_t *map; + } fence; + + struct { + struct nvc0_program *prog; /* compute state object to read MP counters */ + struct pipe_query *mp_counter[8]; /* counter to query allocation */ + uint8_t num_mp_pm_active[2]; + boolean mp_counters_enabled; + } pm; + + struct nouveau_mman *mm_VRAM_fe0; + + struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */ + struct nouveau_object *eng2d; + struct nouveau_object *m2mf; + struct nouveau_object *compute; +}; + +static INLINE struct nvc0_screen * +nvc0_screen(struct pipe_screen *screen) +{ + return (struct nvc0_screen *)screen; +} + + +/* Performance counter queries: + */ +#define NVE4_PM_QUERY_COUNT 49 +#define NVE4_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i)) +#define NVE4_PM_QUERY_LAST NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1) +#define NVE4_PM_QUERY_PROF_TRIGGER_0 0 +#define NVE4_PM_QUERY_PROF_TRIGGER_1 1 +#define NVE4_PM_QUERY_PROF_TRIGGER_2 2 +#define 
NVE4_PM_QUERY_PROF_TRIGGER_3 3 +#define NVE4_PM_QUERY_PROF_TRIGGER_4 4 +#define NVE4_PM_QUERY_PROF_TRIGGER_5 5 +#define NVE4_PM_QUERY_PROF_TRIGGER_6 6 +#define NVE4_PM_QUERY_PROF_TRIGGER_7 7 +#define NVE4_PM_QUERY_LAUNCHED_WARPS 8 +#define NVE4_PM_QUERY_LAUNCHED_THREADS 9 +#define NVE4_PM_QUERY_LAUNCHED_CTA 10 +#define NVE4_PM_QUERY_INST_ISSUED1 11 +#define NVE4_PM_QUERY_INST_ISSUED2 12 +#define NVE4_PM_QUERY_INST_EXECUTED 13 +#define NVE4_PM_QUERY_LD_LOCAL 14 +#define NVE4_PM_QUERY_ST_LOCAL 15 +#define NVE4_PM_QUERY_LD_SHARED 16 +#define NVE4_PM_QUERY_ST_SHARED 17 +#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT 18 +#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS 19 +#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT 20 +#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS 21 +#define NVE4_PM_QUERY_GLD_REQUEST 22 +#define NVE4_PM_QUERY_GST_REQUEST 23 +#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT 24 +#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS 25 +#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26 +#define NVE4_PM_QUERY_GST_TRANSACTIONS 27 +#define NVE4_PM_QUERY_BRANCH 28 +#define NVE4_PM_QUERY_BRANCH_DIVERGENT 29 +#define NVE4_PM_QUERY_ACTIVE_WARPS 30 +#define NVE4_PM_QUERY_ACTIVE_CYCLES 31 +#define NVE4_PM_QUERY_INST_ISSUED 32 +#define NVE4_PM_QUERY_ATOM_COUNT 33 +#define NVE4_PM_QUERY_GRED_COUNT 34 +#define NVE4_PM_QUERY_LD_SHARED_REPLAY 35 +#define NVE4_PM_QUERY_ST_SHARED_REPLAY 36 +#define NVE4_PM_QUERY_LD_LOCAL_TRANSACTIONS 37 +#define NVE4_PM_QUERY_ST_LOCAL_TRANSACTIONS 38 +#define NVE4_PM_QUERY_L1_LD_SHARED_TRANSACTIONS 39 +#define NVE4_PM_QUERY_L1_ST_SHARED_TRANSACTIONS 40 +#define NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY 41 +#define NVE4_PM_QUERY_GST_MEM_DIV_REPLAY 42 +#define NVE4_PM_QUERY_METRIC_IPC 43 +#define NVE4_PM_QUERY_METRIC_IPAC 44 +#define NVE4_PM_QUERY_METRIC_IPEC 45 +#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY 46 +#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY 47 +#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD 48 + +/* +#define NVE4_PM_QUERY_GR_IDLE 50 +#define NVE4_PM_QUERY_BSP_IDLE 51 +#define 
NVE4_PM_QUERY_VP_IDLE 52 +#define NVE4_PM_QUERY_PPP_IDLE 53 +#define NVE4_PM_QUERY_CE0_IDLE 54 +#define NVE4_PM_QUERY_CE1_IDLE 55 +#define NVE4_PM_QUERY_CE2_IDLE 56 +*/ +/* L2 queries (PCOUNTER) */ +/* +#define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57 +... +*/ +/* TEX queries (PCOUNTER) */ +/* +#define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58 +... +*/ + +#define NVC0_PM_QUERY_COUNT 31 +#define NVC0_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i)) +#define NVC0_PM_QUERY_LAST NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1) +#define NVC0_PM_QUERY_INST_EXECUTED 0 +#define NVC0_PM_QUERY_BRANCH 1 +#define NVC0_PM_QUERY_BRANCH_DIVERGENT 2 +#define NVC0_PM_QUERY_ACTIVE_WARPS 3 +#define NVC0_PM_QUERY_ACTIVE_CYCLES 4 +#define NVC0_PM_QUERY_LAUNCHED_WARPS 5 +#define NVC0_PM_QUERY_LAUNCHED_THREADS 6 +#define NVC0_PM_QUERY_LD_SHARED 7 +#define NVC0_PM_QUERY_ST_SHARED 8 +#define NVC0_PM_QUERY_LD_LOCAL 9 +#define NVC0_PM_QUERY_ST_LOCAL 10 +#define NVC0_PM_QUERY_GRED_COUNT 11 +#define NVC0_PM_QUERY_ATOM_COUNT 12 +#define NVC0_PM_QUERY_GLD_REQUEST 13 +#define NVC0_PM_QUERY_GST_REQUEST 14 +#define NVC0_PM_QUERY_INST_ISSUED1_0 15 +#define NVC0_PM_QUERY_INST_ISSUED1_1 16 +#define NVC0_PM_QUERY_INST_ISSUED2_0 17 +#define NVC0_PM_QUERY_INST_ISSUED2_1 18 +#define NVC0_PM_QUERY_TH_INST_EXECUTED_0 19 +#define NVC0_PM_QUERY_TH_INST_EXECUTED_1 20 +#define NVC0_PM_QUERY_TH_INST_EXECUTED_2 21 +#define NVC0_PM_QUERY_TH_INST_EXECUTED_3 22 +#define NVC0_PM_QUERY_PROF_TRIGGER_0 23 +#define NVC0_PM_QUERY_PROF_TRIGGER_1 24 +#define NVC0_PM_QUERY_PROF_TRIGGER_2 25 +#define NVC0_PM_QUERY_PROF_TRIGGER_3 26 +#define NVC0_PM_QUERY_PROF_TRIGGER_4 27 +#define NVC0_PM_QUERY_PROF_TRIGGER_5 28 +#define NVC0_PM_QUERY_PROF_TRIGGER_6 29 +#define NVC0_PM_QUERY_PROF_TRIGGER_7 30 + +/* Driver statistics queries: + */ +#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS + +#define NVC0_QUERY_DRV_STAT(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i)) +#define NVC0_QUERY_DRV_STAT_COUNT 29 +#define NVC0_QUERY_DRV_STAT_LAST 
NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1) +#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT 0 +#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES 1 +#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT 2 +#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID 3 +#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS 4 +#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ 5 +#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE 6 +#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT 7 +#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT 8 +#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT 9 +#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ 10 +#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE 11 +#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID 12 +#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT 13 +#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID 14 +#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS 15 +#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES 16 +#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT 17 +#define NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT 18 +#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT 19 +#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT 20 +#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY 21 +#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED 22 +#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT 23 +#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES 24 +#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT 25 +#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES 26 +#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT 27 +#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT 28 + +#else + +#define NVC0_QUERY_DRV_STAT_COUNT 0 + +#endif + +int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned, + struct pipe_driver_query_info *); + +boolean nvc0_blitter_create(struct nvc0_screen *); +void nvc0_blitter_destroy(struct nvc0_screen *); + +void nvc0_screen_make_buffers_resident(struct nvc0_screen *); + +int nvc0_screen_tic_alloc(struct 
nvc0_screen *, void *); +int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *); + +int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *); +int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *); + +boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos, + uint32_t lneg, uint32_t cstack); + +static INLINE void +nvc0_resource_fence(struct nv04_resource *res, uint32_t flags) +{ + struct nvc0_screen *screen = nvc0_screen(res->base.screen); + + if (res->mm) { + nouveau_fence_ref(screen->base.fence.current, &res->fence); + if (flags & NOUVEAU_BO_WR) + nouveau_fence_ref(screen->base.fence.current, &res->fence_wr); + } +} + +static INLINE void +nvc0_resource_validate(struct nv04_resource *res, uint32_t flags) +{ + if (likely(res->bo)) { + if (flags & NOUVEAU_BO_WR) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING | + NOUVEAU_BUFFER_STATUS_DIRTY; + if (flags & NOUVEAU_BO_RD) + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nvc0_resource_fence(res, flags); + } +} + +struct nvc0_format { + uint32_t rt; + uint32_t tic; + uint32_t vtx; + uint32_t usage; +}; + +extern const struct nvc0_format nvc0_format_table[]; + +static INLINE void +nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic) +{ + if (tic->id >= 0) + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); +} + +static INLINE void +nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc) +{ + if (tsc->id >= 0) + screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32)); +} + +static INLINE void +nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic) +{ + if (tic->id >= 0) { + screen->tic.entries[tic->id] = NULL; + screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32)); + } +} + +static INLINE void +nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc) +{ + if (tsc->id >= 0) { + screen->tsc.entries[tsc->id] = NULL; + screen->tsc.lock[tsc->id / 32] &= 
~(1 << (tsc->id % 32)); + } +} + +#endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c new file mode 100644 index 00000000000..b820ef21df8 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -0,0 +1,278 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" + +#include "nvc0/nvc0_context.h" + +static INLINE void +nvc0_program_update_context_state(struct nvc0_context *nvc0, + struct nvc0_program *prog, int stage) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + if (prog && prog->need_tls) { + const uint32_t flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR; + if (!nvc0->state.tls_required) + BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls); + nvc0->state.tls_required |= 1 << stage; + } else { + if (nvc0->state.tls_required == (1 << stage)) + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TLS); + nvc0->state.tls_required &= ~(1 << stage); + } + + if (prog && prog->immd_size) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + /* NOTE: may overlap code of a different shader */ + PUSH_DATA (push, align(prog->immd_size, 0x100)); + PUSH_DATAh(push, nvc0->screen->text->offset + prog->immd_base); + PUSH_DATA (push, nvc0->screen->text->offset + prog->immd_base); + BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1); + PUSH_DATA (push, (14 << 4) | 1); + + nvc0->state.c14_bound |= 1 << stage; + } else + if (nvc0->state.c14_bound & (1 << stage)) { + BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1); + PUSH_DATA (push, (14 << 4) | 0); + + nvc0->state.c14_bound &= ~(1 << stage); + } +} + +static INLINE boolean +nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + if (prog->mem) + return TRUE; + + if (!prog->translated) { + prog->translated = nvc0_program_translate( + prog, nvc0->screen->base.device->chipset); + if (!prog->translated) + return FALSE; + } + + if (likely(prog->code_size)) + return nvc0_program_upload_code(nvc0, prog); + return TRUE; /* stream output info only */ +} + +void +nvc0_vertprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *vp = nvc0->vertprog; + + if (!nvc0_program_validate(nvc0, vp)) + return; + 
nvc0_program_update_context_state(nvc0, vp, 0); + + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2); + PUSH_DATA (push, 0x11); + PUSH_DATA (push, vp->code_base); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1); + PUSH_DATA (push, vp->num_gprs); + + // BEGIN_NVC0(push, NVC0_3D_(0x163c), 1); + // PUSH_DATA (push, 0); +} + +void +nvc0_fragprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *fp = nvc0->fragprog; + + if (!nvc0_program_validate(nvc0, fp)) + return; + nvc0_program_update_context_state(nvc0, fp, 4); + + if (fp->fp.early_z != nvc0->state.early_z_forced) { + nvc0->state.early_z_forced = fp->fp.early_z; + IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z); + } + + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2); + PUSH_DATA (push, 0x51); + PUSH_DATA (push, fp->code_base); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1); + PUSH_DATA (push, fp->num_gprs); + + BEGIN_NVC0(push, SUBC_3D(0x0360), 2); + PUSH_DATA (push, 0x20164010); + PUSH_DATA (push, 0x20); + BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1); + PUSH_DATA (push, fp->flags[0]); +} + +void +nvc0_tctlprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *tp = nvc0->tctlprog; + + if (tp && nvc0_program_validate(nvc0, tp)) { + if (tp->tp.tess_mode != ~0) { + BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1); + PUSH_DATA (push, tp->tp.tess_mode); + } + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2); + PUSH_DATA (push, 0x21); + PUSH_DATA (push, tp->code_base); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1); + PUSH_DATA (push, tp->num_gprs); + + if (tp->tp.input_patch_size <= 32) + IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size); + } else { + BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); + PUSH_DATA (push, 0x20); + } + nvc0_program_update_context_state(nvc0, tp, 1); +} + +void +nvc0_tevlprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = 
nvc0->base.pushbuf; + struct nvc0_program *tp = nvc0->tevlprog; + + if (tp && nvc0_program_validate(nvc0, tp)) { + if (tp->tp.tess_mode != ~0) { + BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1); + PUSH_DATA (push, tp->tp.tess_mode); + } + BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1); + PUSH_DATA (push, 0x31); + BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1); + PUSH_DATA (push, tp->code_base); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1); + PUSH_DATA (push, tp->num_gprs); + } else { + BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1); + PUSH_DATA (push, 0x30); + } + nvc0_program_update_context_state(nvc0, tp, 2); +} + +void +nvc0_gmtyprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *gp = nvc0->gmtyprog; + + if (gp) + nvc0_program_validate(nvc0, gp); + + /* we allow GPs with no code for specifying stream output state only */ + if (gp && gp->code_size) { + const boolean gp_selects_layer = gp->hdr[13] & (1 << 9); + + BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); + PUSH_DATA (push, 0x41); + BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1); + PUSH_DATA (push, gp->code_base); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1); + PUSH_DATA (push, gp->num_gprs); + BEGIN_NVC0(push, NVC0_3D(LAYER), 1); + PUSH_DATA (push, gp_selects_layer ? NVC0_3D_LAYER_USE_GP : 0); + } else { + IMMED_NVC0(push, NVC0_3D(LAYER), 0); + BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); + PUSH_DATA (push, 0x40); + } + nvc0_program_update_context_state(nvc0, gp, 3); +} + +void +nvc0_tfb_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_transform_feedback_state *tfb; + unsigned b; + + if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb; + else + if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb; + else + tfb = nvc0->vertprog->tfb; + + IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 
1 : 0); + + if (tfb && tfb != nvc0->state.tfb) { + for (b = 0; b < 4; ++b) { + if (tfb->varying_count[b]) { + unsigned n = (tfb->varying_count[b] + 3) / 4; + + BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3); + PUSH_DATA (push, 0); + PUSH_DATA (push, tfb->varying_count[b]); + PUSH_DATA (push, tfb->stride[b]); + BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n); + PUSH_DATAp(push, tfb->varying_index[b], n); + + if (nvc0->tfbbuf[b]) + nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b]; + } else { + IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0); + } + } + } + nvc0->state.tfb = tfb; + + if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS)) + return; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB); + + for (b = 0; b < nvc0->num_tfbbufs; ++b) { + struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]); + struct nv04_resource *buf = nv04_resource(targ->pipe.buffer); + + if (tfb) + targ->stride = tfb->stride[b]; + + if (!(nvc0->tfbbuf_dirty & (1 << b))) + continue; + + if (!targ->clean) + nvc0_query_fifo_wait(push, targ->pq); + BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5); + PUSH_DATA (push, 1); + PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset); + PUSH_DATA (push, buf->address + targ->pipe.buffer_offset); + PUSH_DATA (push, targ->pipe.buffer_size); + if (!targ->clean) { + nvc0_query_pushbuf_submit(push, targ->pq, 0x4); + } else { + PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */ + targ->clean = FALSE; + } + BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR); + } + for (; b < 4; ++b) + IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c new file mode 100644 index 00000000000..e56ef0160f2 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -0,0 +1,1247 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files 
(the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" +#include "util/u_helpers.h" +#include "util/u_inlines.h" +#include "util/u_transfer.h" + +#include "tgsi/tgsi_parse.h" + +#include "nvc0/nvc0_stateobj.h" +#include "nvc0/nvc0_context.h" + +#include "nvc0/nvc0_3d.xml.h" +#include "nv50/nv50_texture.xml.h" + +#include "nouveau_gldefs.h" + +static INLINE uint32_t +nvc0_colormask(unsigned mask) +{ + uint32_t ret = 0; + + if (mask & PIPE_MASK_R) + ret |= 0x0001; + if (mask & PIPE_MASK_G) + ret |= 0x0010; + if (mask & PIPE_MASK_B) + ret |= 0x0100; + if (mask & PIPE_MASK_A) + ret |= 0x1000; + + return ret; +} + +#define NVC0_BLEND_FACTOR_CASE(a, b) \ + case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b + +static INLINE uint32_t +nvc0_blend_fac(unsigned factor) +{ + switch (factor) { + NVC0_BLEND_FACTOR_CASE(ONE, ONE); + NVC0_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR); + 
NVC0_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE); + NVC0_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA); + NVC0_BLEND_FACTOR_CASE(ZERO, ZERO); + NVC0_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA); + default: + return NV50_3D_BLEND_FACTOR_ZERO; + } +} + +static void * +nvc0_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj); + int i; + int r; /* reference */ + uint32_t ms; + uint8_t blend_en = 0; + boolean indep_masks = FALSE; + boolean indep_funcs = FALSE; + + so->pipe = *cso; + + /* check which states actually have differing values */ + if (cso->independent_blend_enable) { + for (r = 0; r < 8 && !cso->rt[r].blend_enable; ++r); + blend_en |= 1 << r; + for (i = r + 1; i < 8; ++i) { + if (!cso->rt[i].blend_enable) + continue; + blend_en |= 1 << i; + if (cso->rt[i].rgb_func != cso->rt[r].rgb_func || + cso->rt[i].rgb_src_factor != cso->rt[r].rgb_src_factor || + cso->rt[i].rgb_dst_factor != cso->rt[r].rgb_dst_factor || + cso->rt[i].alpha_func != cso->rt[r].alpha_func || + cso->rt[i].alpha_src_factor != cso->rt[r].alpha_src_factor || + cso->rt[i].alpha_dst_factor != cso->rt[r].alpha_dst_factor) { + indep_funcs = TRUE; + break; + } + } + for (; i < 8; ++i) + blend_en |= (cso->rt[i].blend_enable ? 
1 : 0) << i; + + for (i = 1; i < 8; ++i) { + if (cso->rt[i].colormask != cso->rt[0].colormask) { + indep_masks = TRUE; + break; + } + } + } else { + r = 0; + if (cso->rt[0].blend_enable) + blend_en = 0xff; + } + + if (cso->logicop_enable) { + SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2); + SB_DATA (so, 1); + SB_DATA (so, nvgl_logicop_func(cso->logicop_func)); + + SB_IMMED_3D(so, MACRO_BLEND_ENABLES, 0); + } else { + SB_IMMED_3D(so, LOGIC_OP_ENABLE, 0); + + SB_IMMED_3D(so, BLEND_INDEPENDENT, indep_funcs); + SB_IMMED_3D(so, MACRO_BLEND_ENABLES, blend_en); + if (indep_funcs) { + for (i = 0; i < 8; ++i) { + if (cso->rt[i].blend_enable) { + SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6); + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor)); + SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor)); + SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor)); + } + } + } else + if (blend_en) { + SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5); + SB_DATA (so, nvgl_blend_eqn(cso->rt[r].rgb_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[r].rgb_src_factor)); + SB_DATA (so, nvc0_blend_fac(cso->rt[r].rgb_dst_factor)); + SB_DATA (so, nvgl_blend_eqn(cso->rt[r].alpha_func)); + SB_DATA (so, nvc0_blend_fac(cso->rt[r].alpha_src_factor)); + SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1); + SB_DATA (so, nvc0_blend_fac(cso->rt[r].alpha_dst_factor)); + } + + SB_IMMED_3D(so, COLOR_MASK_COMMON, !indep_masks); + if (indep_masks) { + SB_BEGIN_3D(so, COLOR_MASK(0), 8); + for (i = 0; i < 8; ++i) + SB_DATA(so, nvc0_colormask(cso->rt[i].colormask)); + } else { + SB_BEGIN_3D(so, COLOR_MASK(0), 1); + SB_DATA (so, nvc0_colormask(cso->rt[0].colormask)); + } + } + + ms = 0; + if (cso->alpha_to_coverage) + ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE; + if (cso->alpha_to_one) + ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE; + + SB_BEGIN_3D(so, 
MULTISAMPLE_CTRL, 1); + SB_DATA (so, ms); + + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); + return so; +} + +static void +nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->blend = hwcso; + nvc0->dirty |= NVC0_NEW_BLEND; +} + +static void +nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +/* NOTE: ignoring line_last_pixel, using FALSE (set on screen init) */ +static void * +nvc0_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nvc0_rasterizer_stateobj *so; + uint32_t reg; + + so = CALLOC_STRUCT(nvc0_rasterizer_stateobj); + if (!so) + return NULL; + so->pipe = *cso; + + /* Scissor enables are handled in scissor state, we will not want to + * always emit 16 commands, one for each scissor rectangle, here. + */ + + SB_BEGIN_3D(so, SHADE_MODEL, 1); + SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT : + NVC0_3D_SHADE_MODEL_SMOOTH); + SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first); + SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside); + + SB_IMMED_3D(so, VERT_COLOR_CLAMP_EN, cso->clamp_vertex_color); + SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1); + SB_DATA (so, cso->clamp_fragment_color ? 
0x11111111 : 0x00000000); + + SB_IMMED_3D(so, MULTISAMPLE_ENABLE, cso->multisample); + + SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth); + if (cso->line_smooth) + SB_BEGIN_3D(so, LINE_WIDTH_SMOOTH, 1); + else + SB_BEGIN_3D(so, LINE_WIDTH_ALIASED, 1); + SB_DATA (so, fui(cso->line_width)); + + SB_IMMED_3D(so, LINE_STIPPLE_ENABLE, cso->line_stipple_enable); + if (cso->line_stipple_enable) { + SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1); + SB_DATA (so, (cso->line_stipple_pattern << 8) | + cso->line_stipple_factor); + + } + + SB_IMMED_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex); + if (!cso->point_size_per_vertex) { + SB_BEGIN_3D(so, POINT_SIZE, 1); + SB_DATA (so, fui(cso->point_size)); + } + + reg = (cso->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) ? + NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT : + NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT; + + SB_BEGIN_3D(so, POINT_COORD_REPLACE, 1); + SB_DATA (so, ((cso->sprite_coord_enable & 0xff) << 3) | reg); + SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization); + SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth); + + SB_BEGIN_3D(so, MACRO_POLYGON_MODE_FRONT, 1); + SB_DATA (so, nvgl_polygon_mode(cso->fill_front)); + SB_BEGIN_3D(so, MACRO_POLYGON_MODE_BACK, 1); + SB_DATA (so, nvgl_polygon_mode(cso->fill_back)); + SB_IMMED_3D(so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth); + + SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3); + SB_DATA (so, cso->cull_face != PIPE_FACE_NONE); + SB_DATA (so, cso->front_ccw ? 
NVC0_3D_FRONT_FACE_CCW : + NVC0_3D_FRONT_FACE_CW); + switch (cso->cull_face) { + case PIPE_FACE_FRONT_AND_BACK: + SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK); + break; + case PIPE_FACE_FRONT: + SB_DATA(so, NVC0_3D_CULL_FACE_FRONT); + break; + case PIPE_FACE_BACK: + default: + SB_DATA(so, NVC0_3D_CULL_FACE_BACK); + break; + } + + SB_IMMED_3D(so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable); + SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3); + SB_DATA (so, cso->offset_point); + SB_DATA (so, cso->offset_line); + SB_DATA (so, cso->offset_tri); + + if (cso->offset_point || cso->offset_line || cso->offset_tri) { + SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1); + SB_DATA (so, fui(cso->offset_scale)); + SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1); + SB_DATA (so, fui(cso->offset_units * 2.0f)); + SB_BEGIN_3D(so, POLYGON_OFFSET_CLAMP, 1); + SB_DATA (so, fui(cso->offset_clamp)); + } + + if (cso->depth_clip) + reg = NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1; + else + reg = + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2; + + SB_BEGIN_3D(so, VIEW_VOLUME_CLIP_CTRL, 1); + SB_DATA (so, reg); + + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); + return (void *)so; +} + +static void +nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->rast = hwcso; + nvc0->dirty |= NVC0_NEW_RASTERIZER; +} + +static void +nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nvc0_zsa_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj); + + so->pipe = *cso; + + SB_IMMED_3D(so, DEPTH_TEST_ENABLE, cso->depth.enabled); + if (cso->depth.enabled) { + SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask); + 
SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1); + SB_DATA (so, nvgl_comparison_op(cso->depth.func)); + } + + if (cso->stencil[0].enabled) { + SB_BEGIN_3D(so, STENCIL_ENABLE, 5); + SB_DATA (so, 1); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func)); + SB_BEGIN_3D(so, STENCIL_FRONT_FUNC_MASK, 2); + SB_DATA (so, cso->stencil[0].valuemask); + SB_DATA (so, cso->stencil[0].writemask); + } else { + SB_IMMED_3D(so, STENCIL_ENABLE, 0); + } + + if (cso->stencil[1].enabled) { + assert(cso->stencil[0].enabled); + SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5); + SB_DATA (so, 1); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func)); + SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2); + SB_DATA (so, cso->stencil[1].writemask); + SB_DATA (so, cso->stencil[1].valuemask); + } else + if (cso->stencil[0].enabled) { + SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0); + } + + SB_IMMED_3D(so, ALPHA_TEST_ENABLE, cso->alpha.enabled); + if (cso->alpha.enabled) { + SB_BEGIN_3D(so, ALPHA_TEST_REF, 2); + SB_DATA (so, fui(cso->alpha.ref_value)); + SB_DATA (so, nvgl_comparison_op(cso->alpha.func)); + } + + assert(so->size <= (sizeof(so->state) / sizeof(so->state[0]))); + return (void *)so; +} + +static void +nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->zsa = hwcso; + nvc0->dirty |= NVC0_NEW_ZSA; +} + +static void +nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +/* ====================== SAMPLERS AND TEXTURES ================================ + */ + +#define NV50_TSC_WRAP_CASE(n) \ + case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n + +static INLINE unsigned 
+nv50_tsc_wrap_mode(unsigned wrap) +{ + switch (wrap) { + NV50_TSC_WRAP_CASE(REPEAT); + NV50_TSC_WRAP_CASE(MIRROR_REPEAT); + NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE); + NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER); + NV50_TSC_WRAP_CASE(CLAMP); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER); + NV50_TSC_WRAP_CASE(MIRROR_CLAMP); + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + return NV50_TSC_WRAP_REPEAT; + } +} + +static void +nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + unsigned s, i; + + for (s = 0; s < 5; ++s) + for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i) + if (nvc0_context(pipe)->samplers[s][i] == hwcso) + nvc0_context(pipe)->samplers[s][i] = NULL; + + nvc0_screen_tsc_free(nvc0_context(pipe)->screen, nv50_tsc_entry(hwcso)); + + FREE(hwcso); +} + +static INLINE void +nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s, + unsigned nr, void **hwcso) +{ + unsigned i; + + for (i = 0; i < nr; ++i) { + struct nv50_tsc_entry *old = nvc0->samplers[s][i]; + + if (hwcso[i] == old) + continue; + nvc0->samplers_dirty[s] |= 1 << i; + + nvc0->samplers[s][i] = nv50_tsc_entry(hwcso[i]); + if (old) + nvc0_screen_tsc_unlock(nvc0->screen, old); + } + for (; i < nvc0->num_samplers[s]; ++i) { + if (nvc0->samplers[s][i]) { + nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]); + nvc0->samplers[s][i] = NULL; + } + } + + nvc0->num_samplers[s] = nr; + + nvc0->dirty |= NVC0_NEW_SAMPLERS; +} + +static void +nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s); +} + +static void +nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s); +} + +static void +nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s) +{ + nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s); +} + +static void 
+nvc0_stage_sampler_states_bind_range(struct nvc0_context *nvc0, + const unsigned s, + unsigned start, unsigned nr, void **cso) +{ + const unsigned end = start + nr; + int last_valid = -1; + unsigned i; + + if (cso) { + for (i = start; i < end; ++i) { + const unsigned p = i - start; + if (cso[p]) + last_valid = i; + if (cso[p] == nvc0->samplers[s][i]) + continue; + nvc0->samplers_dirty[s] |= 1 << i; + + if (nvc0->samplers[s][i]) + nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]); + nvc0->samplers[s][i] = cso[p]; + } + } else { + for (i = start; i < end; ++i) { + if (nvc0->samplers[s][i]) { + nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]); + nvc0->samplers[s][i] = NULL; + nvc0->samplers_dirty[s] |= 1 << i; + } + } + } + + if (nvc0->num_samplers[s] <= end) { + if (last_valid < 0) { + for (i = start; i && !nvc0->samplers[s][i - 1]; --i); + nvc0->num_samplers[s] = i; + } else { + nvc0->num_samplers[s] = last_valid + 1; + } + } +} + +static void +nvc0_cp_sampler_states_bind(struct pipe_context *pipe, + unsigned start, unsigned nr, void **cso) +{ + nvc0_stage_sampler_states_bind_range(nvc0_context(pipe), 5, start, nr, cso); + + nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SAMPLERS; +} + +/* NOTE: only called when not referenced anywhere, won't be bound */ +static void +nvc0_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + + nvc0_screen_tic_free(nvc0_context(pipe)->screen, nv50_tic_entry(view)); + + FREE(nv50_tic_entry(view)); +} + +static INLINE void +nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, + unsigned nr, + struct pipe_sampler_view **views) +{ + unsigned i; + + for (i = 0; i < nr; ++i) { + struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); + + if (views[i] == nvc0->textures[s][i]) + continue; + nvc0->textures_dirty[s] |= 1 << i; + + if (old) { + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); + 
nvc0_screen_tic_unlock(nvc0->screen, old); + } + + pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]); + } + + for (i = nr; i < nvc0->num_textures[s]; ++i) { + struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); + if (old) { + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); + nvc0_screen_tic_unlock(nvc0->screen, old); + pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); + } + } + + nvc0->num_textures[s] = nr; + + nvc0->dirty |= NVC0_NEW_TEXTURES; +} + +static void +nvc0_vp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views); +} + +static void +nvc0_fp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views(nvc0_context(pipe), 4, nr, views); +} + +static void +nvc0_gp_set_sampler_views(struct pipe_context *pipe, + unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views); +} + +static void +nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s, + unsigned start, unsigned nr, + struct pipe_sampler_view **views) +{ + struct nouveau_bufctx *bctx = (s == 5) ? nvc0->bufctx_cp : nvc0->bufctx_3d; + const unsigned end = start + nr; + const unsigned bin = (s == 5) ? 
NVC0_BIND_CP_TEX(0) : NVC0_BIND_TEX(s, 0); + int last_valid = -1; + unsigned i; + + if (views) { + for (i = start; i < end; ++i) { + const unsigned p = i - start; + if (views[p]) + last_valid = i; + if (views[p] == nvc0->textures[s][i]) + continue; + nvc0->textures_dirty[s] |= 1 << i; + + if (nvc0->textures[s][i]) { + struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); + nouveau_bufctx_reset(bctx, bin + i); + nvc0_screen_tic_unlock(nvc0->screen, old); + } + pipe_sampler_view_reference(&nvc0->textures[s][i], views[p]); + } + } else { + for (i = start; i < end; ++i) { + struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); + if (!old) + continue; + nvc0->textures_dirty[s] |= 1 << i; + + nvc0_screen_tic_unlock(nvc0->screen, old); + pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); + nouveau_bufctx_reset(bctx, bin + i); + } + } + + if (nvc0->num_textures[s] <= end) { + if (last_valid < 0) { + for (i = start; i && !nvc0->textures[s][i - 1]; --i); + nvc0->num_textures[s] = i; + } else { + nvc0->num_textures[s] = last_valid + 1; + } + } +} + +static void +nvc0_cp_set_sampler_views(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_sampler_view **views) +{ + nvc0_stage_set_sampler_views_range(nvc0_context(pipe), 5, start, nr, views); + + nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_TEXTURES; +} + + +/* ============================= SHADERS ======================================= + */ + +static void * +nvc0_sp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso, unsigned type) +{ + struct nvc0_program *prog; + + prog = CALLOC_STRUCT(nvc0_program); + if (!prog) + return NULL; + + prog->type = type; + + if (cso->tokens) + prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + if (cso->stream_output.num_outputs) + prog->pipe.stream_output = cso->stream_output; + + return (void *)prog; +} + +static void +nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_program *prog = 
(struct nvc0_program *)hwcso; + + nvc0_program_destroy(nvc0_context(pipe), prog); + + FREE((void *)prog->pipe.tokens); + FREE(prog); +} + +static void * +nvc0_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX); +} + +static void +nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->vertprog = hwcso; + nvc0->dirty |= NVC0_NEW_VERTPROG; +} + +static void * +nvc0_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT); +} + +static void +nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->fragprog = hwcso; + nvc0->dirty |= NVC0_NEW_FRAGPROG; +} + +static void * +nvc0_gp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY); +} + +static void +nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->gmtyprog = hwcso; + nvc0->dirty |= NVC0_NEW_GMTYPROG; +} + +static void * +nvc0_cp_state_create(struct pipe_context *pipe, + const struct pipe_compute_state *cso) +{ + struct nvc0_program *prog; + + prog = CALLOC_STRUCT(nvc0_program); + if (!prog) + return NULL; + prog->type = PIPE_SHADER_COMPUTE; + + prog->cp.smem_size = cso->req_local_mem; + prog->cp.lmem_size = cso->req_private_mem; + prog->parm_size = cso->req_input_mem; + + prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog); + + return (void *)prog; +} + +static void +nvc0_cp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->compprog = hwcso; + nvc0->dirty_cp |= NVC0_NEW_CP_PROGRAM; +} + +static void +nvc0_set_constant_buffer(struct pipe_context *pipe, uint 
shader, uint index, + struct pipe_constant_buffer *cb) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct pipe_resource *res = cb ? cb->buffer : NULL; + const unsigned s = nvc0_shader_stage(shader); + const unsigned i = index; + + if (unlikely(shader == PIPE_SHADER_COMPUTE)) { + assert(!cb || !cb->user_buffer); + if (nvc0->constbuf[s][i].u.buf) + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i)); + + nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF; + } else { + if (nvc0->constbuf[s][i].user) + nvc0->constbuf[s][i].u.buf = NULL; + else + if (nvc0->constbuf[s][i].u.buf) + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i)); + + nvc0->dirty |= NVC0_NEW_CONSTBUF; + } + nvc0->constbuf_dirty[s] |= 1 << i; + + pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res); + + nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE; + if (nvc0->constbuf[s][i].user) { + nvc0->constbuf[s][i].u.data = cb->user_buffer; + nvc0->constbuf[s][i].size = cb->buffer_size; + } else + if (cb) { + nvc0->constbuf[s][i].offset = cb->buffer_offset; + nvc0->constbuf[s][i].size = align(cb->buffer_size, 0x100); + } +} + +/* ============================================================================= + */ + +static void +nvc0_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->blend_colour = *bcol; + nvc0->dirty |= NVC0_NEW_BLEND_COLOUR; +} + +static void +nvc0_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *sr) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->stencil_ref = *sr; + nvc0->dirty |= NVC0_NEW_STENCIL_REF; +} + +static void +nvc0_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + memcpy(nvc0->clip.ucp, clip->ucp, sizeof(clip->ucp)); + + nvc0->dirty |= NVC0_NEW_CLIP; +} + +static void +nvc0_set_sample_mask(struct pipe_context *pipe, unsigned 
sample_mask) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->sample_mask = sample_mask; + nvc0->dirty |= NVC0_NEW_SAMPLE_MASK; +} + + +static void +nvc0_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + unsigned i; + + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + + for (i = 0; i < fb->nr_cbufs; ++i) + pipe_surface_reference(&nvc0->framebuffer.cbufs[i], fb->cbufs[i]); + for (; i < nvc0->framebuffer.nr_cbufs; ++i) + pipe_surface_reference(&nvc0->framebuffer.cbufs[i], NULL); + + nvc0->framebuffer.nr_cbufs = fb->nr_cbufs; + + nvc0->framebuffer.width = fb->width; + nvc0->framebuffer.height = fb->height; + + pipe_surface_reference(&nvc0->framebuffer.zsbuf, fb->zsbuf); + + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; +} + +static void +nvc0_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->stipple = *stipple; + nvc0->dirty |= NVC0_NEW_STIPPLE; +} + +static void +nvc0_set_scissor_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissor) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->scissor = *scissor; + nvc0->dirty |= NVC0_NEW_SCISSOR; +} + +static void +nvc0_set_viewport_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *vpt) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->viewport = *vpt; + nvc0->dirty |= NVC0_NEW_VIEWPORT; +} + +static void +nvc0_set_vertex_buffers(struct pipe_context *pipe, + unsigned start_slot, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + unsigned i; + + util_set_vertex_buffers_count(nvc0->vtxbuf, &nvc0->num_vtxbufs, vb, + start_slot, count); + + if (!vb) { + nvc0->vbo_user &= ~(((1ull << count) - 1) << 
start_slot); + nvc0->constant_vbos &= ~(((1ull << count) - 1) << start_slot); + return; + } + + for (i = 0; i < count; ++i) { + unsigned dst_index = start_slot + i; + + if (vb[i].user_buffer) { + nvc0->vbo_user |= 1 << dst_index; + if (!vb[i].stride) + nvc0->constant_vbos |= 1 << dst_index; + else + nvc0->constant_vbos &= ~(1 << dst_index); + } else { + nvc0->vbo_user &= ~(1 << dst_index); + nvc0->constant_vbos &= ~(1 << dst_index); + } + } + + nvc0->dirty |= NVC0_NEW_ARRAYS; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); +} + +static void +nvc0_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + if (nvc0->idxbuf.buffer) + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX); + + if (ib) { + pipe_resource_reference(&nvc0->idxbuf.buffer, ib->buffer); + nvc0->idxbuf.index_size = ib->index_size; + if (ib->buffer) { + nvc0->idxbuf.offset = ib->offset; + nvc0->dirty |= NVC0_NEW_IDXBUF; + } else { + nvc0->idxbuf.user_buffer = ib->user_buffer; + nvc0->dirty &= ~NVC0_NEW_IDXBUF; + } + } else { + nvc0->dirty &= ~NVC0_NEW_IDXBUF; + pipe_resource_reference(&nvc0->idxbuf.buffer, NULL); + } +} + +static void +nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->vertex = hwcso; + nvc0->dirty |= NVC0_NEW_VERTEX; +} + +static struct pipe_stream_output_target * +nvc0_so_target_create(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size) +{ + struct nvc0_so_target *targ = MALLOC_STRUCT(nvc0_so_target); + if (!targ) + return NULL; + + targ->pq = pipe->create_query(pipe, NVC0_QUERY_TFB_BUFFER_OFFSET); + if (!targ->pq) { + FREE(targ); + return NULL; + } + targ->clean = TRUE; + + targ->pipe.buffer_size = size; + targ->pipe.buffer_offset = offset; + targ->pipe.context = pipe; + targ->pipe.buffer = NULL; + pipe_resource_reference(&targ->pipe.buffer, res); + 
pipe_reference_init(&targ->pipe.reference, 1); + + return &targ->pipe; +} + +static void +nvc0_so_target_destroy(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg) +{ + struct nvc0_so_target *targ = nvc0_so_target(ptarg); + pipe->destroy_query(pipe, targ->pq); + pipe_resource_reference(&targ->pipe.buffer, NULL); + FREE(targ); +} + +static void +nvc0_set_transform_feedback_targets(struct pipe_context *pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_mask) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + unsigned i; + boolean serialize = TRUE; + + assert(num_targets <= 4); + + for (i = 0; i < num_targets; ++i) { + if (nvc0->tfbbuf[i] == targets[i] && (append_mask & (1 << i))) + continue; + nvc0->tfbbuf_dirty |= 1 << i; + + if (nvc0->tfbbuf[i] && nvc0->tfbbuf[i] != targets[i]) + nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize); + + if (targets[i] && !(append_mask & (1 << i))) + nvc0_so_target(targets[i])->clean = TRUE; + + pipe_so_target_reference(&nvc0->tfbbuf[i], targets[i]); + } + for (; i < nvc0->num_tfbbufs; ++i) { + nvc0->tfbbuf_dirty |= 1 << i; + nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize); + pipe_so_target_reference(&nvc0->tfbbuf[i], NULL); + } + nvc0->num_tfbbufs = num_targets; + + if (nvc0->tfbbuf_dirty) + nvc0->dirty |= NVC0_NEW_TFB_TARGETS; +} + +static void +nvc0_bind_surfaces_range(struct nvc0_context *nvc0, const unsigned t, + unsigned start, unsigned nr, + struct pipe_surface **psurfaces) +{ + const unsigned end = start + nr; + const unsigned mask = ((1 << nr) - 1) << start; + unsigned i; + + if (psurfaces) { + for (i = start; i < end; ++i) { + const unsigned p = i - start; + if (psurfaces[p]) + nvc0->surfaces_valid[t] |= (1 << i); + else + nvc0->surfaces_valid[t] &= ~(1 << i); + pipe_surface_reference(&nvc0->surfaces[t][i], psurfaces[p]); + } + } else { + for (i = start; i < end; ++i) + pipe_surface_reference(&nvc0->surfaces[t][i], NULL); + 
nvc0->surfaces_valid[t] &= ~mask; + } + nvc0->surfaces_dirty[t] |= mask; + + if (t == 0) + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_SUF); + else + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF); +} + +static void +nvc0_set_compute_resources(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_surface **resources) +{ + nvc0_bind_surfaces_range(nvc0_context(pipe), 1, start, nr, resources); + + nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SURFACES; +} + +static void +nvc0_set_shader_resources(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_surface **resources) +{ + nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, resources); + + nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES; +} + +static INLINE void +nvc0_set_global_handle(uint32_t *phandle, struct pipe_resource *res) +{ + struct nv04_resource *buf = nv04_resource(res); + if (buf) { + uint64_t limit = (buf->address + buf->base.width0) - 1; + if (limit < (1ULL << 32)) { + *phandle = (uint32_t)buf->address; + } else { + NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: " + "resource not contained within 32-bit address space !\n"); + *phandle = 0; + } + } else { + *phandle = 0; + } +} + +static void +nvc0_set_global_bindings(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_resource **resources, + uint32_t **handles) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct pipe_resource **ptr; + unsigned i; + const unsigned end = start + nr; + + if (nvc0->global_residents.size <= (end * sizeof(struct pipe_resource *))) { + const unsigned old_size = nvc0->global_residents.size; + const unsigned req_size = end * sizeof(struct pipe_resource *); + util_dynarray_resize(&nvc0->global_residents, req_size); + memset((uint8_t *)nvc0->global_residents.data + old_size, 0, + req_size - old_size); + } + + if (resources) { + ptr = util_dynarray_element( + &nvc0->global_residents, struct pipe_resource *, start); + for (i = 0; i < nr; ++i) { 
+ pipe_resource_reference(&ptr[i], resources[i]); + nvc0_set_global_handle(handles[i], resources[i]); + } + } else { + ptr = util_dynarray_element( + &nvc0->global_residents, struct pipe_resource *, start); + for (i = 0; i < nr; ++i) + pipe_resource_reference(&ptr[i], NULL); + } + + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL); + + nvc0->dirty_cp = NVC0_NEW_CP_GLOBALS; +} + +void +nvc0_init_state_functions(struct nvc0_context *nvc0) +{ + struct pipe_context *pipe = &nvc0->base.pipe; + + pipe->create_blend_state = nvc0_blend_state_create; + pipe->bind_blend_state = nvc0_blend_state_bind; + pipe->delete_blend_state = nvc0_blend_state_delete; + + pipe->create_rasterizer_state = nvc0_rasterizer_state_create; + pipe->bind_rasterizer_state = nvc0_rasterizer_state_bind; + pipe->delete_rasterizer_state = nvc0_rasterizer_state_delete; + + pipe->create_depth_stencil_alpha_state = nvc0_zsa_state_create; + pipe->bind_depth_stencil_alpha_state = nvc0_zsa_state_bind; + pipe->delete_depth_stencil_alpha_state = nvc0_zsa_state_delete; + + pipe->create_sampler_state = nv50_sampler_state_create; + pipe->delete_sampler_state = nvc0_sampler_state_delete; + pipe->bind_vertex_sampler_states = nvc0_vp_sampler_states_bind; + pipe->bind_fragment_sampler_states = nvc0_fp_sampler_states_bind; + pipe->bind_geometry_sampler_states = nvc0_gp_sampler_states_bind; + pipe->bind_compute_sampler_states = nvc0_cp_sampler_states_bind; + + pipe->create_sampler_view = nvc0_create_sampler_view; + pipe->sampler_view_destroy = nvc0_sampler_view_destroy; + pipe->set_vertex_sampler_views = nvc0_vp_set_sampler_views; + pipe->set_fragment_sampler_views = nvc0_fp_set_sampler_views; + pipe->set_geometry_sampler_views = nvc0_gp_set_sampler_views; + pipe->set_compute_sampler_views = nvc0_cp_set_sampler_views; + + pipe->create_vs_state = nvc0_vp_state_create; + pipe->create_fs_state = nvc0_fp_state_create; + pipe->create_gs_state = nvc0_gp_state_create; + pipe->bind_vs_state = nvc0_vp_state_bind; + 
pipe->bind_fs_state = nvc0_fp_state_bind; + pipe->bind_gs_state = nvc0_gp_state_bind; + pipe->delete_vs_state = nvc0_sp_state_delete; + pipe->delete_fs_state = nvc0_sp_state_delete; + pipe->delete_gs_state = nvc0_sp_state_delete; + + pipe->create_compute_state = nvc0_cp_state_create; + pipe->bind_compute_state = nvc0_cp_state_bind; + pipe->delete_compute_state = nvc0_sp_state_delete; + + pipe->set_blend_color = nvc0_set_blend_color; + pipe->set_stencil_ref = nvc0_set_stencil_ref; + pipe->set_clip_state = nvc0_set_clip_state; + pipe->set_sample_mask = nvc0_set_sample_mask; + pipe->set_constant_buffer = nvc0_set_constant_buffer; + pipe->set_framebuffer_state = nvc0_set_framebuffer_state; + pipe->set_polygon_stipple = nvc0_set_polygon_stipple; + pipe->set_scissor_states = nvc0_set_scissor_states; + pipe->set_viewport_states = nvc0_set_viewport_states; + + pipe->create_vertex_elements_state = nvc0_vertex_state_create; + pipe->delete_vertex_elements_state = nvc0_vertex_state_delete; + pipe->bind_vertex_elements_state = nvc0_vertex_state_bind; + + pipe->set_vertex_buffers = nvc0_set_vertex_buffers; + pipe->set_index_buffer = nvc0_set_index_buffer; + + pipe->create_stream_output_target = nvc0_so_target_create; + pipe->stream_output_target_destroy = nvc0_so_target_destroy; + pipe->set_stream_output_targets = nvc0_set_transform_feedback_targets; + + pipe->set_global_binding = nvc0_set_global_bindings; + pipe->set_compute_resources = nvc0_set_compute_resources; + pipe->set_shader_resources = nvc0_set_shader_resources; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c new file mode 100644 index 00000000000..0ba4bad154a --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -0,0 +1,577 @@ + +#include "util/u_math.h" + +#include "nvc0/nvc0_context.h" + +#if 0 +static void +nvc0_validate_zcull(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + 
struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + struct nv50_surface *sf = nv50_surface(fb->zsbuf); + struct nv50_miptree *mt = nv50_miptree(sf->base.texture); + struct nouveau_bo *bo = mt->base.bo; + uint32_t size; + uint32_t offset = align(mt->total_size, 1 << 17); + unsigned width, height; + + assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2); + + size = mt->total_size * 2; + + height = align(fb->height, 32); + width = fb->width % 224; + if (width) + width = fb->width + (224 - width); + else + width = fb->width; + + BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(ZCULL_ADDRESS_HIGH), 2); + PUSH_DATAh(push, bo->offset + offset); + PUSH_DATA (push, bo->offset + offset); + offset += 1 << 17; + BEGIN_NVC0(push, NVC0_3D(ZCULL_LIMIT_HIGH), 2); + PUSH_DATAh(push, bo->offset + offset); + PUSH_DATA (push, bo->offset + offset); + BEGIN_NVC0(push, SUBC_3D(0x07e0), 2); + PUSH_DATA (push, size); + PUSH_DATA (push, size >> 16); + BEGIN_NVC0(push, SUBC_3D(0x15c8), 1); /* bits 0x3 */ + PUSH_DATA (push, 2); + BEGIN_NVC0(push, NVC0_3D(ZCULL_WIDTH), 4); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(ZCULL_WINDOW_OFFSET_X), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1); + PUSH_DATA (push, 0); +} +#endif + +static void +nvc0_validate_fb(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + unsigned i; + unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; + boolean serialize = FALSE; + + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + + BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); + PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs); + BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); + PUSH_DATA (push, fb->width << 16); + PUSH_DATA (push, fb->height << 16); + + for (i = 0; i < fb->nr_cbufs; 
++i) { + struct nv50_surface *sf = nv50_surface(fb->cbufs[i]); + struct nv04_resource *res = nv04_resource(sf->base.texture); + struct nouveau_bo *bo = res->bo; + + BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9); + PUSH_DATAh(push, res->address + sf->offset); + PUSH_DATA (push, res->address + sf->offset); + if (likely(nouveau_bo_memtype(bo))) { + struct nv50_miptree *mt = nv50_miptree(sf->base.texture); + + assert(sf->base.texture->target != PIPE_BUFFER); + + PUSH_DATA(push, sf->width); + PUSH_DATA(push, sf->height); + PUSH_DATA(push, nvc0_format_table[sf->base.format].rt); + PUSH_DATA(push, (mt->layout_3d << 16) | + mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA(push, sf->base.u.tex.first_layer + sf->depth); + PUSH_DATA(push, mt->layer_stride >> 2); + PUSH_DATA(push, sf->base.u.tex.first_layer); + + ms_mode = mt->ms_mode; + } else { + if (res->base.target == PIPE_BUFFER) { + PUSH_DATA(push, 262144); + PUSH_DATA(push, 1); + } else { + PUSH_DATA(push, nv50_miptree(sf->base.texture)->level[0].pitch); + PUSH_DATA(push, sf->height); + } + PUSH_DATA(push, nvc0_format_table[sf->base.format].rt); + PUSH_DATA(push, 1 << 12); + PUSH_DATA(push, 1); + PUSH_DATA(push, 0); + PUSH_DATA(push, 0); + + nvc0_resource_fence(res, NOUVEAU_BO_WR); + + assert(!fb->zsbuf); + } + + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; + + /* only register for writing, otherwise we'd always serialize here */ + BCTX_REFN(nvc0->bufctx_3d, FB, res, WR); + } + + if (fb->zsbuf) { + struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture); + struct nv50_surface *sf = nv50_surface(fb->zsbuf); + int unk = mt->base.base.target == PIPE_TEXTURE_2D; + + BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5); + PUSH_DATAh(push, mt->base.address + sf->offset); + PUSH_DATA (push, mt->base.address + sf->offset); + PUSH_DATA (push, nvc0_format_table[fb->zsbuf->format].rt); + 
PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, mt->layer_stride >> 2); + BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3); + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + PUSH_DATA (push, (unk << 16) | + (sf->base.u.tex.first_layer + sf->depth)); + BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1); + PUSH_DATA (push, sf->base.u.tex.first_layer); + + ms_mode = mt->ms_mode; + + if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; + + BCTX_REFN(nvc0->bufctx_3d, FB, &mt->base, WR); + } else { + BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 0); + } + + IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode); + + if (serialize) + IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); + + NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, serialize); +} + +static void +nvc0_validate_blend_colour(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + BEGIN_NVC0(push, NVC0_3D(BLEND_COLOR(0)), 4); + PUSH_DATAf(push, nvc0->blend_colour.color[0]); + PUSH_DATAf(push, nvc0->blend_colour.color[1]); + PUSH_DATAf(push, nvc0->blend_colour.color[2]); + PUSH_DATAf(push, nvc0->blend_colour.color[3]); +} + +static void +nvc0_validate_stencil_ref(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const ubyte *ref = &nvc0->stencil_ref.ref_value[0]; + + IMMED_NVC0(push, NVC0_3D(STENCIL_FRONT_FUNC_REF), ref[0]); + IMMED_NVC0(push, NVC0_3D(STENCIL_BACK_FUNC_REF), ref[1]); +} + +static void +nvc0_validate_stipple(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned i; + + BEGIN_NVC0(push, NVC0_3D(POLYGON_STIPPLE_PATTERN(0)), 32); + for (i = 0; i < 32; ++i) + PUSH_DATA(push, util_bswap32(nvc0->stipple.stipple[i])); +} + +static void 
+nvc0_validate_scissor(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_scissor_state *s = &nvc0->scissor; + + if (!(nvc0->dirty & NVC0_NEW_SCISSOR) && + nvc0->rast->pipe.scissor == nvc0->state.scissor) + return; + nvc0->state.scissor = nvc0->rast->pipe.scissor; + + BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2); + if (nvc0->rast->pipe.scissor) { + PUSH_DATA(push, (s->maxx << 16) | s->minx); + PUSH_DATA(push, (s->maxy << 16) | s->miny); + } else { + PUSH_DATA(push, (0xffff << 16) | 0); + PUSH_DATA(push, (0xffff << 16) | 0); + } +} + +static void +nvc0_validate_viewport(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_viewport_state *vp = &nvc0->viewport; + int x, y, w, h; + float zmin, zmax; + + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSLATE_X(0)), 3); + PUSH_DATAf(push, vp->translate[0]); + PUSH_DATAf(push, vp->translate[1]); + PUSH_DATAf(push, vp->translate[2]); + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SCALE_X(0)), 3); + PUSH_DATAf(push, vp->scale[0]); + PUSH_DATAf(push, vp->scale[1]); + PUSH_DATAf(push, vp->scale[2]); + + /* now set the viewport rectangle to viewport dimensions for clipping */ + + x = util_iround(MAX2(0.0f, vp->translate[0] - fabsf(vp->scale[0]))); + y = util_iround(MAX2(0.0f, vp->translate[1] - fabsf(vp->scale[1]))); + w = util_iround(vp->translate[0] + fabsf(vp->scale[0])) - x; + h = util_iround(vp->translate[1] + fabsf(vp->scale[1])) - y; + + zmin = vp->translate[2] - fabsf(vp->scale[2]); + zmax = vp->translate[2] + fabsf(vp->scale[2]); + + nvc0->vport_int[0] = (w << 16) | x; + nvc0->vport_int[1] = (h << 16) | y; + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, nvc0->vport_int[0]); + PUSH_DATA (push, nvc0->vport_int[1]); + BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(0)), 2); + PUSH_DATAf(push, zmin); + PUSH_DATAf(push, zmax); +} + +static INLINE void +nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s) +{ + struct 
nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nouveau_bo *bo = nvc0->screen->uniform_bo; + + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 512); + PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9)); + PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1); + PUSH_DATA (push, 256); + PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4); +} + +static INLINE void +nvc0_check_program_ucps(struct nvc0_context *nvc0, + struct nvc0_program *vp, uint8_t mask) +{ + const unsigned n = util_logbase2(mask) + 1; + + if (vp->vp.num_ucps >= n) + return; + nvc0_program_destroy(nvc0, vp); + + vp->vp.num_ucps = n; + if (likely(vp == nvc0->vertprog)) + nvc0_vertprog_validate(nvc0); + else + if (likely(vp == nvc0->gmtyprog)) + nvc0_vertprog_validate(nvc0); + else + nvc0_tevlprog_validate(nvc0); +} + +static void +nvc0_validate_clip(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *vp; + unsigned stage; + uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable; + + if (nvc0->gmtyprog) { + stage = 3; + vp = nvc0->gmtyprog; + } else + if (nvc0->tevlprog) { + stage = 2; + vp = nvc0->tevlprog; + } else { + stage = 0; + vp = nvc0->vertprog; + } + + if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES) + nvc0_check_program_ucps(nvc0, vp, clip_enable); + + if (nvc0->dirty & (NVC0_NEW_CLIP | (NVC0_NEW_VERTPROG << stage))) + if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES) + nvc0_upload_uclip_planes(nvc0, stage); + + clip_enable &= vp->vp.clip_enable; + + if (nvc0->state.clip_enable != clip_enable) { + nvc0->state.clip_enable = clip_enable; + IMMED_NVC0(push, NVC0_3D(CLIP_DISTANCE_ENABLE), clip_enable); + } + if (nvc0->state.clip_mode != vp->vp.clip_mode) { + nvc0->state.clip_mode = vp->vp.clip_mode; + BEGIN_NVC0(push, NVC0_3D(CLIP_DISTANCE_MODE), 1); + PUSH_DATA (push, vp->vp.clip_mode); + } +} + +static void 
+nvc0_validate_blend(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + PUSH_SPACE(push, nvc0->blend->size); + PUSH_DATAp(push, nvc0->blend->state, nvc0->blend->size); +} + +static void +nvc0_validate_zsa(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + PUSH_SPACE(push, nvc0->zsa->size); + PUSH_DATAp(push, nvc0->zsa->state, nvc0->zsa->size); +} + +static void +nvc0_validate_rasterizer(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + PUSH_SPACE(push, nvc0->rast->size); + PUSH_DATAp(push, nvc0->rast->state, nvc0->rast->size); +} + +static void +nvc0_constbufs_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned s; + + for (s = 0; s < 5; ++s) { + while (nvc0->constbuf_dirty[s]) { + int i = ffs(nvc0->constbuf_dirty[s]) - 1; + nvc0->constbuf_dirty[s] &= ~(1 << i); + + if (nvc0->constbuf[s][i].user) { + struct nouveau_bo *bo = nvc0->screen->uniform_bo; + const unsigned base = s << 16; + const unsigned size = nvc0->constbuf[s][0].size; + assert(i == 0); /* we really only want OpenGL uniforms here */ + assert(nvc0->constbuf[s][0].u.data); + + if (nvc0->state.uniform_buffer_bound[s] < size) { + nvc0->state.uniform_buffer_bound[s] = align(size, 0x100); + + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]); + PUSH_DATAh(push, bo->offset + base); + PUSH_DATA (push, bo->offset + base); + BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); + PUSH_DATA (push, (0 << 4) | 1); + } + nvc0_cb_push(&nvc0->base, bo, NOUVEAU_BO_VRAM, + base, nvc0->state.uniform_buffer_bound[s], + 0, (size + 3) / 4, + nvc0->constbuf[s][0].u.data); + } else { + struct nv04_resource *res = + nv04_resource(nvc0->constbuf[s][i].u.buf); + if (res) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, nvc0->constbuf[s][i].size); + PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); + PUSH_DATA (push, 
res->address + nvc0->constbuf[s][i].offset); + BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); + PUSH_DATA (push, (i << 4) | 1); + + BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD); + } else { /* no resource in this slot: CB_BIND is written with low bit 0 instead of 1 (presumably unbinds slot i -- TODO confirm) */ + BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); + PUSH_DATA (push, (i << 4) | 0); + } + if (i == 0) + nvc0->state.uniform_buffer_bound[s] = 0; /* slot 0 no longer uses the uniform_bo window, so the user-data path must re-emit CB_SIZE/CB_BIND next time */ + } + } + } +} + +static void +nvc0_validate_sample_mask(struct nvc0_context *nvc0) +{ /* Writes the low 16 bits of nvc0->sample_mask to all four MSAA_MASK words and sets SAMPLE_SHADING to 0x01. */ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + unsigned mask[4] = + { + nvc0->sample_mask & 0xffff, + nvc0->sample_mask & 0xffff, + nvc0->sample_mask & 0xffff, + nvc0->sample_mask & 0xffff + }; + + BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4); + PUSH_DATA (push, mask[0]); + PUSH_DATA (push, mask[1]); + PUSH_DATA (push, mask[2]); + PUSH_DATA (push, mask[3]); + BEGIN_NVC0(push, NVC0_3D(SAMPLE_SHADING), 1); + PUSH_DATA (push, 0x01); +} + +void +nvc0_validate_global_residents(struct nvc0_context *nvc0, + struct nouveau_bufctx *bctx, int bin) +{ /* Adds every non-NULL resource stored in nvc0->global_residents to bin `bin` of `bctx` with NOUVEAU_BO_RDWR access. */ + unsigned i; + + for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource *res = *util_dynarray_element( + &nvc0->global_residents, struct pipe_resource *, i); + if (res) + nvc0_add_resident(bctx, bin, nv04_resource(res), NOUVEAU_BO_RDWR); + } +} + +static void +nvc0_validate_derived_1(struct nvc0_context *nvc0) +{ /* Derives RASTERIZE_ENABLE from rasterizer, zsa and fragment program state and emits it only when the value differs from the one cached in nvc0->state. */ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + boolean rasterizer_discard; + + if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) { + rasterizer_discard = TRUE; + } else { /* not requested explicitly: still discard when neither depth nor stencil[0] is enabled and there is no fragment program or its hdr[18] is zero */ + boolean zs = nvc0->zsa && + (nvc0->zsa->pipe.depth.enabled || nvc0->zsa->pipe.stencil[0].enabled); + rasterizer_discard = !zs && + (!nvc0->fragprog || !nvc0->fragprog->hdr[18]); + } + + if (rasterizer_discard != nvc0->state.rasterizer_discard) { + nvc0->state.rasterizer_discard = rasterizer_discard; + IMMED_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), !rasterizer_discard); + } +} + +static void +nvc0_switch_pipe_context(struct nvc0_context *ctx_to) +{ + struct nvc0_context 
*ctx_from = ctx_to->screen->cur_ctx; + unsigned s; + + if (ctx_from) + ctx_to->state = ctx_from->state; /* take over the cached state of the context that was current on this screen */ + + ctx_to->dirty = ~0; /* everything has to be re-validated for the new context */ + + for (s = 0; s < 5; ++s) { + ctx_to->samplers_dirty[s] = ~0; + ctx_to->textures_dirty[s] = ~0; + } + + /* Clear the dirty bits of state objects that ctx_to has not bound; e.g. nvc0_validate_blend/zsa/rasterizer dereference their object without a NULL check. */ + if (!ctx_to->vertex) + ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS); + if (!ctx_to->idxbuf.buffer) + ctx_to->dirty &= ~NVC0_NEW_IDXBUF; + + if (!ctx_to->vertprog) + ctx_to->dirty &= ~NVC0_NEW_VERTPROG; + if (!ctx_to->fragprog) + ctx_to->dirty &= ~NVC0_NEW_FRAGPROG; + + if (!ctx_to->blend) + ctx_to->dirty &= ~NVC0_NEW_BLEND; + if (!ctx_to->rast) + ctx_to->dirty &= ~(NVC0_NEW_RASTERIZER | NVC0_NEW_SCISSOR); + if (!ctx_to->zsa) + ctx_to->dirty &= ~NVC0_NEW_ZSA; + + ctx_to->screen->cur_ctx = ctx_to; +} + +static struct state_validate { /* one entry per validation function: func is called by nvc0_state_validate when any bit of `states` is dirty; entries are visited in array order */ + void (*func)(struct nvc0_context *); + uint32_t states; +} validate_list[] = { + { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER }, + { nvc0_validate_blend, NVC0_NEW_BLEND }, + { nvc0_validate_zsa, NVC0_NEW_ZSA }, + { nvc0_validate_sample_mask, NVC0_NEW_SAMPLE_MASK }, + { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER }, + { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR }, + { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF }, + { nvc0_validate_stipple, NVC0_NEW_STIPPLE }, + { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER }, + { nvc0_validate_viewport, NVC0_NEW_VIEWPORT }, + { nvc0_vertprog_validate, NVC0_NEW_VERTPROG }, + { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG }, + { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, + { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, + { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, + { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA | + NVC0_NEW_RASTERIZER }, + { nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER | + NVC0_NEW_VERTPROG | + NVC0_NEW_TEVLPROG | + NVC0_NEW_GMTYPROG }, + { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, + { nvc0_validate_textures, NVC0_NEW_TEXTURES }, + { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, + { 
nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS }, + { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, + { nvc0_validate_surfaces, NVC0_NEW_SURFACES }, + { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF }, + { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG } +}; +#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) + +boolean +nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask, unsigned words) +{ /* Validates the dirty state selected by `mask`: makes nvc0 the screen's current context if it is not, calls every validate_list entry whose `states` intersects (nvc0->dirty & mask), clears those dirty bits, then attaches bufctx_3d to the pushbuf and validates it. Returns TRUE iff nouveau_pushbuf_validate returned 0. `words` is not referenced in this function. */ + uint32_t state_mask; + int ret; + unsigned i; + + if (nvc0->screen->cur_ctx != nvc0) + nvc0_switch_pipe_context(nvc0); + + state_mask = nvc0->dirty & mask; + + if (state_mask) { + for (i = 0; i < validate_list_len; ++i) { + struct state_validate *validate = &validate_list[i]; + + if (state_mask & validate->states) + validate->func(nvc0); + } + nvc0->dirty &= ~state_mask; + + nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, FALSE); + } + + nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_3d); + ret = nouveau_pushbuf_validate(nvc0->base.pushbuf); + + if (unlikely(nvc0->state.flushed)) { /* state.flushed is consumed here: reset it and fence bufctx_3d again, this time with TRUE as the last argument */ + nvc0->state.flushed = FALSE; + nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, TRUE); + } + return !ret; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h new file mode 100644 index 00000000000..80c33424032 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h @@ -0,0 +1,77 @@ + +#ifndef __NVC0_STATEOBJ_H__ +#define __NVC0_STATEOBJ_H__ + +#include "pipe/p_state.h" + +#define SB_BEGIN_3D(so, m, s) \ + (so)->state[(so)->size++] = NVC0_FIFO_PKHDR_SQ(NVC0_3D(m), s) + +#define SB_IMMED_3D(so, m, d) \ + (so)->state[(so)->size++] = NVC0_FIFO_PKHDR_IL(NVC0_3D(m), d) + +#define SB_DATA(so, u) (so)->state[(so)->size++] = (u) + +#include "nv50/nv50_stateobj_tex.h" + +struct nvc0_blend_stateobj { + struct pipe_blend_state pipe; + int size; /* number of state[] entries filled so far; each SB_* macro above stores one entry and increments it (no bounds check) */ + uint32_t state[72]; +}; + +struct nvc0_rasterizer_stateobj { + struct pipe_rasterizer_state pipe; + int size; + uint32_t 
state[43]; +}; + +struct nvc0_zsa_stateobj { + struct pipe_depth_stencil_alpha_state pipe; + int size; + uint32_t state[26]; +}; + +struct nvc0_constbuf { + union { + struct pipe_resource *buf; /* read by nvc0_constbufs_validate when !user */ + const void *data; /* read by nvc0_constbufs_validate when user */ + } u; + uint32_t size; + uint32_t offset; /* added to the resource address when a resource-backed buffer is bound */ + boolean user; /* should only be TRUE if u.data is valid and non-NULL */ +}; + +struct nvc0_vertex_element { + struct pipe_vertex_element pipe; + uint32_t state; + uint32_t state_alt; /* buffer 0 and with source offset (for translate) */ +}; + +struct nvc0_vertex_stateobj { + uint32_t min_instance_div[PIPE_MAX_ATTRIBS]; + uint16_t vb_access_size[PIPE_MAX_ATTRIBS]; + struct translate *translate; + unsigned num_elements; + uint32_t instance_elts; + uint32_t instance_bufs; + boolean shared_slots; + boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */ + unsigned size; /* size of vertex in bytes (when packed) */ + struct nvc0_vertex_element element[0]; /* zero-length trailing array; presumably allocated with num_elements entries -- TODO confirm at the allocation site */ +}; + +struct nvc0_so_target { + struct pipe_stream_output_target pipe; /* first member, which is what makes the pointer cast in nvc0_so_target() valid */ + struct pipe_query *pq; + unsigned stride; + boolean clean; +}; + +static INLINE struct nvc0_so_target * +nvc0_so_target(struct pipe_stream_output_target *ptarg) +{ /* Casts a pipe_stream_output_target pointer to the nvc0_so_target that embeds it as its first member. */ + return (struct nvc0_so_target *)ptarg; +} + +#endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c new file mode 100644 index 00000000000..5070df80671 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -0,0 +1,1265 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <stdint.h> + +#include "pipe/p_defines.h" + +#include "util/u_inlines.h" +#include "util/u_pack_color.h" +#include "util/u_format.h" +#include "util/u_surface.h" + +#include "os/os_thread.h" + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_resource.h" + +#include "nv50/nv50_defs.xml.h" +#include "nv50/nv50_texture.xml.h" + +/* these are used in nv50_blit.h */ +#define NV50_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL +#define NV50_ENG2D_NOCONVERT_FORMATS 0x009cc02000000000ULL +#define NV50_ENG2D_LUMINANCE_FORMATS 0x001cc02000000000ULL +#define NV50_ENG2D_INTENSITY_FORMATS 0x0080000000000000ULL +#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000638000ULL + +#define NOUVEAU_DRIVER 0xc0 +#include "nv50/nv50_blit.h" + +static INLINE uint8_t +nvc0_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal) +{ + uint8_t id = nvc0_format_table[format].rt; + + /* A8_UNORM is treated as I8_UNORM as far as the 2D engine is concerned. */ + if (!dst && unlikely(format == PIPE_FORMAT_I8_UNORM) && !dst_src_equal) + return NV50_SURFACE_FORMAT_A8_UNORM; + + /* Hardware values for color formats range from 0xc0 to 0xff, + * but the 2D engine doesn't support all of them. 
+ */ + if (nv50_2d_format_supported(format)) + return id; + assert(dst_src_equal); /* an unsupported format is only expected when source and destination formats are identical */ + + /* Fall back to a fixed surface format chosen by the block size in bytes of `format`. */ + switch (util_format_get_blocksize(format)) { + case 1: + return NV50_SURFACE_FORMAT_R8_UNORM; + case 2: + return NV50_SURFACE_FORMAT_R16_UNORM; + case 4: + return NV50_SURFACE_FORMAT_BGRA8_UNORM; + case 8: + return NV50_SURFACE_FORMAT_RGBA16_UNORM; + case 16: + return NV50_SURFACE_FORMAT_RGBA32_FLOAT; + default: + assert(0); + return 0; /* 0 is what nvc0_2d_texture_set reports as an invalid/unsupported format */ + } +} + +static int +nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst, + struct nv50_miptree *mt, unsigned level, unsigned layer, + enum pipe_format pformat, boolean dst_src_pformat_equal) +{ /* Emits the 2D-engine surface description (format, layout, size, address) for one level/layer of `mt`, as destination when `dst` is set and as source otherwise. Returns 1 if nvc0_2d_format yields 0 for `pformat`, else 0. */ + struct nouveau_bo *bo = mt->base.bo; + uint32_t width, height, depth; + uint32_t format; + uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT; + uint32_t offset = mt->level[level].offset; + + format = nvc0_2d_format(pformat, dst, dst_src_pformat_equal); + if (!format) { + NOUVEAU_ERR("invalid/unsupported surface format: %s\n", + util_format_name(pformat)); + return 1; + } + + /* level dimensions; width/height are additionally shifted by the per-axis ms_x/ms_y */ + width = u_minify(mt->base.base.width0, level) << mt->ms_x; + height = u_minify(mt->base.base.height0, level) << mt->ms_y; + depth = u_minify(mt->base.base.depth0, level); + + /* layer has to be < depth, and depth > tile depth / 2 */ + + if (!mt->layout_3d) { /* non-3D layout: fold the layer into the byte offset and describe a single-layer surface */ + offset += mt->layer_stride * layer; + layer = 0; + depth = 1; + } else + if (!dst) { /* 3D-layout source: fold the slice into the offset; a 3D-layout destination keeps `layer` and `depth` as they are */ + offset += nvc0_mt_zslice_offset(mt, level, layer); + layer = 0; + } + + if (!nouveau_bo_memtype(bo)) { /* memtype 0: surface is described by a pitch (presumably linear memory -- TODO confirm) */ + BEGIN_NVC0(push, SUBC_2D(mthd), 2); + PUSH_DATA (push, format); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, SUBC_2D(mthd + 0x14), 5); + PUSH_DATA (push, mt->level[level].pitch); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + PUSH_DATAh(push, bo->offset + offset); + PUSH_DATA (push, bo->offset + offset); + } else { /* non-zero memtype: surface is described by tile_mode, depth and layer instead of a pitch */ + BEGIN_NVC0(push, SUBC_2D(mthd), 5); + PUSH_DATA (push, format); + PUSH_DATA (push, 0); + PUSH_DATA (push, mt->level[level].tile_mode); + PUSH_DATA (push, depth); + PUSH_DATA (push, layer); + 
BEGIN_NVC0(push, SUBC_2D(mthd + 0x18), 4); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + PUSH_DATAh(push, bo->offset + offset); + PUSH_DATA (push, bo->offset + offset); + } + +#if 0 + if (dst) { + BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, width); + PUSH_DATA (push, height); + } +#endif + return 0; +} + +static int +nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push, + struct nv50_miptree *dst, unsigned dst_level, + unsigned dx, unsigned dy, unsigned dz, + struct nv50_miptree *src, unsigned src_level, + unsigned sx, unsigned sy, unsigned sz, + unsigned w, unsigned h) +{ /* Copies a w x h rectangle of one layer/slice (sz -> dz) from src to dst with the 2D engine at a 1:1 scale. X quantities are shifted by the miptree's ms_x and Y quantities by its ms_y before being written to the BLIT_* methods. Returns 0 on success, PIPE_ERROR if PUSH_SPACE fails, or the non-zero result of nvc0_2d_texture_set for an unsupported format. */ + const enum pipe_format dfmt = dst->base.base.format; + const enum pipe_format sfmt = src->base.base.format; + int ret; + boolean eqfmt = dfmt == sfmt; + + if (!PUSH_SPACE(push, 2 * 16 + 32)) + return PIPE_ERROR; + + ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt, eqfmt); + if (ret) + return ret; + + ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt, eqfmt); + if (ret) + return ret; + + IMMED_NVC0(push, NVC0_2D(BLIT_CONTROL), 0x00); + BEGIN_NVC0(push, NVC0_2D(BLIT_DST_X), 4); + PUSH_DATA (push, dx << dst->ms_x); + PUSH_DATA (push, dy << dst->ms_y); + PUSH_DATA (push, w << dst->ms_x); + PUSH_DATA (push, h << dst->ms_y); + BEGIN_NVC0(push, NVC0_2D(BLIT_DU_DX_FRACT), 4); /* fraction 0, integer 1 for both axes: no scaling */ + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_X_FRACT), 4); + PUSH_DATA (push, 0); + PUSH_DATA (push, sx << src->ms_x); + PUSH_DATA (push, 0); + PUSH_DATA (push, sy << src->ms_y); /* the source Y coordinate scales with ms_y, like dy and h above */ + + return 0; +} + +static void +nvc0_resource_copy_region(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + int ret; + boolean m2mf; + unsigned dst_layer = 
dstz, src_layer = src_box->z; + + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { /* buffer to buffer: handled entirely by nouveau_copy_buffer */ + nouveau_copy_buffer(&nvc0->base, + nv04_resource(dst), dstx, + nv04_resource(src), src_box->x, src_box->width); + NOUVEAU_DRV_STAT(&nvc0->screen->base, buf_copy_bytes, src_box->width); + return; + } + NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_copy_count, 1); + + /* 0 and 1 are equal, only supporting 0/1, 2, 4 and 8 */ + assert((src->nr_samples | 1) == (dst->nr_samples | 1)); + + /* the m2mf path is used whenever both formats have the same block size in bits, which includes identical formats */ + m2mf = (src->format == dst->format) || + (util_format_get_blocksizebits(src->format) == + util_format_get_blocksizebits(dst->format)); + + nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + + if (m2mf) { + struct nv50_m2mf_rect drect, srect; + unsigned i; + unsigned nx = util_format_get_nblocksx(src->format, src_box->width); /* copy size is counted in format blocks of the source format */ + unsigned ny = util_format_get_nblocksy(src->format, src_box->height); + + nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz); + nv50_m2mf_rect_setup(&srect, src, src_level, + src_box->x, src_box->y, src_box->z); + + for (i = 0; i < src_box->depth; ++i) { /* one rect copy per slice/layer */ + nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny); + + /* step to the next slice: z for 3D layouts, base address by layer_stride otherwise */ + if (nv50_miptree(dst)->layout_3d) + drect.z++; + else + drect.base += nv50_miptree(dst)->layer_stride; + + if (nv50_miptree(src)->layout_3d) + srect.z++; + else + srect.base += nv50_miptree(src)->layer_stride; + } + return; + } + + /* block sizes differ: use the 2D engine */ + assert(nv50_2d_dst_format_faithful(dst->format)); + assert(nv50_2d_src_format_faithful(src->format)); + + BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(src), RD); + BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(dst), WR); + nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx); + nouveau_pushbuf_validate(nvc0->base.pushbuf); /* NOTE(review): the return value is ignored here, unlike in nvc0_state_validate */ + + for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) { /* one 2D blit per layer; stops at the first failure */ + ret = nvc0_2d_texture_do_copy(nvc0->base.pushbuf, + nv50_miptree(dst), dst_level, + dstx, dsty, dst_layer, + nv50_miptree(src), src_level, + src_box->x, src_box->y, src_layer, + src_box->width, src_box->height); + if 
(ret) + break; + } + nouveau_bufctx_reset(nvc0->bufctx, 0); +} + +static void +nvc0_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const union pipe_color_union *color, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ /* Clears sf->depth layers of surface `dst` to `color`. Programs the surface directly as render target 0, with the screen scissor built from dstx/dsty/width/height, and returns without doing anything if PUSH_SPACE fails. */ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv50_surface *sf = nv50_surface(dst); + struct nv04_resource *res = nv04_resource(sf->base.texture); + unsigned z; + + if (!PUSH_SPACE(push, 32 + sf->depth)) /* the final CLEAR_BUFFERS packet pushes sf->depth data words */ + return; + + PUSH_REFN (push, res->bo, res->domain | NOUVEAU_BO_WR); + + BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4); + PUSH_DATAf(push, color->f[0]); + PUSH_DATAf(push, color->f[1]); + PUSH_DATAf(push, color->f[2]); + PUSH_DATAf(push, color->f[3]); + + BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); /* size in the upper 16 bits, origin in the lower 16 bits */ + PUSH_DATA (push, ( width << 16) | dstx); + PUSH_DATA (push, (height << 16) | dsty); + + BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9); /* 9 words: address high/low plus the 7 words pushed by whichever branch below is taken */ + PUSH_DATAh(push, res->address + sf->offset); + PUSH_DATA (push, res->address + sf->offset); + if (likely(nouveau_bo_memtype(res->bo))) { /* non-zero memtype: described by tile_mode and layer range */ + struct nv50_miptree *mt = nv50_miptree(dst->texture); + + PUSH_DATA(push, sf->width); + PUSH_DATA(push, sf->height); + PUSH_DATA(push, nvc0_format_table[dst->format].rt); + PUSH_DATA(push, (mt->layout_3d << 16) | + mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA(push, dst->u.tex.first_layer + sf->depth); + PUSH_DATA(push, mt->layer_stride >> 2); + PUSH_DATA(push, dst->u.tex.first_layer); + } else { /* memtype 0: described by a pitch, or by fixed values for a PIPE_BUFFER */ + if (res->base.target == PIPE_BUFFER) { + PUSH_DATA(push, 262144); + PUSH_DATA(push, 1); + } else { + PUSH_DATA(push, nv50_miptree(&res->base)->level[0].pitch); + PUSH_DATA(push, sf->height); + } + PUSH_DATA(push, nvc0_format_table[sf->base.format].rt); + PUSH_DATA(push, 1 << 12); + PUSH_DATA(push, 1); + PUSH_DATA(push, 0); + PUSH_DATA(push, 0); + + IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0); 
+ + /* tiled textures don't have to be fenced, they're not mapped directly */ + nvc0_resource_fence(res, NOUVEAU_BO_WR); + } + + BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); /* sf->depth data words to the same method, one per layer (BEGIN_NIC0 is presumably the non-incrementing packet form -- TODO confirm) */ + for (z = 0; z < sf->depth; ++z) { + PUSH_DATA (push, 0x3c | + (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); + } + + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; /* render target 0 was reprogrammed above, so the bound framebuffer has to be validated again */ +} + +static void +nvc0_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ /* Clears depth and/or stencil (selected by PIPE_CLEAR_DEPTH / PIPE_CLEAR_STENCIL in clear_flags) of sf->depth layers of surface `dst`. Programs the surface directly as the zeta buffer, with the screen scissor built from dstx/dsty/width/height, and returns without doing anything if PUSH_SPACE fails. */ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nv50_surface *sf = nv50_surface(dst); + uint32_t mode = 0; + int unk = mt->base.base.target == PIPE_TEXTURE_2D; /* 1 only for PIPE_TEXTURE_2D; ends up in bit 16 of the third ZETA_HORIZ word (meaning not visible here) */ + unsigned z; + + if (!PUSH_SPACE(push, 32 + sf->depth)) + return; + + PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR); + + if (clear_flags & PIPE_CLEAR_DEPTH) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1); + PUSH_DATAf(push, depth); + mode |= NVC0_3D_CLEAR_BUFFERS_Z; + } + + if (clear_flags & PIPE_CLEAR_STENCIL) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1); + PUSH_DATA (push, stencil & 0xff); /* only the low 8 bits of the stencil value are used */ + mode |= NVC0_3D_CLEAR_BUFFERS_S; + } + + BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); + PUSH_DATA (push, ( width << 16) | dstx); + PUSH_DATA (push, (height << 16) | dsty); + + BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5); + PUSH_DATAh(push, mt->base.address + sf->offset); + PUSH_DATA (push, mt->base.address + sf->offset); + PUSH_DATA (push, nvc0_format_table[dst->format].rt); + PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, mt->layer_stride >> 2); + BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3); + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + 
sf->depth)); + BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1); + PUSH_DATA (push, dst->u.tex.first_layer); + + BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); + for (z = 0; z < sf->depth; ++z) { + PUSH_DATA (push, mode | + (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); + } + + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; +} + +void +nvc0_clear(struct pipe_context *pipe, unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ /* Clears the buffers of the currently bound framebuffer selected by the PIPE_CLEAR_* bits in `buffers`: colour buffers to `color`, depth to `depth`, stencil to the low 8 bits of `stencil`. Only framebuffer state is validated first; returns without clearing if that validation fails. */ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + unsigned i; + uint32_t mode = 0; + + /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ + if (!nvc0_state_validate(nvc0, NVC0_NEW_FRAMEBUFFER, 9 + (fb->nr_cbufs * 2))) + return; + + if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4); + PUSH_DATAf(push, color->f[0]); + PUSH_DATAf(push, color->f[1]); + PUSH_DATAf(push, color->f[2]); + PUSH_DATAf(push, color->f[3]); + mode = + NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G | + NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A; + } + + if (buffers & PIPE_CLEAR_DEPTH) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1); + PUSH_DATA (push, fui(depth)); + mode |= NVC0_3D_CLEAR_BUFFERS_Z; + } + + if (buffers & PIPE_CLEAR_STENCIL) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1); + PUSH_DATA (push, stencil & 0xff); + mode |= NVC0_3D_CLEAR_BUFFERS_S; + } + + /* colour buffer 0 is cleared together with depth/stencil */ + BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1); + PUSH_DATA (push, mode); + + /* The remaining colour buffers are cleared only when a colour clear was requested, so a depth/stencil-only clear leaves them untouched. */ + if (buffers & PIPE_CLEAR_COLOR) { + for (i = 1; i < fb->nr_cbufs; i++) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1); + PUSH_DATA (push, (i << 6) | 0x3c); /* 0x3c is the colour-clear value also used by nvc0_clear_render_target; i << 6 presumably selects colour buffer i -- TODO confirm */ + } + } +} + + +/* =============================== BLIT CODE =================================== + */ + +struct nvc0_blitter +{ + struct nvc0_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES]; + struct nvc0_program vp; + + struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */ + + pipe_mutex mutex; + + 
struct nvc0_screen *screen; +}; + +struct nvc0_blitctx +{ + struct nvc0_context *nvc0; + struct nvc0_program *fp; /* fragment program chosen by nvc0_blit_select_fp */ + uint8_t mode; + uint16_t color_mask; + uint8_t filter; /* used as the index into nvc0_blitter::sampler[] by nvc0_blitctx_pre_blit */ + enum pipe_texture_target target; + struct { /* context state stashed by nvc0_blitctx_pre_blit and put back by nvc0_blitctx_post_blit */ + struct pipe_framebuffer_state fb; + struct nvc0_rasterizer_stateobj *rast; + struct nvc0_program *vp; + struct nvc0_program *tcp; + struct nvc0_program *tep; + struct nvc0_program *gp; + struct nvc0_program *fp; + unsigned num_textures[5]; + unsigned num_samplers[5]; + struct pipe_sampler_view *texture[2]; + struct nv50_tsc_entry *sampler[2]; + uint32_t dirty; + } saved; + struct nvc0_rasterizer_stateobj rast; /* installed as nvc0->rast for the duration of a blit */ +}; + +static void +nvc0_blitter_make_vp(struct nvc0_blitter *blit) +{ /* Fills blit->vp with a pre-assembled vertex program. Two encodings are provided; the one used depends on whether the screen's 3D class is at least NVE4_3D_CLASS. Per the disassembly comments, the program fetches a[0x80] and a[0x90] and exports them to o[0x70] and o[0x80]. */ + static const uint32_t code_nvc0[] = + { + 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */ + 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */ + 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */ + 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */ + 0x00001de7, 0x80000000, /* exit */ + }; + static const uint32_t code_nve4[] = + { + 0x00000007, 0x20000000, /* sched */ + 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */ + 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */ + 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */ + 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */ + 0x00001de7, 0x80000000, /* exit */ + }; + + blit->vp.type = PIPE_SHADER_VERTEX; + blit->vp.translated = TRUE; + if (blit->screen->base.class_3d >= NVE4_3D_CLASS) { + blit->vp.code = (uint32_t *)code_nve4; /* const_cast */ + blit->vp.code_size = sizeof(code_nve4); + } else { + blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */ + blit->vp.code_size = sizeof(code_nvc0); + } + blit->vp.num_gprs = 6; /* the code above uses $r0..$r5 */ + blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS; + + blit->vp.hdr[0] = 0x00020461; /* vertprog magic */ + blit->vp.hdr[4] = 0x000ff000; /* no outputs read */ + blit->vp.hdr[6] = 0x00000073; /* a[0x80].xy, a[0x90].xyz */ + blit->vp.hdr[13] = 
0x00073000; /* o[0x70].xy, o[0x80].xyz */ +} + +static void +nvc0_blitter_make_sampler(struct nvc0_blitter *blit) +{ /* Fills both blitter sampler entries: [0] with nearest filtering, [1] with linear filtering; tsc[0] (wrap modes and sRGB flag) is shared, and id is set to -1 for both. */ + /* clamp to edge, min/max lod = 0, nearest filtering */ + + blit->sampler[0].id = -1; + + blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED | + (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) | + (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) | + (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT); + blit->sampler[0].tsc[1] = + NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE; + + /* clamp to edge, min/max lod = 0, bilinear filtering */ + + blit->sampler[1].id = -1; + + blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0]; + blit->sampler[1].tsc[1] = + NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE; +} + +static void +nvc0_blit_select_fp(struct nvc0_blitctx *ctx, const struct pipe_blit_info *info) +{ /* Sets ctx->fp to the blit fragment program for the source's texture type and ctx->mode, creating and caching it in the screen's blitter on first use. */ + struct nvc0_blitter *blitter = ctx->nvc0->screen->blitter; + + const enum pipe_texture_target ptarg = + nv50_blit_reinterpret_pipe_texture_target(info->src.resource->target); + + const unsigned targ = nv50_blit_texture_type(ptarg); + const unsigned mode = ctx->mode; + + if (!blitter->fp[targ][mode]) { /* first check is made without the mutex; the slot is checked again once the mutex is held */ + pipe_mutex_lock(blitter->mutex); + if (!blitter->fp[targ][mode]) + blitter->fp[targ][mode] = + nv50_blitter_make_fp(&ctx->nvc0->base.pipe, mode, ptarg); + pipe_mutex_unlock(blitter->mutex); + } + ctx->fp = blitter->fp[targ][mode]; +} + +static void +nvc0_blit_set_dst(struct nvc0_blitctx *ctx, + struct pipe_resource *res, unsigned level, unsigned layer, + enum pipe_format format) +{ /* Creates a surface for `level` of `res` and installs it as the only colour buffer of nvc0->framebuffer (no zsbuf). Passing -1 as `layer` selects all layers. */ + struct nvc0_context *nvc0 = ctx->nvc0; + struct pipe_context *pipe = &nvc0->base.pipe; + struct pipe_surface templ; /* only format and u.tex.* are assigned before use */ + + if (util_format_is_depth_or_stencil(format)) + templ.format = nv50_blit_zeta_to_colour_format(format); /* depth/stencil data is written through a colour format */ + else + templ.format = format; + + templ.u.tex.level = level; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + + if (layer == -1) { /* `layer` is unsigned, so this matches the all-ones value */ + templ.u.tex.first_layer = 
0; + templ.u.tex.last_layer = + (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1; /* last slice of a 3D texture, last array layer otherwise */ + } + + nvc0->framebuffer.cbufs[0] = nvc0_miptree_surface_new(pipe, res, &templ); /* NOTE(review): the result is dereferenced just below without a NULL check */ + nvc0->framebuffer.nr_cbufs = 1; + nvc0->framebuffer.zsbuf = NULL; + nvc0->framebuffer.width = nvc0->framebuffer.cbufs[0]->width; + nvc0->framebuffer.height = nvc0->framebuffer.cbufs[0]->height; +} + +static void +nvc0_blit_set_src(struct nvc0_blitctx *ctx, + struct pipe_resource *res, unsigned level, unsigned layer, + enum pipe_format format, const uint8_t filter) +{ /* Creates the texture view(s) for `level` of `res` and installs them in texture slots [4][0] and [4][1]; the texture counts of sets 0..3 are set to 0. Passing -1 as `layer` selects all layers. */ + struct nvc0_context *nvc0 = ctx->nvc0; + struct pipe_context *pipe = &nvc0->base.pipe; + struct pipe_sampler_view templ; /* only format, u.tex.* and the swizzles are assigned before use */ + uint32_t flags; + unsigned s; + enum pipe_texture_target target; + + target = nv50_blit_reinterpret_pipe_texture_target(res->target); + + templ.format = format; + templ.u.tex.first_layer = templ.u.tex.last_layer = layer; + templ.u.tex.first_level = templ.u.tex.last_level = level; + templ.swizzle_r = PIPE_SWIZZLE_RED; + templ.swizzle_g = PIPE_SWIZZLE_GREEN; + templ.swizzle_b = PIPE_SWIZZLE_BLUE; + templ.swizzle_a = PIPE_SWIZZLE_ALPHA; + + if (layer == -1) { /* `layer` is unsigned, so this matches the all-ones value */ + templ.u.tex.first_layer = 0; + templ.u.tex.last_layer = + (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1; + } + + flags = res->last_level ? 
0 : NV50_TEXVIEW_SCALED_COORDS; /* SCALED_COORDS is requested only for resources without mip levels (last_level == 0) */ + flags |= NV50_TEXVIEW_ACCESS_RESOLVE; + if (filter && res->nr_samples == 8) + flags |= NV50_TEXVIEW_FILTER_MSAA8; + + nvc0->textures[4][0] = nvc0_create_texture_view( + pipe, res, &templ, flags, target); + nvc0->textures[4][1] = NULL; + + for (s = 0; s <= 3; ++s) + nvc0->num_textures[s] = 0; + nvc0->num_textures[4] = 1; + + /* If nv50_zs_to_s_format maps `format` to a different format, bind a second view with that format in slot [4][1]. */ + templ.format = nv50_zs_to_s_format(format); + if (templ.format != format) { + nvc0->textures[4][1] = nvc0_create_texture_view( + pipe, res, &templ, flags, target); + nvc0->num_textures[4] = 2; + } +} + +static void +nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit) +{ /* Emits fixed blend, rasterizer, depth/stencil/alpha and transform-feedback settings for a blit directly to the pushbuf, without touching the context's bound state objects. Only COLOR_MASK(0) depends on the blit (blit->color_mask). */ + struct nouveau_pushbuf *push = blit->nvc0->base.pushbuf; + + /* TODO: maybe make this a MACRO (if we need more logic) ? */ + + if (blit->nvc0->cond_query) /* a render condition is active: force COND_MODE to ALWAYS for the blit */ + IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + + /* blend state */ + BEGIN_NVC0(push, NVC0_3D(COLOR_MASK(0)), 1); + PUSH_DATA (push, blit->color_mask); + IMMED_NVC0(push, NVC0_3D(BLEND_ENABLE(0)), 0); + IMMED_NVC0(push, NVC0_3D(LOGIC_OP_ENABLE), 0); + + /* rasterizer state */ + IMMED_NVC0(push, NVC0_3D(FRAG_COLOR_CLAMP_EN), 0); + IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_ENABLE), 0); + BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4); + PUSH_DATA (push, 0xffff); + PUSH_DATA (push, 0xffff); + PUSH_DATA (push, 0xffff); + PUSH_DATA (push, 0xffff); + BEGIN_NVC0(push, NVC0_3D(MACRO_POLYGON_MODE_FRONT), 1); + PUSH_DATA (push, NVC0_3D_MACRO_POLYGON_MODE_FRONT_FILL); + BEGIN_NVC0(push, NVC0_3D(MACRO_POLYGON_MODE_BACK), 1); + PUSH_DATA (push, NVC0_3D_MACRO_POLYGON_MODE_BACK_FILL); + IMMED_NVC0(push, NVC0_3D(POLYGON_SMOOTH_ENABLE), 0); + IMMED_NVC0(push, NVC0_3D(POLYGON_OFFSET_FILL_ENABLE), 0); + IMMED_NVC0(push, NVC0_3D(POLYGON_STIPPLE_ENABLE), 0); + IMMED_NVC0(push, NVC0_3D(CULL_FACE_ENABLE), 0); + + /* zsa state */ + IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0); + IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0); + IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0); + + /* disable transform 
feedback */ + IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), 0); +} + +static void +nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx) +{ /* Stashes the context state a blit overrides in ctx->saved (framebuffer fields, rasterizer, all five programs, texture/sampler counts, texture and sampler slots [4][0..1], dirty mask) and installs the blitter's programs, rasterizer and sampler. */ + struct nvc0_context *nvc0 = ctx->nvc0; + struct nvc0_blitter *blitter = nvc0->screen->blitter; + int s; + + /* only cbufs[0] and zsbuf are saved, not the other colour buffer pointers */ + ctx->saved.fb.width = nvc0->framebuffer.width; + ctx->saved.fb.height = nvc0->framebuffer.height; + ctx->saved.fb.nr_cbufs = nvc0->framebuffer.nr_cbufs; + ctx->saved.fb.cbufs[0] = nvc0->framebuffer.cbufs[0]; + ctx->saved.fb.zsbuf = nvc0->framebuffer.zsbuf; + + ctx->saved.rast = nvc0->rast; + + ctx->saved.vp = nvc0->vertprog; + ctx->saved.tcp = nvc0->tctlprog; + ctx->saved.tep = nvc0->tevlprog; + ctx->saved.gp = nvc0->gmtyprog; + ctx->saved.fp = nvc0->fragprog; + + nvc0->rast = &ctx->rast; + + /* the blit uses only the blitter's vertex program and the selected fragment program */ + nvc0->vertprog = &blitter->vp; + nvc0->tctlprog = NULL; + nvc0->tevlprog = NULL; + nvc0->gmtyprog = NULL; + nvc0->fragprog = ctx->fp; + + for (s = 0; s <= 4; ++s) { + ctx->saved.num_textures[s] = nvc0->num_textures[s]; + ctx->saved.num_samplers[s] = nvc0->num_samplers[s]; + nvc0->textures_dirty[s] = (1 << nvc0->num_textures[s]) - 1; /* mark every currently bound slot dirty (one bit per slot below the count) */ + nvc0->samplers_dirty[s] = (1 << nvc0->num_samplers[s]) - 1; + } + ctx->saved.texture[0] = nvc0->textures[4][0]; + ctx->saved.texture[1] = nvc0->textures[4][1]; + ctx->saved.sampler[0] = nvc0->samplers[4][0]; + ctx->saved.sampler[1] = nvc0->samplers[4][1]; + + /* both sampler slots of set 4 use the blitter sampler selected by ctx->filter */ + nvc0->samplers[4][0] = &blitter->sampler[ctx->filter]; + nvc0->samplers[4][1] = &blitter->sampler[ctx->filter]; + + for (s = 0; s <= 3; ++s) + nvc0->num_samplers[s] = 0; + nvc0->num_samplers[4] = 2; + + ctx->saved.dirty = nvc0->dirty; + + nvc0->textures_dirty[4] |= 3; + nvc0->samplers_dirty[4] |= 3; + + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1)); + + /* replace (not OR) the dirty mask, so only the state listed here is validated for the blit; the previous mask is kept in saved.dirty */ + nvc0->dirty = NVC0_NEW_FRAMEBUFFER | + NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG | + NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG | + NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS; 
+} + +static void +nvc0_blitctx_post_blit(struct nvc0_blitctx *blit) +{ /* Undoes nvc0_blitctx_pre_blit: drops the references to the temporary blit surface and texture views, restores everything kept in blit->saved, re-applies an active render condition, and marks the overridden state dirty. */ + struct nvc0_context *nvc0 = blit->nvc0; + int s; + + pipe_surface_reference(&nvc0->framebuffer.cbufs[0], NULL); /* drop the reference to the blit destination surface before cbufs[0] is overwritten below */ + + nvc0->framebuffer.width = blit->saved.fb.width; + nvc0->framebuffer.height = blit->saved.fb.height; + nvc0->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs; + nvc0->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0]; + nvc0->framebuffer.zsbuf = blit->saved.fb.zsbuf; + + nvc0->rast = blit->saved.rast; + + nvc0->vertprog = blit->saved.vp; + nvc0->tctlprog = blit->saved.tcp; + nvc0->tevlprog = blit->saved.tep; + nvc0->gmtyprog = blit->saved.gp; + nvc0->fragprog = blit->saved.fp; + + /* drop the references to the blit source views before the saved views are put back */ + pipe_sampler_view_reference(&nvc0->textures[4][0], NULL); + pipe_sampler_view_reference(&nvc0->textures[4][1], NULL); + + for (s = 0; s <= 4; ++s) { + nvc0->num_textures[s] = blit->saved.num_textures[s]; + nvc0->num_samplers[s] = blit->saved.num_samplers[s]; + nvc0->textures_dirty[s] = (1 << nvc0->num_textures[s]) - 1; /* every restored slot has to be re-validated */ + nvc0->samplers_dirty[s] = (1 << nvc0->num_samplers[s]) - 1; + } + nvc0->textures[4][0] = blit->saved.texture[0]; + nvc0->textures[4][1] = blit->saved.texture[1]; + nvc0->samplers[4][0] = blit->saved.sampler[0]; + nvc0->samplers[4][1] = blit->saved.sampler[1]; + + nvc0->textures_dirty[4] |= 3; + nvc0->samplers_dirty[4] |= 3; + + if (nvc0->cond_query) /* nvc0_blitctx_prepare_state forced COND_MODE to ALWAYS; set the render condition again */ + nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query, + nvc0->cond_cond, nvc0->cond_mode); + + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1)); + + /* the dirty bits from before the blit, plus everything the blit overrode through state objects or direct method writes */ + nvc0->dirty = blit->saved.dirty | + (NVC0_NEW_FRAMEBUFFER | NVC0_NEW_SCISSOR | NVC0_NEW_SAMPLE_MASK | + NVC0_NEW_RASTERIZER | NVC0_NEW_ZSA | NVC0_NEW_BLEND | + NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS | + NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG | + NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG | + NVC0_NEW_TFB_TARGETS); +} + +static void 
+nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) +{ + struct nvc0_blitctx *blit = nvc0->blit; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_resource *src = info->src.resource; + struct pipe_resource *dst = info->dst.resource; + int32_t minx, maxx, miny, maxy; + int32_t i; + float x0, x1, y0, y1, z; + float dz; + float x_range, y_range; + + blit->mode = nv50_blit_select_mode(info); + blit->color_mask = nv50_blit_derive_color_mask(info); + blit->filter = nv50_blit_get_filter(info); + + nvc0_blit_select_fp(blit, info); + nvc0_blitctx_pre_blit(blit); + + nvc0_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format); + nvc0_blit_set_src(blit, src, info->src.level, -1, info->src.format, + blit->filter); + + nvc0_blitctx_prepare_state(blit); + + nvc0_state_validate(nvc0, ~0, 48); + + x_range = (float)info->src.box.width / (float)info->dst.box.width; + y_range = (float)info->src.box.height / (float)info->dst.box.height; + + x0 = (float)info->src.box.x - x_range * (float)info->dst.box.x; + y0 = (float)info->src.box.y - y_range * (float)info->dst.box.y; + + x1 = x0 + 16384.0f * x_range; + y1 = y0 + 16384.0f * y_range; + + x0 *= (float)(1 << nv50_miptree(src)->ms_x); + x1 *= (float)(1 << nv50_miptree(src)->ms_x); + y0 *= (float)(1 << nv50_miptree(src)->ms_y); + y1 *= (float)(1 << nv50_miptree(src)->ms_y); + + if (src->last_level > 0) { + /* If there are mip maps, GPU always assumes normalized coordinates. 
*/ + const unsigned l = info->src.level; + const float fh = u_minify(src->width0 << nv50_miptree(src)->ms_x, l); + const float fv = u_minify(src->height0 << nv50_miptree(src)->ms_y, l); + x0 /= fh; + x1 /= fh; + y0 /= fv; + y1 /= fv; + } + + dz = (float)info->src.box.depth / (float)info->dst.box.depth; + z = (float)info->src.box.z; + if (nv50_miptree(src)->layout_3d) + z += 0.5f * dz; + + IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0); + IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1); + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, nvc0->framebuffer.width << 16); + PUSH_DATA (push, nvc0->framebuffer.height << 16); + + /* Draw a large triangle in screen coordinates covering the whole + * render target, with scissors defining the destination region. + * The vertex is supplied with non-normalized texture coordinates + * arranged in a way to yield the desired offset and scale. + */ + + minx = info->dst.box.x; + maxx = info->dst.box.x + info->dst.box.width; + miny = info->dst.box.y; + maxy = info->dst.box.y + info->dst.box.height; + if (info->scissor_enable) { + minx = MAX2(minx, info->scissor.minx); + maxx = MIN2(maxx, info->scissor.maxx); + miny = MAX2(miny, info->scissor.miny); + maxy = MIN2(maxy, info->scissor.maxy); + } + BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2); + PUSH_DATA (push, (maxx << 16) | minx); + PUSH_DATA (push, (maxy << 16) | miny); + + for (i = 0; i < info->dst.box.depth; ++i, z += dz) { + if (info->dst.box.z + i) { + BEGIN_NVC0(push, NVC0_3D(LAYER), 1); + PUSH_DATA (push, info->dst.box.z + i); + } + + IMMED_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), + NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); + + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 4); + PUSH_DATA (push, 0x74301); + PUSH_DATAf(push, x0); + PUSH_DATAf(push, y0); + PUSH_DATAf(push, z); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 3); + PUSH_DATA (push, 0x74200); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 
0.0f); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 4); + PUSH_DATA (push, 0x74301); + PUSH_DATAf(push, x1); + PUSH_DATAf(push, y0); + PUSH_DATAf(push, z); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 3); + PUSH_DATA (push, 0x74200); + PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_x); + PUSH_DATAf(push, 0.0f); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 4); + PUSH_DATA (push, 0x74301); + PUSH_DATAf(push, x0); + PUSH_DATAf(push, y1); + PUSH_DATAf(push, z); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 3); + PUSH_DATA (push, 0x74200); + PUSH_DATAf(push, 0.0f); + PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_y); + + IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0); + } + if (info->dst.box.z + info->dst.box.depth - 1) + IMMED_NVC0(push, NVC0_3D(LAYER), 0); + + nvc0_blitctx_post_blit(blit); + + /* restore viewport */ + + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, nvc0->vport_int[0]); + PUSH_DATA (push, nvc0->vport_int[1]); + IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1); +} + +static void +nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv50_miptree *dst = nv50_miptree(info->dst.resource); + struct nv50_miptree *src = nv50_miptree(info->src.resource); + const int32_t srcx_adj = info->src.box.width < 0 ? -1 : 0; + const int32_t srcy_adj = info->src.box.height < 0 ? -1 : 0; + const int dz = info->dst.box.z; + const int sz = info->src.box.z; + uint32_t dstw, dsth; + int32_t dstx, dsty; + int64_t srcx, srcy; + int64_t du_dx, dv_dy; + int i; + uint32_t mode; + uint32_t mask = nv50_blit_eng2d_get_mask(info); + boolean b; + + mode = nv50_blit_get_filter(info) ? + NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR : + NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE; + mode |= (src->base.base.nr_samples > dst->base.base.nr_samples) ? 
+ NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER : NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER; + + du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width; + dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height; + + b = info->dst.format == info->src.format; + nvc0_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b); + nvc0_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b); + + if (info->scissor_enable) { + BEGIN_NVC0(push, NVC0_2D(CLIP_X), 5); + PUSH_DATA (push, info->scissor.minx << dst->ms_x); + PUSH_DATA (push, info->scissor.miny << dst->ms_y); + PUSH_DATA (push, (info->scissor.maxx - info->scissor.minx) << dst->ms_x); + PUSH_DATA (push, (info->scissor.maxy - info->scissor.miny) << dst->ms_y); + PUSH_DATA (push, 1); /* enable */ + } + + if (mask != 0xffffffff) { + IMMED_NVC0(push, NVC0_2D(ROP), 0xca); /* DPSDxax */ + IMMED_NVC0(push, NVC0_2D(PATTERN_COLOR_FORMAT), + NVC0_2D_PATTERN_COLOR_FORMAT_32BPP); + BEGIN_NVC0(push, NVC0_2D(PATTERN_COLOR(0)), 4); + PUSH_DATA (push, 0x00000000); + PUSH_DATA (push, mask); + PUSH_DATA (push, 0xffffffff); + PUSH_DATA (push, 0xffffffff); + IMMED_NVC0(push, NVC0_2D(OPERATION), NVC0_2D_OPERATION_ROP); + } else + if (info->src.format != info->dst.format) { + if (info->src.format == PIPE_FORMAT_R8_UNORM || + info->src.format == PIPE_FORMAT_R8_SNORM || + info->src.format == PIPE_FORMAT_R16_UNORM || + info->src.format == PIPE_FORMAT_R16_SNORM || + info->src.format == PIPE_FORMAT_R16_FLOAT || + info->src.format == PIPE_FORMAT_R32_FLOAT) { + mask = 0xffff0000; /* also makes condition for OPERATION reset true */ + BEGIN_NVC0(push, NVC0_2D(BETA4), 2); + PUSH_DATA (push, mask); + PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT); + } else + if (info->src.format == PIPE_FORMAT_A8_UNORM) { + mask = 0xff000000; + BEGIN_NVC0(push, NVC0_2D(BETA4), 2); + PUSH_DATA (push, mask); + PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT); + } + } + + if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) { 
+ /* ms_x is always >= ms_y */ + du_dx <<= src->ms_x - dst->ms_x; + dv_dy <<= src->ms_y - dst->ms_y; + } else { + du_dx >>= dst->ms_x - src->ms_x; + dv_dy >>= dst->ms_y - src->ms_y; + } + + srcx = (int64_t)(info->src.box.x + srcx_adj) << (src->ms_x + 32); + srcy = (int64_t)(info->src.box.y + srcy_adj) << (src->ms_y + 32); + + if (src->base.base.nr_samples > dst->base.base.nr_samples) { + /* center src coorinates for proper MS resolve filtering */ + srcx += (int64_t)(src->ms_x + 0) << 32; + srcy += (int64_t)(src->ms_y + 1) << 31; + } + + dstx = info->dst.box.x << dst->ms_x; + dsty = info->dst.box.y << dst->ms_y; + + dstw = info->dst.box.width << dst->ms_x; + dsth = info->dst.box.height << dst->ms_y; + + if (dstx < 0) { + dstw += dstx; + srcx -= du_dx * dstx; + dstx = 0; + } + if (dsty < 0) { + dsth += dsty; + srcy -= dv_dy * dsty; + dsty = 0; + } + + IMMED_NVC0(push, NVC0_2D(BLIT_CONTROL), mode); + BEGIN_NVC0(push, NVC0_2D(BLIT_DST_X), 4); + PUSH_DATA (push, dstx); + PUSH_DATA (push, dsty); + PUSH_DATA (push, dstw); + PUSH_DATA (push, dsth); + BEGIN_NVC0(push, NVC0_2D(BLIT_DU_DX_FRACT), 4); + PUSH_DATA (push, du_dx); + PUSH_DATA (push, du_dx >> 32); + PUSH_DATA (push, dv_dy); + PUSH_DATA (push, dv_dy >> 32); + + BCTX_REFN(nvc0->bufctx, 2D, &dst->base, WR); + BCTX_REFN(nvc0->bufctx, 2D, &src->base, RD); + nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx); + if (nouveau_pushbuf_validate(nvc0->base.pushbuf)) + return; + + for (i = 0; i < info->dst.box.depth; ++i) { + if (i > 0) { + /* no scaling in z-direction possible for eng2d blits */ + if (dst->layout_3d) { + BEGIN_NVC0(push, NVC0_2D(DST_LAYER), 1); + PUSH_DATA (push, info->dst.box.z + i); + } else { + const unsigned z = info->dst.box.z + i; + BEGIN_NVC0(push, NVC0_2D(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, dst->base.address + z * dst->layer_stride); + PUSH_DATA (push, dst->base.address + z * dst->layer_stride); + } + if (src->layout_3d) { + /* not possible because of depth tiling */ + assert(0); + } else 
{ + const unsigned z = info->src.box.z + i; + BEGIN_NVC0(push, NVC0_2D(SRC_ADDRESS_HIGH), 2); + PUSH_DATAh(push, src->base.address + z * src->layer_stride); + PUSH_DATA (push, src->base.address + z * src->layer_stride); + } + BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_Y_INT), 1); /* trigger */ + PUSH_DATA (push, srcy >> 32); + } else { + BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_X_FRACT), 4); + PUSH_DATA (push, srcx); + PUSH_DATA (push, srcx >> 32); + PUSH_DATA (push, srcy); + PUSH_DATA (push, srcy >> 32); + } + } + nvc0_resource_validate(&dst->base, NOUVEAU_BO_WR); + nvc0_resource_validate(&src->base, NOUVEAU_BO_RD); + + nouveau_bufctx_reset(nvc0->bufctx, NVC0_BIND_2D); + + if (info->scissor_enable) + IMMED_NVC0(push, NVC0_2D(CLIP_ENABLE), 0); + if (mask != 0xffffffff) + IMMED_NVC0(push, NVC0_2D(OPERATION), NVC0_2D_OPERATION_SRCCOPY); +} + +static void +nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + boolean eng3d = FALSE; + + if (util_format_is_depth_or_stencil(info->dst.resource->format)) { + if (!(info->mask & PIPE_MASK_ZS)) + return; + if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT || + info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + eng3d = TRUE; + if (info->filter != PIPE_TEX_FILTER_NEAREST) + eng3d = TRUE; + } else { + if (!(info->mask & PIPE_MASK_RGBA)) + return; + if (info->mask != PIPE_MASK_RGBA) + eng3d = TRUE; + } + + if (nv50_miptree(info->src.resource)->layout_3d) { + eng3d = TRUE; + } else + if (info->src.box.depth != info->dst.box.depth) { + eng3d = TRUE; + debug_printf("blit: cannot filter array or cube textures in z direction"); + } + + if (!eng3d && info->dst.format != info->src.format) { + if (!nv50_2d_dst_format_faithful(info->dst.format)) { + eng3d = TRUE; + } else + if (!nv50_2d_src_format_faithful(info->src.format)) { + if (!util_format_is_luminance(info->src.format)) { + if (util_format_is_intensity(info->src.format)) + eng3d = info->src.format != 
PIPE_FORMAT_I8_UNORM; + else + if (!nv50_2d_dst_format_ops_supported(info->dst.format)) + eng3d = TRUE; + else + eng3d = !nv50_2d_format_supported(info->src.format); + } + } else + if (util_format_is_luminance_alpha(info->src.format)) + eng3d = TRUE; + } + + if (info->src.resource->nr_samples == 8 && + info->dst.resource->nr_samples <= 1) + eng3d = TRUE; +#if 0 + /* FIXME: can't make this work with eng2d anymore, at least not on nv50 */ + if (info->src.resource->nr_samples > 1 || + info->dst.resource->nr_samples > 1) + eng3d = TRUE; +#endif + /* FIXME: find correct src coordinates adjustments */ + if ((info->src.box.width != info->dst.box.width && + info->src.box.width != -info->dst.box.width) || + (info->src.box.height != info->dst.box.height && + info->src.box.height != -info->dst.box.height)) + eng3d = TRUE; + + if (!eng3d) + nvc0_blit_eng2d(nvc0, info); + else + nvc0_blit_3d(nvc0, info); + + NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_blit_count, 1); +} + +boolean +nvc0_blitter_create(struct nvc0_screen *screen) +{ + screen->blitter = CALLOC_STRUCT(nvc0_blitter); + if (!screen->blitter) { + NOUVEAU_ERR("failed to allocate blitter struct\n"); + return FALSE; + } + screen->blitter->screen = screen; + + pipe_mutex_init(screen->blitter->mutex); + + nvc0_blitter_make_vp(screen->blitter); + nvc0_blitter_make_sampler(screen->blitter); + + return TRUE; +} + +void +nvc0_blitter_destroy(struct nvc0_screen *screen) +{ + struct nvc0_blitter *blitter = screen->blitter; + unsigned i, m; + + for (i = 0; i < NV50_BLIT_MAX_TEXTURE_TYPES; ++i) { + for (m = 0; m < NV50_BLIT_MODES; ++m) { + struct nvc0_program *prog = blitter->fp[i][m]; + if (prog) { + nvc0_program_destroy(NULL, prog); + FREE((void *)prog->pipe.tokens); + FREE(prog); + } + } + } + + FREE(blitter); +} + +boolean +nvc0_blitctx_create(struct nvc0_context *nvc0) +{ + nvc0->blit = CALLOC_STRUCT(nvc0_blitctx); + if (!nvc0->blit) { + NOUVEAU_ERR("failed to allocate blit context\n"); + return FALSE; + } + + nvc0->blit->nvc0 
= nvc0; + + nvc0->blit->rast.pipe.half_pixel_center = 1; + + return TRUE; +} + +void +nvc0_blitctx_destroy(struct nvc0_context *nvc0) +{ + if (nvc0->blit) + FREE(nvc0->blit); +} + +void +nvc0_init_surface_functions(struct nvc0_context *nvc0) +{ + struct pipe_context *pipe = &nvc0->base.pipe; + + pipe->resource_copy_region = nvc0_resource_copy_region; + pipe->blit = nvc0_blit; + pipe->clear_render_target = nvc0_clear_render_target; + pipe->clear_depth_stencil = nvc0_clear_depth_stencil; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c new file mode 100644 index 00000000000..765cd2d2bab --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -0,0 +1,814 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_resource.h" +#include "nv50/nv50_texture.xml.h" +#include "nv50/nv50_defs.xml.h" + +#include "util/u_format.h" + +#define NVE4_TIC_ENTRY_INVALID 0x000fffff +#define NVE4_TSC_ENTRY_INVALID 0xfff00000 + +#define NV50_TIC_0_SWIZZLE__MASK \ + (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ + NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) + +static INLINE uint32_t +nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int) +{ + switch (swz) { + case PIPE_SWIZZLE_RED: + return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; + case PIPE_SWIZZLE_GREEN: + return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; + case PIPE_SWIZZLE_BLUE: + return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; + case PIPE_SWIZZLE_ALPHA: + return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; + case PIPE_SWIZZLE_ONE: + return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT; + case PIPE_SWIZZLE_ZERO: + default: + return NV50_TIC_MAP_ZERO; + } +} + +struct pipe_sampler_view * +nvc0_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *res, + const struct pipe_sampler_view *templ) +{ + uint32_t flags = 0; + + if (res->target == PIPE_TEXTURE_RECT || res->target == PIPE_BUFFER) + flags |= NV50_TEXVIEW_SCALED_COORDS; + + return nvc0_create_texture_view(pipe, res, templ, flags, res->target); +} + +struct pipe_sampler_view * +nvc0_create_texture_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ, + uint32_t flags, + enum pipe_texture_target target) +{ + const struct util_format_description *desc; + uint64_t address; + uint32_t *tic; + uint32_t swz[4]; + uint32_t width, height; + uint32_t depth; + struct nv50_tic_entry *view; + struct nv50_miptree *mt; + boolean tex_int; + + view = MALLOC_STRUCT(nv50_tic_entry); + if (!view) + return NULL; + mt = nv50_miptree(texture); + + view->pipe = *templ; + view->pipe.reference.count = 1; + 
view->pipe.texture = NULL; + view->pipe.context = pipe; + + view->id = -1; + + pipe_resource_reference(&view->pipe.texture, texture); + + tic = &view->tic[0]; + + desc = util_format_description(view->pipe.format); + + tic[0] = nvc0_format_table[view->pipe.format].tic; + + tex_int = util_format_is_pure_integer(view->pipe.format); + + swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int); + swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int); + swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int); + swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int); + tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | + (swz[0] << NV50_TIC_0_MAPR__SHIFT) | + (swz[1] << NV50_TIC_0_MAPG__SHIFT) | + (swz[2] << NV50_TIC_0_MAPB__SHIFT) | + (swz[3] << NV50_TIC_0_MAPA__SHIFT); + + address = mt->base.address; + + tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER; + + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; + + if (!(flags & NV50_TEXVIEW_SCALED_COORDS)) + tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; + + /* check for linear storage type */ + if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) { + if (texture->target == PIPE_BUFFER) { + assert(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS)); + address += + view->pipe.u.buf.first_element * desc->block.bits / 8; + tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER; + tic[3] = 0; + tic[4] = /* width */ + view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1; + tic[5] = 0; + } else { + /* must be 2D texture without mip maps */ + tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT; + tic[3] = mt->level[0].pitch; + tic[4] = mt->base.base.width0; + tic[5] = (1 << 16) | mt->base.base.height0; + } + tic[6] = + tic[7] = 0; + tic[1] = address; + tic[2] |= address >> 32; + return &view->pipe; + } + + tic[2] |= + ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) | + ((mt->level[0].tile_mode & 0xf00) << (25 - 8)); + + depth = 
MAX2(mt->base.base.array_size, mt->base.base.depth0); + + if (mt->base.base.array_size > 1) { + /* there doesn't seem to be a base layer field in TIC */ + address += view->pipe.u.tex.first_layer * mt->layer_stride; + depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1; + } + tic[1] = address; + tic[2] |= address >> 32; + + switch (target) { + case PIPE_TEXTURE_1D: + tic[2] |= NV50_TIC_2_TARGET_1D; + break; + case PIPE_TEXTURE_2D: + tic[2] |= NV50_TIC_2_TARGET_2D; + break; + case PIPE_TEXTURE_RECT: + tic[2] |= NV50_TIC_2_TARGET_RECT; + break; + case PIPE_TEXTURE_3D: + tic[2] |= NV50_TIC_2_TARGET_3D; + break; + case PIPE_TEXTURE_CUBE: + depth /= 6; + tic[2] |= NV50_TIC_2_TARGET_CUBE; + break; + case PIPE_TEXTURE_1D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; + break; + case PIPE_TEXTURE_2D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; + break; + case PIPE_TEXTURE_CUBE_ARRAY: + depth /= 6; + tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; + break; + default: + NOUVEAU_ERR("unexpected/invalid texture target: %d\n", + mt->base.base.target); + return FALSE; + } + + tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000; + + if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) { + width = mt->base.base.width0 << mt->ms_x; + height = mt->base.base.height0 << mt->ms_y; + } else { + width = mt->base.base.width0; + height = mt->base.base.height0; + } + + tic[4] = (1 << 31) | width; + + tic[5] = height & 0xffff; + tic[5] |= depth << 16; + tic[5] |= mt->base.base.last_level << 28; + + /* sampling points: (?) */ + if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) + tic[6] = (mt->ms_x > 1) ? 
0x88000000 : 0x03000000; + else + tic[6] = 0x03000000; + + tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; + tic[7] |= mt->ms_mode << 12; + + return &view->pipe; +} + +static boolean +nvc0_validate_tic(struct nvc0_context *nvc0, int s) +{ + uint32_t commands[32]; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nouveau_bo *txc = nvc0->screen->txc; + unsigned i; + unsigned n = 0; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_textures[s]; ++i) { + struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); + struct nv04_resource *res; + const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i)); + + if (!tic) { + if (dirty) + commands[n++] = (i << 1) | 0; + continue; + } + res = nv04_resource(tic->pipe.texture); + + if (tic->id < 0) { + tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); + + PUSH_SPACE(push, 17); + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); + PUSH_DATAh(push, txc->offset + (tic->id * 32)); + PUSH_DATA (push, txc->offset + (tic->id * 32)); + BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, 32); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1); + PUSH_DATA (push, 0x100111); + BEGIN_NIC0(push, NVC0_M2MF(DATA), 8); + PUSH_DATAp(push, &tic->tic[0], 8); + + need_flush = TRUE; + } else + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, (tic->id << 4) | 1); + NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1); + } + nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + if (!dirty) + continue; + commands[n++] = (tic->id << 9) | (i << 1) | 1; + + BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD); + } + for (; i < nvc0->state.num_textures[s]; ++i) + commands[n++] = (i << 1) | 0; + + nvc0->state.num_textures[s] = nvc0->num_textures[s]; + + if (n) { + BEGIN_NIC0(push, 
NVC0_3D(BIND_TIC(s)), n); + PUSH_DATAp(push, commands, n); + } + nvc0->textures_dirty[s] = 0; + + return need_flush; +} + +static boolean +nve4_validate_tic(struct nvc0_context *nvc0, unsigned s) +{ + struct nouveau_bo *txc = nvc0->screen->txc; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_textures[s]; ++i) { + struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); + struct nv04_resource *res; + const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i)); + + if (!tic) { + nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; + continue; + } + res = nv04_resource(tic->pipe.texture); + + if (tic->id < 0) { + tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); + + PUSH_SPACE(push, 16); + BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, txc->offset + (tic->id * 32)); + PUSH_DATA (push, txc->offset + (tic->id * 32)); + BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, 32); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9); + PUSH_DATA (push, 0x1001); + PUSH_DATAp(push, &tic->tic[0], 8); + + need_flush = TRUE; + } else + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, (tic->id << 4) | 1); + } + nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID; + nvc0->tex_handles[s][i] |= tic->id; + if (dirty) + BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD); + } + for (; i < nvc0->state.num_textures[s]; ++i) { + nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; + nvc0->textures_dirty[s] |= 1 << i; + } + + nvc0->state.num_textures[s] = nvc0->num_textures[s]; + + return need_flush; +} + +void nvc0_validate_textures(struct nvc0_context *nvc0) +{ + boolean need_flush; + + if (nvc0->screen->base.class_3d >= 
NVE4_3D_CLASS) { + need_flush = nve4_validate_tic(nvc0, 0); + need_flush |= nve4_validate_tic(nvc0, 3); + need_flush |= nve4_validate_tic(nvc0, 4); + } else { + need_flush = nvc0_validate_tic(nvc0, 0); + need_flush |= nvc0_validate_tic(nvc0, 3); + need_flush |= nvc0_validate_tic(nvc0, 4); + } + + if (need_flush) { + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, 0); + } +} + +static boolean +nvc0_validate_tsc(struct nvc0_context *nvc0, int s) +{ + uint32_t commands[16]; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned i; + unsigned n = 0; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_samplers[s]; ++i) { + struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]); + + if (!(nvc0->samplers_dirty[s] & (1 << i))) + continue; + if (!tsc) { + commands[n++] = (i << 4) | 0; + continue; + } + if (tsc->id < 0) { + tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); + + nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc, + 65536 + tsc->id * 32, NOUVEAU_BO_VRAM, + 32, tsc->tsc); + need_flush = TRUE; + } + nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); + + commands[n++] = (tsc->id << 12) | (i << 4) | 1; + } + for (; i < nvc0->state.num_samplers[s]; ++i) + commands[n++] = (i << 4) | 0; + + nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; + + if (n) { + BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n); + PUSH_DATAp(push, commands, n); + } + nvc0->samplers_dirty[s] = 0; + + return need_flush; +} + +boolean +nve4_validate_tsc(struct nvc0_context *nvc0, int s) +{ + struct nouveau_bo *txc = nvc0->screen->txc; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_samplers[s]; ++i) { + struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]); + + if (!tsc) { + nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID; + continue; + } + if (tsc->id < 0) { + tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); + + 
PUSH_SPACE(push, 16); + BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32)); + PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32)); + BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, 32); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9); + PUSH_DATA (push, 0x1001); + PUSH_DATAp(push, &tsc->tsc[0], 8); + + need_flush = TRUE; + } + nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); + + nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID; + nvc0->tex_handles[s][i] |= tsc->id << 20; + } + for (; i < nvc0->state.num_samplers[s]; ++i) { + nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID; + nvc0->samplers_dirty[s] |= 1 << i; + } + + nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; + + return need_flush; +} + +void nvc0_validate_samplers(struct nvc0_context *nvc0) +{ + boolean need_flush; + + if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { + need_flush = nve4_validate_tsc(nvc0, 0); + need_flush |= nve4_validate_tsc(nvc0, 3); + need_flush |= nve4_validate_tsc(nvc0, 4); + } else { + need_flush = nvc0_validate_tsc(nvc0, 0); + need_flush |= nvc0_validate_tsc(nvc0, 3); + need_flush |= nvc0_validate_tsc(nvc0, 4); + } + + if (need_flush) { + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, 0); + } +} + +/* Upload the "diagonal" entries for the possible texture sources ($t == $s). + * At some point we might want to get a list of the combinations used by a + * shader and fill in those entries instead of having it extract the handles. 
+ */ +void +nve4_set_tex_handles(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + uint64_t address; + unsigned s; + + if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) + return; + address = nvc0->screen->uniform_bo->offset + (5 << 16); + + for (s = 0; s < 5; ++s, address += (1 << 9)) { + uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s]; + if (!dirty) + continue; + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 512); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + do { + int i = ffs(dirty) - 1; + dirty &= ~(1 << i); + + BEGIN_NVC0(push, NVC0_3D(CB_POS), 2); + PUSH_DATA (push, (8 + i) * 4); + PUSH_DATA (push, nvc0->tex_handles[s][i]); + } while (dirty); + + nvc0->textures_dirty[s] = 0; + nvc0->samplers_dirty[s] = 0; + } +} + + +static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT]; +static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT]; +static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT]; + +void +nve4_set_surface_info(struct nouveau_pushbuf *push, + struct pipe_surface *psf, + struct nvc0_screen *screen) +{ + struct nv50_surface *sf = nv50_surface(psf); + struct nv04_resource *res; + uint64_t address; + uint32_t *const info = push->cur; + uint8_t log2cpp; + + if (psf && !nve4_su_format_map[psf->format]) + NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n"); + + push->cur += 16; + + if (!psf || !nve4_su_format_map[psf->format]) { + memset(info, 0, 16 * sizeof(*info)); + + info[0] = 0xbadf0000; + info[1] = 0x80004000; + info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] + + screen->lib_code->start; + return; + } + res = nv04_resource(sf->base.texture); + + address = res->address + sf->offset; + + info[8] = sf->width; + info[9] = sf->height; + info[10] = sf->depth; + switch (res->base.target) { + case PIPE_TEXTURE_1D_ARRAY: + info[11] = 1; + break; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + info[11] = 2; + break; + case 
PIPE_TEXTURE_3D: + info[11] = 3; + break; + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + info[11] = 4; + break; + default: + info[11] = 0; + break; + } + log2cpp = (0xf000 & nve4_su_format_aux_map[sf->base.format]) >> 12; + + info[12] = nve4_suldp_lib_offset[sf->base.format] + screen->lib_code->start; + + /* limit in bytes for raw access */ + info[13] = (0x06 << 22) | ((sf->width << log2cpp) - 1); + + info[1] = nve4_su_format_map[sf->base.format]; + +#if 0 + switch (util_format_get_blocksizebits(sf->base.format)) { + case 16: info[1] |= 1 << 16; break; + case 32: info[1] |= 2 << 16; break; + case 64: info[1] |= 3 << 16; break; + case 128: info[1] |= 4 << 16; break; + default: + break; + } +#else + info[1] |= log2cpp << 16; + info[1] |= 0x4000; + info[1] |= (0x0f00 & nve4_su_format_aux_map[sf->base.format]); +#endif + + if (res->base.target == PIPE_BUFFER) { + info[0] = address >> 8; + info[2] = sf->width - 1; + info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22; + info[3] = 0; + info[4] = 0; + info[5] = 0; + info[6] = 0; + info[7] = 0; + info[14] = 0; + info[15] = 0; + } else { + struct nv50_miptree *mt = nv50_miptree(&res->base); + struct nv50_miptree_level *lvl = &mt->level[sf->base.u.tex.level]; + const unsigned z = sf->base.u.tex.first_layer; + + if (z) { + if (mt->layout_3d) { + address += nvc0_mt_zslice_offset(mt, psf->u.tex.level, z); + /* doesn't work if z passes z-tile boundary */ + assert(sf->depth == 1); + } else { + address += mt->layer_stride * z; + } + } + info[0] = address >> 8; + info[2] = sf->width - 1; + /* NOTE: this is really important: */ + info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22; + info[3] = (0x88 << 24) | (lvl->pitch / 64); + info[4] = sf->height - 1; + info[4] |= (lvl->tile_mode & 0x0f0) << 25; + info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22; + info[5] = mt->layer_stride >> 8; + info[6] = sf->depth - 1; + info[6] |= (lvl->tile_mode & 0xf00) << 21; + 
info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22; + info[7] = 0; + info[14] = mt->ms_x; + info[15] = mt->ms_y; + } +} + +static INLINE void +nvc0_update_surface_bindings(struct nvc0_context *nvc0) +{ + /* TODO */ +} + +static INLINE void +nve4_update_surface_bindings(struct nvc0_context *nvc0) +{ + /* TODO */ +} + +void +nvc0_validate_surfaces(struct nvc0_context *nvc0) +{ + if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { + nve4_update_surface_bindings(nvc0); + } else { + nvc0_update_surface_bindings(nvc0); + } +} + + +static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] = +{ + [PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT, + [PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT, + [PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT, + [PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM, + [PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM, + [PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT, + [PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT, + [PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM, + [PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM, + [PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT, + [PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT, + [PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT, + [PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM, +/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */ + [PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT, + [PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT, + [PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT, + [PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT, + [PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM, + [PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM, + [PIPE_FORMAT_R16G16_SINT] = 
NVE4_IMAGE_FORMAT_RG16_SINT, + [PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT, + [PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM, + [PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM, + [PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT, + [PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT, + [PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT, + [PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT, + [PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT, + [PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT, + [PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM, + [PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM, + [PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT, + [PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT, + [PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM, + [PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM, + [PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT, + [PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT, +}; + +/* Auxiliary format description values for surface instructions. 
+ * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22 + */ +static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] = +{ + [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842, + [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842, + [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842, + + [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933, + [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933, + [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933, + [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933, + + [PIPE_FORMAT_R32G32_FLOAT] = 0x3433, + [PIPE_FORMAT_R32G32_SINT] = 0x3433, + [PIPE_FORMAT_R32G32_UINT] = 0x3433, + + [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24, +/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24, */ + [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24, + [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24, + [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24, + [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24, + [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24, + + [PIPE_FORMAT_R16G16_UNORM] = 0x2524, + [PIPE_FORMAT_R16G16_SNORM] = 0x2524, + [PIPE_FORMAT_R16G16_SINT] = 0x2524, + [PIPE_FORMAT_R16G16_UINT] = 0x2524, + [PIPE_FORMAT_R16G16_FLOAT] = 0x2524, + + [PIPE_FORMAT_R32_SINT] = 0x2024, + [PIPE_FORMAT_R32_UINT] = 0x2024, + [PIPE_FORMAT_R32_FLOAT] = 0x2024, + + [PIPE_FORMAT_R8G8_UNORM] = 0x1615, + [PIPE_FORMAT_R8G8_SNORM] = 0x1615, + [PIPE_FORMAT_R8G8_SINT] = 0x1615, + [PIPE_FORMAT_R8G8_UINT] = 0x1615, + + [PIPE_FORMAT_R16_UNORM] = 0x1115, + [PIPE_FORMAT_R16_SNORM] = 0x1115, + [PIPE_FORMAT_R16_SINT] = 0x1115, + [PIPE_FORMAT_R16_UINT] = 0x1115, + [PIPE_FORMAT_R16_FLOAT] = 0x1115, + + [PIPE_FORMAT_R8_UNORM] = 0x0206, + [PIPE_FORMAT_R8_SNORM] = 0x0206, + [PIPE_FORMAT_R8_SINT] = 0x0206, + [PIPE_FORMAT_R8_UINT] = 0x0206 +}; + +/* NOTE: These are hardcoded offsets for the shader library. + * TODO: Automate them. 
+ */ +static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] = +{ + [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218, + [PIPE_FORMAT_R32G32B32A32_SINT] = 0x218, + [PIPE_FORMAT_R32G32B32A32_UINT] = 0x218, + [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248, + [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8, + [PIPE_FORMAT_R16G16B16A16_SINT] = 0x330, + [PIPE_FORMAT_R16G16B16A16_UINT] = 0x388, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8, + [PIPE_FORMAT_R32G32_FLOAT] = 0x428, + [PIPE_FORMAT_R32G32_SINT] = 0x468, + [PIPE_FORMAT_R32G32_UINT] = 0x468, + [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8, +/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530, */ + [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588, + [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8, + [PIPE_FORMAT_R8G8B8A8_SINT] = 0x670, + [PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8, + [PIPE_FORMAT_B5G6R5_UNORM] = 0x718, + [PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0, + [PIPE_FORMAT_R16G16_UNORM] = 0x828, + [PIPE_FORMAT_R16G16_SNORM] = 0x890, + [PIPE_FORMAT_R16G16_SINT] = 0x8f0, + [PIPE_FORMAT_R16G16_UINT] = 0x948, + [PIPE_FORMAT_R16G16_FLOAT] = 0x998, + [PIPE_FORMAT_R32_FLOAT] = 0x9e8, + [PIPE_FORMAT_R32_SINT] = 0xa30, + [PIPE_FORMAT_R32_UINT] = 0xa30, + [PIPE_FORMAT_R8G8_UNORM] = 0xa78, + [PIPE_FORMAT_R8G8_SNORM] = 0xae0, + [PIPE_FORMAT_R8G8_UINT] = 0xb48, + [PIPE_FORMAT_R8G8_SINT] = 0xb98, + [PIPE_FORMAT_R16_UNORM] = 0xbe8, + [PIPE_FORMAT_R16_SNORM] = 0xc48, + [PIPE_FORMAT_R16_SINT] = 0xca0, + [PIPE_FORMAT_R16_UINT] = 0xce8, + [PIPE_FORMAT_R16_FLOAT] = 0xd30, + [PIPE_FORMAT_R8_UNORM] = 0xd88, + [PIPE_FORMAT_R8_SNORM] = 0xde0, + [PIPE_FORMAT_R8_SINT] = 0xe38, + [PIPE_FORMAT_R8_UINT] = 0xe88, + [PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0 +}; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c new file mode 100644 index 00000000000..82f1ffcd329 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c @@ -0,0 +1,558 @@ + +#include "util/u_format.h" + +#include "nvc0/nvc0_context.h" + +#include "nv50/nv50_defs.xml.h" + 
+struct nvc0_transfer { + struct pipe_transfer base; + struct nv50_m2mf_rect rect[2]; + uint32_t nblocksx; + uint16_t nblocksy; + uint16_t nlayers; +}; + +static void +nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nouveau_bufctx *bctx = nvc0->bufctx; + const int cpp = dst->cpp; + uint32_t src_ofst = src->base; + uint32_t dst_ofst = dst->base; + uint32_t height = nblocksy; + uint32_t sy = src->y; + uint32_t dy = dst->y; + uint32_t exec = (1 << 20); + + assert(dst->cpp == src->cpp); + + nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD); + nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + if (nouveau_bo_memtype(src->bo)) { + BEGIN_NVC0(push, NVC0_M2MF(TILING_MODE_IN), 5); + PUSH_DATA (push, src->tile_mode); + PUSH_DATA (push, src->width * cpp); + PUSH_DATA (push, src->height); + PUSH_DATA (push, src->depth); + PUSH_DATA (push, src->z); + } else { + src_ofst += src->y * src->pitch + src->x * cpp; + + BEGIN_NVC0(push, NVC0_M2MF(PITCH_IN), 1); + PUSH_DATA (push, src->width * cpp); + + exec |= NVC0_M2MF_EXEC_LINEAR_IN; + } + + if (nouveau_bo_memtype(dst->bo)) { + BEGIN_NVC0(push, NVC0_M2MF(TILING_MODE_OUT), 5); + PUSH_DATA (push, dst->tile_mode); + PUSH_DATA (push, dst->width * cpp); + PUSH_DATA (push, dst->height); + PUSH_DATA (push, dst->depth); + PUSH_DATA (push, dst->z); + } else { + dst_ofst += dst->y * dst->pitch + dst->x * cpp; + + BEGIN_NVC0(push, NVC0_M2MF(PITCH_OUT), 1); + PUSH_DATA (push, dst->width * cpp); + + exec |= NVC0_M2MF_EXEC_LINEAR_OUT; + } + + while (height) { + int line_count = height > 2047 ? 
2047 : height; + + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_IN_HIGH), 2); + PUSH_DATAh(push, src->bo->offset + src_ofst); + PUSH_DATA (push, src->bo->offset + src_ofst); + + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); + PUSH_DATAh(push, dst->bo->offset + dst_ofst); + PUSH_DATA (push, dst->bo->offset + dst_ofst); + + if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) { + BEGIN_NVC0(push, NVC0_M2MF(TILING_POSITION_IN_X), 2); + PUSH_DATA (push, src->x * cpp); + PUSH_DATA (push, sy); + } else { + src_ofst += line_count * src->pitch; + } + if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) { + BEGIN_NVC0(push, NVC0_M2MF(TILING_POSITION_OUT_X), 2); + PUSH_DATA (push, dst->x * cpp); + PUSH_DATA (push, dy); + } else { + dst_ofst += line_count * dst->pitch; + } + + BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, nblocksx * cpp); + PUSH_DATA (push, line_count); + BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1); + PUSH_DATA (push, exec); + + height -= line_count; + sy += line_count; + dy += line_count; + } + + nouveau_bufctx_reset(bctx, 0); +} + +static void +nve4_m2mf_transfer_rect(struct nvc0_context *nvc0, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nouveau_bufctx *bctx = nvc0->bufctx; + uint32_t exec; + uint32_t src_base = src->base; + uint32_t dst_base = dst->base; + const int cpp = dst->cpp; + + assert(dst->cpp == src->cpp); + + nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR); + nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + exec = 0x200 /* 2D_ENABLE */ | 0x6 /* UNK */; + + if (!nouveau_bo_memtype(dst->bo)) { + assert(!dst->z); + dst_base += dst->y * dst->pitch + dst->x * cpp; + exec |= 0x100; /* DST_MODE_2D_LINEAR */ + } + if (!nouveau_bo_memtype(src->bo)) { + assert(!src->z); + src_base += src->y * src->pitch + src->x * cpp; + exec |= 
0x080; /* SRC_MODE_2D_LINEAR */ + } + + BEGIN_NVC0(push, SUBC_COPY(0x070c), 6); + PUSH_DATA (push, 0x1000 | dst->tile_mode); + PUSH_DATA (push, dst->pitch); + PUSH_DATA (push, dst->height); + PUSH_DATA (push, dst->depth); + PUSH_DATA (push, dst->z); + PUSH_DATA (push, (dst->y << 16) | (dst->x * cpp)); + + BEGIN_NVC0(push, SUBC_COPY(0x0728), 6); + PUSH_DATA (push, 0x1000 | src->tile_mode); + PUSH_DATA (push, src->pitch); + PUSH_DATA (push, src->height); + PUSH_DATA (push, src->depth); + PUSH_DATA (push, src->z); + PUSH_DATA (push, (src->y << 16) | (src->x * cpp)); + + BEGIN_NVC0(push, SUBC_COPY(0x0400), 8); + PUSH_DATAh(push, src->bo->offset + src_base); + PUSH_DATA (push, src->bo->offset + src_base); + PUSH_DATAh(push, dst->bo->offset + dst_base); + PUSH_DATA (push, dst->bo->offset + dst_base); + PUSH_DATA (push, src->pitch); + PUSH_DATA (push, dst->pitch); + PUSH_DATA (push, nblocksx * cpp); + PUSH_DATA (push, nblocksy); + + BEGIN_NVC0(push, SUBC_COPY(0x0300), 1); + PUSH_DATA (push, exec); + + nouveau_bufctx_reset(bctx, 0); +} + +void +nvc0_m2mf_push_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data) +{ + struct nvc0_context *nvc0 = nvc0_context(&nv->pipe); + struct nouveau_pushbuf *push = nv->pushbuf; + uint32_t *src = (uint32_t *)data; + unsigned count = (size + 3) / 4; + + nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nvc0->bufctx); + nouveau_pushbuf_validate(push); + + while (count) { + unsigned nr; + + if (!PUSH_SPACE(push, 16)) + break; + nr = PUSH_AVAIL(push); + assert(nr >= 16); + nr = MIN2(count, nr - 9); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); + PUSH_DATAh(push, dst->offset + offset); + PUSH_DATA (push, dst->offset + offset); + BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, MIN2(size, nr * 4)); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, 
NVC0_M2MF(EXEC), 1); + PUSH_DATA (push, 0x100111); + + /* must not be interrupted (trap on QUERY fence, 0x50 works however) */ + BEGIN_NIC0(push, NVC0_M2MF(DATA), nr); + PUSH_DATAp(push, src, nr); + + count -= nr; + src += nr; + offset += nr * 4; + size -= nr * 4; + } + + nouveau_bufctx_reset(nvc0->bufctx, 0); +} + +void +nve4_p2mf_push_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data) +{ + struct nvc0_context *nvc0 = nvc0_context(&nv->pipe); + struct nouveau_pushbuf *push = nv->pushbuf; + uint32_t *src = (uint32_t *)data; + unsigned count = (size + 3) / 4; + + nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nvc0->bufctx); + nouveau_pushbuf_validate(push); + + while (count) { + unsigned nr; + + if (!PUSH_SPACE(push, 16)) + break; + nr = PUSH_AVAIL(push); + assert(nr >= 16); + nr = MIN2(count, nr - 8); + nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1)); + + BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, dst->offset + offset); + PUSH_DATA (push, dst->offset + offset); + BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, MIN2(size, nr * 4)); + PUSH_DATA (push, 1); + /* must not be interrupted (trap on QUERY fence, 0x50 works however) */ + BEGIN_1IC0(push, NVE4_P2MF(EXEC), nr + 1); + PUSH_DATA (push, 0x1001); + PUSH_DATAp(push, src, nr); + + count -= nr; + src += nr; + offset += nr * 4; + size -= nr * 4; + } + + nouveau_bufctx_reset(nvc0->bufctx, 0); +} + +static void +nvc0_m2mf_copy_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size) +{ + struct nouveau_pushbuf *push = nv->pushbuf; + struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx; + + nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD); + nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR); + 
nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + while (size) { + unsigned bytes = MIN2(size, 1 << 17); + + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); + PUSH_DATAh(push, dst->offset + dstoff); + PUSH_DATA (push, dst->offset + dstoff); + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_IN_HIGH), 2); + PUSH_DATAh(push, src->offset + srcoff); + PUSH_DATA (push, src->offset + srcoff); + BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, bytes); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1); + PUSH_DATA (push, (1 << NVC0_M2MF_EXEC_INC__SHIFT) | + NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT); + + srcoff += bytes; + dstoff += bytes; + size -= bytes; + } + + nouveau_bufctx_reset(bctx, 0); +} + +static void +nve4_m2mf_copy_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size) +{ + struct nouveau_pushbuf *push = nv->pushbuf; + struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx; + + nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD); + nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + BEGIN_NVC0(push, SUBC_COPY(0x0400), 4); + PUSH_DATAh(push, src->offset + srcoff); + PUSH_DATA (push, src->offset + srcoff); + PUSH_DATAh(push, dst->offset + dstoff); + PUSH_DATA (push, dst->offset + dstoff); + BEGIN_NVC0(push, SUBC_COPY(0x0418), 1); + PUSH_DATA (push, size); + BEGIN_NVC0(push, SUBC_COPY(0x0300), 1); + PUSH_DATA (push, 0x186); + + nouveau_bufctx_reset(bctx, 0); +} + + +static INLINE boolean +nvc0_mt_transfer_can_map_directly(struct nv50_miptree *mt) +{ + if (mt->base.domain == NOUVEAU_BO_VRAM) + return FALSE; + if (mt->base.base.usage != PIPE_USAGE_STAGING) + return FALSE; + return !nouveau_bo_memtype(mt->base.bo); +} + +static INLINE boolean +nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt, 
unsigned usage) +{ + if (!mt->base.mm) { + uint32_t access = (usage & PIPE_TRANSFER_WRITE) ? + NOUVEAU_BO_WR : NOUVEAU_BO_RD; + return !nouveau_bo_wait(mt->base.bo, access, nvc0->base.client); + } + if (usage & PIPE_TRANSFER_WRITE) + return !mt->base.fence || nouveau_fence_wait(mt->base.fence); + return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr); +} + +void * +nvc0_miptree_transfer_map(struct pipe_context *pctx, + struct pipe_resource *res, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer) +{ + struct nvc0_context *nvc0 = nvc0_context(pctx); + struct nouveau_device *dev = nvc0->screen->base.device; + struct nv50_miptree *mt = nv50_miptree(res); + struct nvc0_transfer *tx; + uint32_t size; + int ret; + unsigned flags = 0; + + if (nvc0_mt_transfer_can_map_directly(mt)) { + ret = !nvc0_mt_sync(nvc0, mt, usage); + if (!ret) + ret = nouveau_bo_map(mt->base.bo, 0, NULL); + if (ret && + (usage & PIPE_TRANSFER_MAP_DIRECTLY)) + return NULL; + if (!ret) + usage |= PIPE_TRANSFER_MAP_DIRECTLY; + } else + if (usage & PIPE_TRANSFER_MAP_DIRECTLY) + return NULL; + + tx = CALLOC_STRUCT(nvc0_transfer); + if (!tx) + return NULL; + + pipe_resource_reference(&tx->base.resource, res); + + tx->base.level = level; + tx->base.usage = usage; + tx->base.box = *box; + + if (util_format_is_plain(res->format)) { + tx->nblocksx = box->width << mt->ms_x; + tx->nblocksy = box->height << mt->ms_y; + } else { + tx->nblocksx = util_format_get_nblocksx(res->format, box->width); + tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + } + tx->nlayers = box->depth; + + tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); + tx->base.layer_stride = tx->nblocksy * tx->base.stride; + + if (usage & PIPE_TRANSFER_MAP_DIRECTLY) { + tx->base.stride = align(tx->base.stride, 128); + *ptransfer = &tx->base; + return mt->base.bo->map + mt->base.offset; + } + + nv50_m2mf_rect_setup(&tx->rect[0], res, level, box->x, 
box->y, box->z); + + size = tx->base.layer_stride; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, + size * tx->nlayers, NULL, &tx->rect[1].bo); + if (ret) { + pipe_resource_reference(&tx->base.resource, NULL); + FREE(tx); + return NULL; + } + + tx->rect[1].cpp = tx->rect[0].cpp; + tx->rect[1].width = tx->nblocksx; + tx->rect[1].height = tx->nblocksy; + tx->rect[1].depth = 1; + tx->rect[1].pitch = tx->base.stride; + tx->rect[1].domain = NOUVEAU_BO_GART; + + if (usage & PIPE_TRANSFER_READ) { + unsigned base = tx->rect[0].base; + unsigned z = tx->rect[0].z; + unsigned i; + for (i = 0; i < tx->nlayers; ++i) { + nvc0->m2mf_copy_rect(nvc0, &tx->rect[1], &tx->rect[0], + tx->nblocksx, tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += size; + } + tx->rect[0].z = z; + tx->rect[0].base = base; + tx->rect[1].base = 0; + } + + if (tx->rect[1].bo->map) { + *ptransfer = &tx->base; + return tx->rect[1].bo->map; + } + + if (usage & PIPE_TRANSFER_READ) + flags = NOUVEAU_BO_RD; + if (usage & PIPE_TRANSFER_WRITE) + flags |= NOUVEAU_BO_WR; + + ret = nouveau_bo_map(tx->rect[1].bo, flags, nvc0->screen->base.client); + if (ret) { + pipe_resource_reference(&tx->base.resource, NULL); + nouveau_bo_ref(NULL, &tx->rect[1].bo); + FREE(tx); + return NULL; + } + + *ptransfer = &tx->base; + return tx->rect[1].bo->map; +} + +void +nvc0_miptree_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct nvc0_context *nvc0 = nvc0_context(pctx); + struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; + struct nv50_miptree *mt = nv50_miptree(tx->base.resource); + unsigned i; + + if (tx->base.usage & PIPE_TRANSFER_MAP_DIRECTLY) { + pipe_resource_reference(&transfer->resource, NULL); + + FREE(tx); + return; + } + + if (tx->base.usage & PIPE_TRANSFER_WRITE) { + for (i = 0; i < tx->nlayers; ++i) { + nvc0->m2mf_copy_rect(nvc0, &tx->rect[0], &tx->rect[1], + tx->nblocksx, 
tx->nblocksy); + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += tx->nblocksy * tx->base.stride; + } + NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_wr, 1); + } + if (tx->base.usage & PIPE_TRANSFER_READ) + NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_rd, 1); + + nouveau_bo_ref(NULL, &tx->rect[1].bo); + pipe_resource_reference(&transfer->resource, NULL); + + FREE(tx); +} + +/* This happens rather often with DTD9/st. */ +void +nvc0_cb_push(struct nouveau_context *nv, + struct nouveau_bo *bo, unsigned domain, + unsigned base, unsigned size, + unsigned offset, unsigned words, const uint32_t *data) +{ + struct nouveau_pushbuf *push = nv->pushbuf; + + NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_count, 1); + NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_bytes, words * 4); + + assert(!(offset & 3)); + size = align(size, 0x100); + + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, size); + PUSH_DATAh(push, bo->offset + base); + PUSH_DATA (push, bo->offset + base); + + while (words) { + unsigned nr = PUSH_AVAIL(push); + nr = MIN2(nr, words); + nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1); + + PUSH_SPACE(push, nr + 2); + PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain); + BEGIN_1IC0(push, NVC0_3D(CB_POS), nr + 1); + PUSH_DATA (push, offset); + PUSH_DATAp(push, data, nr); + + words -= nr; + data += nr; + offset += nr * 4; + } +} + +void +nvc0_init_transfer_functions(struct nvc0_context *nvc0) +{ + if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { + nvc0->m2mf_copy_rect = nve4_m2mf_transfer_rect; + nvc0->base.copy_data = nve4_m2mf_copy_linear; + nvc0->base.push_data = nve4_p2mf_push_linear; + } else { + nvc0->m2mf_copy_rect = nvc0_m2mf_transfer_rect; + nvc0->base.copy_data = nvc0_m2mf_copy_linear; + nvc0->base.push_data = nvc0_m2mf_push_linear; + } + nvc0->base.push_cb = nvc0_cb_push; +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c new file 
mode 100644 index 00000000000..c4bc7dc693b --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -0,0 +1,891 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_resource.h" + +#include "nvc0/nvc0_3d.xml.h" + +void +nvc0_vertex_state_delete(struct pipe_context *pipe, + void *hwcso) +{ + struct nvc0_vertex_stateobj *so = hwcso; + + if (so->translate) + so->translate->release(so->translate); + FREE(hwcso); +} + +void * +nvc0_vertex_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nvc0_vertex_stateobj *so; + struct translate_key transkey; + unsigned i; + unsigned src_offset_max = 0; + + so = MALLOC(sizeof(*so) + + num_elements * sizeof(struct nvc0_vertex_element)); + if (!so) + return NULL; + so->num_elements = num_elements; + so->instance_elts = 0; + so->instance_bufs = 0; + so->shared_slots = FALSE; + so->need_conversion = FALSE; + + memset(so->vb_access_size, 0, sizeof(so->vb_access_size)); + + for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) + so->min_instance_div[i] = 0xffffffff; + + transkey.nr_elements = 0; + transkey.output_stride = 0; + + for (i = 0; i < num_elements; ++i) { + const struct pipe_vertex_element *ve = &elements[i]; + const unsigned vbi = ve->vertex_buffer_index; + unsigned size; + enum pipe_format fmt = ve->src_format; + + so->element[i].pipe = elements[i]; + so->element[i].state = nvc0_format_table[fmt].vtx; + + if (!so->element[i].state) { + switch (util_format_get_nr_components(fmt)) { + case 1: fmt = PIPE_FORMAT_R32_FLOAT; break; + case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break; + case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break; + case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break; + default: + assert(0); + FREE(so); + return NULL; + } + so->element[i].state = nvc0_format_table[fmt].vtx; + so->need_conversion = TRUE; + } + size = util_format_get_blocksize(fmt); + + src_offset_max = 
MAX2(src_offset_max, ve->src_offset); + + if (so->vb_access_size[vbi] < (ve->src_offset + size)) + so->vb_access_size[vbi] = ve->src_offset + size; + + if (unlikely(ve->instance_divisor)) { + so->instance_elts |= 1 << i; + so->instance_bufs |= 1 << vbi; + if (ve->instance_divisor < so->min_instance_div[vbi]) + so->min_instance_div[vbi] = ve->instance_divisor; + } + + if (1) { + unsigned ca; + unsigned j = transkey.nr_elements++; + + ca = util_format_description(fmt)->channel[0].size / 8; + if (ca != 1 && ca != 2) + ca = 4; + + transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL; + transkey.element[j].input_format = ve->src_format; + transkey.element[j].input_buffer = vbi; + transkey.element[j].input_offset = ve->src_offset; + transkey.element[j].instance_divisor = ve->instance_divisor; + + transkey.output_stride = align(transkey.output_stride, ca); + transkey.element[j].output_format = fmt; + transkey.element[j].output_offset = transkey.output_stride; + transkey.output_stride += size; + + so->element[i].state_alt = so->element[i].state; + so->element[i].state_alt |= transkey.element[j].output_offset << 7; + } + + so->element[i].state |= i << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT; + } + transkey.output_stride = align(transkey.output_stride, 4); + + so->size = transkey.output_stride; + so->translate = translate_create(&transkey); + + if (so->instance_elts || src_offset_max >= (1 << 14)) + return so; + so->shared_slots = TRUE; + + for (i = 0; i < num_elements; ++i) { + const unsigned b = elements[i].vertex_buffer_index; + const unsigned s = elements[i].src_offset; + so->element[i].state &= ~NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK; + so->element[i].state |= b << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT; + so->element[i].state |= s << NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT; + } + return so; +} + +#define NVC0_3D_VERTEX_ATTRIB_INACTIVE \ + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | \ + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 
+ +#define VTX_ATTR(a, c, t, s) \ + ((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \ + (NVC0_3D_VTX_ATTR_DEFINE_SIZE_##s) | \ + ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \ + ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT)) + +static void +nvc0_set_constant_vertex_attrib(struct nvc0_context *nvc0, const unsigned a) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_vertex_element *ve = &nvc0->vertex->element[a].pipe; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; + uint32_t mode; + const struct util_format_description *desc; + void *dst; + const void *src = (const uint8_t *)vb->user_buffer + ve->src_offset; + assert(!vb->buffer); + + desc = util_format_description(ve->src_format); + + PUSH_SPACE(push, 6); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 5); + dst = &push->cur[1]; + if (desc->channel[0].pure_integer) { + if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + mode = VTX_ATTR(a, 4, SINT, 32); + desc->unpack_rgba_sint(dst, 0, src, 0, 1, 1); + } else { + mode = VTX_ATTR(a, 4, UINT, 32); + desc->unpack_rgba_uint(dst, 0, src, 0, 1, 1); + } + } else { + mode = VTX_ATTR(a, 4, FLOAT, 32); + desc->unpack_rgba_float(dst, 0, src, 0, 1, 1); + } + push->cur[0] = mode; + push->cur += 5; +} + +static INLINE void +nvc0_user_vbuf_range(struct nvc0_context *nvc0, int vbi, + uint32_t *base, uint32_t *size) +{ + if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { + const uint32_t div = nvc0->vertex->min_instance_div[vbi]; + *base = nvc0->instance_off * nvc0->vtxbuf[vbi].stride; + *size = (nvc0->instance_max / div) * nvc0->vtxbuf[vbi].stride + + nvc0->vertex->vb_access_size[vbi]; + } else { + /* NOTE: if there are user buffers, we *must* have index bounds */ + assert(nvc0->vb_elt_limit != ~0); + *base = nvc0->vb_elt_first * nvc0->vtxbuf[vbi].stride; + *size = nvc0->vb_elt_limit * nvc0->vtxbuf[vbi].stride + + nvc0->vertex->vb_access_size[vbi]; + } +} + +static INLINE void +nvc0_release_user_vbufs(struct nvc0_context *nvc0) +{ + 
if (nvc0->vbo_user) { + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); + nouveau_scratch_done(&nvc0->base); + } +} + +static void +nvc0_update_user_vbufs(struct nvc0_context *nvc0) +{ + uint64_t address[PIPE_MAX_ATTRIBS]; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + int i; + uint32_t written = 0; + + PUSH_SPACE(push, nvc0->vertex->num_elements * 8); + for (i = 0; i < nvc0->vertex->num_elements; ++i) { + struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe; + const unsigned b = ve->vertex_buffer_index; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b]; + uint32_t base, size; + + if (!(nvc0->vbo_user & (1 << b))) + continue; + if (!vb->stride) { + nvc0_set_constant_vertex_attrib(nvc0, i); + continue; + } + nvc0_user_vbuf_range(nvc0, b, &base, &size); + + if (!(written & (1 << b))) { + struct nouveau_bo *bo; + const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART; + written |= 1 << b; + address[b] = nouveau_scratch_data(&nvc0->base, vb->user_buffer, + base, size, &bo); + if (bo) + BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo); + + NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size); + } + + BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5); + PUSH_DATA (push, i); + PUSH_DATAh(push, address[b] + base + size - 1); + PUSH_DATA (push, address[b] + base + size - 1); + PUSH_DATAh(push, address[b] + ve->src_offset); + PUSH_DATA (push, address[b] + ve->src_offset); + } + nvc0->base.vbo_dirty = TRUE; +} + +static void +nvc0_update_user_vbufs_shared(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + uint32_t mask = nvc0->vbo_user & ~nvc0->constant_vbos; + + PUSH_SPACE(push, nvc0->num_vtxbufs * 8); + while (mask) { + struct nouveau_bo *bo; + const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART; + uint64_t address; + uint32_t base, size; + const int b = ffs(mask) - 1; + mask &= ~(1 << b); + + nvc0_user_vbuf_range(nvc0, b, &base, &size); + + address = 
nouveau_scratch_data(&nvc0->base, nvc0->vtxbuf[b].user_buffer, + base, size, &bo); + if (bo) + BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo); + + BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5); + PUSH_DATA (push, b); + PUSH_DATAh(push, address + base + size - 1); + PUSH_DATA (push, address + base + size - 1); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + + NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size); + } + + mask = nvc0->state.constant_elts; + while (mask) { + int i = ffs(mask) - 1; + mask &= ~(1 << i); + nvc0_set_constant_vertex_attrib(nvc0, i); + } +} + +static void +nvc0_validate_vertex_buffers(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const struct nvc0_vertex_stateobj *vertex = nvc0->vertex; + uint32_t refd = 0; + unsigned i; + + PUSH_SPACE(push, vertex->num_elements * 8); + for (i = 0; i < vertex->num_elements; ++i) { + const struct nvc0_vertex_element *ve; + const struct pipe_vertex_buffer *vb; + struct nv04_resource *res; + unsigned b; + unsigned limit, offset; + + if (nvc0->state.constant_elts & (1 << i)) + continue; + ve = &vertex->element[i]; + b = ve->pipe.vertex_buffer_index; + vb = &nvc0->vtxbuf[b]; + + if (!vb->buffer) { + if (vb->stride) { + if (ve->pipe.instance_divisor) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1); + PUSH_DATA (push, ve->pipe.instance_divisor); + } + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1); + PUSH_DATA (push, (1 << 12) | vb->stride); + } + /* address/value set in nvc0_update_user_vbufs */ + continue; + } + res = nv04_resource(vb->buffer); + offset = ve->pipe.src_offset + vb->buffer_offset; + limit = vb->buffer->width0 - 1; + + if (unlikely(ve->pipe.instance_divisor)) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4); + PUSH_DATA (push, (1 << 12) | vb->stride); + PUSH_DATAh(push, res->address + offset); + PUSH_DATA (push, res->address + offset); + PUSH_DATA (push, ve->pipe.instance_divisor); + } else { + 
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3); + PUSH_DATA (push, (1 << 12) | vb->stride); + PUSH_DATAh(push, res->address + offset); + PUSH_DATA (push, res->address + offset); + } + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); + PUSH_DATAh(push, res->address + limit); + PUSH_DATA (push, res->address + limit); + + if (!(refd & (1 << b))) { + refd |= 1 << b; + BCTX_REFN(nvc0->bufctx_3d, VTX, res, RD); + } + } + if (nvc0->vbo_user) + nvc0_update_user_vbufs(nvc0); +} + +static void +nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned b; + const uint32_t mask = nvc0->vbo_user; + + PUSH_SPACE(push, nvc0->num_vtxbufs * 8); + for (b = 0; b < nvc0->num_vtxbufs; ++b) { + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b]; + struct nv04_resource *buf; + uint32_t offset, limit; + + if (mask & (1 << b)) { + if (vb->stride) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 1); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + } + /* address/value set in nvc0_update_user_vbufs_shared */ + continue; + } + buf = nv04_resource(vb->buffer); + offset = vb->buffer_offset; + limit = buf->base.width0 - 1; + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 3); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2); + PUSH_DATAh(push, buf->address + limit); + PUSH_DATA (push, buf->address + limit); + + BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD); + } + if (nvc0->vbo_user) + nvc0_update_user_vbufs_shared(nvc0); +} + +void +nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_vertex_stateobj *vertex = nvc0->vertex; + struct nvc0_vertex_element *ve; + uint32_t const_vbos; + unsigned i; + uint8_t vbo_mode; + boolean update_vertex; + + 
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); + + if (unlikely(vertex->need_conversion) || + unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) { + vbo_mode = 3; + } else { + vbo_mode = (nvc0->vbo_user && nvc0->vbo_push_hint) ? 1 : 0; + } + const_vbos = vbo_mode ? 0 : nvc0->constant_vbos; + + update_vertex = (nvc0->dirty & NVC0_NEW_VERTEX) || + (const_vbos != nvc0->state.constant_vbos) || + (vbo_mode != nvc0->state.vbo_mode); + + if (update_vertex) { + const unsigned n = MAX2(vertex->num_elements, nvc0->state.num_vtxelts); + + nvc0->state.constant_vbos = const_vbos; + nvc0->state.constant_elts = 0; + nvc0->state.num_vtxelts = vertex->num_elements; + nvc0->state.vbo_mode = vbo_mode; + + if (unlikely(vbo_mode)) { + if (unlikely(nvc0->state.instance_elts & 3)) { + /* translate mode uses only 2 vertex buffers */ + nvc0->state.instance_elts &= ~3; + PUSH_SPACE(push, 3); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(0)), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + } + + PUSH_SPACE(push, n * 2 + 4); + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n); + for (i = 0; i < vertex->num_elements; ++i) + PUSH_DATA(push, vertex->element[i].state_alt); + for (; i < n; ++i) + PUSH_DATA(push, NVC0_3D_VERTEX_ATTRIB_INACTIVE); + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 1); + PUSH_DATA (push, (1 << 12) | vertex->size); + for (i = 1; i < n; ++i) + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0); + } else { + uint32_t *restrict data; + + if (unlikely(vertex->instance_elts != nvc0->state.instance_elts)) { + nvc0->state.instance_elts = vertex->instance_elts; + assert(n); /* if (n == 0), both masks should be 0 */ + PUSH_SPACE(push, 3); + BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2); + PUSH_DATA (push, n); + PUSH_DATA (push, vertex->instance_elts); + } + + PUSH_SPACE(push, n * 2 + 1); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n); + data = push->cur; + push->cur += n; + for (i = 0; i < vertex->num_elements; ++i) { + 
ve = &vertex->element[i]; + data[i] = ve->state; + if (unlikely(const_vbos & (1 << ve->pipe.vertex_buffer_index))) { + nvc0->state.constant_elts |= 1 << i; + data[i] |= NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST; + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0); + } + } + for (; i < n; ++i) { + data[i] = NVC0_3D_VERTEX_ATTRIB_INACTIVE; + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0); + } + } + } + if (nvc0->state.vbo_mode) /* using translate, don't set up arrays here */ + return; + + if (vertex->shared_slots) + nvc0_validate_vertex_buffers_shared(nvc0); + else + nvc0_validate_vertex_buffers(nvc0); +} + +void +nvc0_idxbuf_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv04_resource *buf = nv04_resource(nvc0->idxbuf.buffer); + + assert(buf); + assert(nouveau_resource_mapped_by_gpu(&buf->base)); + + PUSH_SPACE(push, 6); + BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5); + PUSH_DATAh(push, buf->address + nvc0->idxbuf.offset); + PUSH_DATA (push, buf->address + nvc0->idxbuf.offset); + PUSH_DATAh(push, buf->address + buf->base.width0 - 1); + PUSH_DATA (push, buf->address + buf->base.width0 - 1); + PUSH_DATA (push, nvc0->idxbuf.index_size >> 1); + + BCTX_REFN(nvc0->bufctx_3d, IDX, buf, RD); +} + +#define NVC0_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ + switch (prim) { + NVC0_PRIM_GL_CASE(POINTS); + NVC0_PRIM_GL_CASE(LINES); + NVC0_PRIM_GL_CASE(LINE_LOOP); + NVC0_PRIM_GL_CASE(LINE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLES); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLE_FAN); + NVC0_PRIM_GL_CASE(QUADS); + NVC0_PRIM_GL_CASE(QUAD_STRIP); + NVC0_PRIM_GL_CASE(POLYGON); + NVC0_PRIM_GL_CASE(LINES_ADJACENCY); + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NVC0_PRIM_GL_CASE(PATCHES); */ + default: + return 
NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + } +} + +static void +nvc0_draw_vbo_kick_notify(struct nouveau_pushbuf *push) +{ + struct nvc0_screen *screen = push->user_priv; + + nouveau_fence_update(&screen->base, TRUE); + + NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1); +} + +static void +nvc0_draw_arrays(struct nvc0_context *nvc0, + unsigned mode, unsigned start, unsigned count, + unsigned instance_count) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned prim; + + if (nvc0->state.index_bias) { + PUSH_SPACE(push, 1); + IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0); + nvc0->state.index_bias = 0; + } + + prim = nvc0_prim_gl(mode); + + while (instance_count--) { + PUSH_SPACE(push, 6); + BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (push, prim); + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, start); + PUSH_DATA (push, count); + IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0); + + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_array, 1); +} + +static void +nvc0_draw_elements_inline_u08(struct nouveau_pushbuf *push, const uint8_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 3) { + unsigned i; + PUSH_SPACE(push, 4); + BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U32), count & 3); + for (i = 0; i < (count & 3); ++i) + PUSH_DATA(push, *map++); + count &= ~3; + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4; + + PUSH_SPACE(push, nr + 1); + BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U8), nr); + for (i = 0; i < nr; ++i) { + PUSH_DATA(push, + (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]); + map += 4; + } + count -= nr * 4; + } +} + +static void +nvc0_draw_elements_inline_u16(struct nouveau_pushbuf *push, const uint16_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + count &= ~1; + PUSH_SPACE(push, 2); + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, 
*map++);
+   }
+   while (count) {
+      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+      PUSH_SPACE(push, nr + 1);
+      BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U16), nr);
+      for (i = 0; i < nr; ++i) {
+         PUSH_DATA(push, (map[1] << 16) | map[0]);
+         map += 2;
+      }
+      count -= nr * 2;
+   }
+}
+
+/* Push `count` 32-bit indices, starting at map[start], inline into the
+ * command stream through VB_ELEMENT_U32, one index per data word.
+ * The indices are split into packets of at most NV04_PFIFO_MAX_PACKET_LEN
+ * words; each packet reserves its data words plus one header word.
+ */
+static void
+nvc0_draw_elements_inline_u32(struct nouveau_pushbuf *push, const uint32_t *map,
+                              unsigned start, unsigned count)
+{
+   map += start;
+
+   while (count) {
+      const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
+
+      PUSH_SPACE(push, nr + 1);
+      BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U32), nr);
+      PUSH_DATAp(push, map, nr);
+
+      map += nr;
+      count -= nr;
+   }
+}
+
+/* Push `count` indices stored as 32-bit values, starting at map[start],
+ * packing two indices into each data word (map[0] in the low half,
+ * map[1] << 16 in the high half) and submitting them via VB_ELEMENT_U16.
+ * An odd leading index is sent on its own through VB_ELEMENT_U32 so the
+ * remainder can be consumed in pairs.
+ *
+ * The packing is only lossless if every index fits in 16 bits; the caller
+ * (nvc0_draw_elements) selects this path through its `shorten` flag.
+ */
+static void
+nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
+                                    const uint32_t *map,
+                                    unsigned start, unsigned count)
+{
+   map += start;
+
+   if (count & 1) {
+      count--;
+      /* method header + one data word, same as the _u16 variant */
+      PUSH_SPACE(push, 2);
+      BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+      PUSH_DATA (push, *map++);
+   }
+   while (count) {
+      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+      PUSH_SPACE(push, nr + 1);
+      BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U16), nr);
+      for (i = 0; i < nr; ++i) {
+         PUSH_DATA(push, (map[1] << 16) | map[0]);
+         map += 2;
+      }
+      count -= nr * 2;
+   }
+}
+
+/* Emit an indexed draw of `count` indices beginning at `start`, repeated
+ * `instance_count` times. VB_ELEMENT_BASE is only re-emitted when
+ * `index_bias` differs from the value cached in nvc0->state.index_bias.
+ * With a bound index buffer resource the draw is issued through
+ * INDEX_BATCH_FIRST; otherwise the indices are read from
+ * idxbuf.user_buffer and pushed inline by the helper matching index_size
+ * (`shorten` picks the packed 16-bit submission for 32-bit indices).
+ */
+static void
+nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
+                   unsigned mode, unsigned start, unsigned count,
+                   unsigned instance_count, int32_t index_bias)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   unsigned prim;
+   const unsigned index_size = nvc0->idxbuf.index_size;
+
+   prim = nvc0_prim_gl(mode);
+
+   if (index_bias != nvc0->state.index_bias) {
+      PUSH_SPACE(push, 2);
+      BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 1);
+      PUSH_DATA (push, index_bias);
+      nvc0->state.index_bias = index_bias;
+   }
+
+   if (nvc0->idxbuf.buffer) {
+      PUSH_SPACE(push, 1);
+      IMMED_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), prim);
+      do {
+         PUSH_SPACE(push, 7);
+         BEGIN_NVC0(push, NVC0_3D(INDEX_BATCH_FIRST), 2);
+         PUSH_DATA (push, start);
+         PUSH_DATA 
(push, count); + if (--instance_count) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, prim | NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT); + } + } while (instance_count); + IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0); + } else { + const void *data = nvc0->idxbuf.user_buffer; + + while (instance_count--) { + PUSH_SPACE(push, 2); + BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (push, prim); + switch (index_size) { + case 1: + nvc0_draw_elements_inline_u08(push, data, start, count); + break; + case 2: + nvc0_draw_elements_inline_u16(push, data, start, count); + break; + case 4: + if (shorten) + nvc0_draw_elements_inline_u32_short(push, data, start, count); + else + nvc0_draw_elements_inline_u32(push, data, start, count); + break; + default: + assert(0); + return; + } + PUSH_SPACE(push, 1); + IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0); + + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } + } + NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_indexed, 1); +} + +static void +nvc0_draw_stream_output(struct nvc0_context *nvc0, + const struct pipe_draw_info *info) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_so_target *so = nvc0_so_target(info->count_from_stream_output); + struct nv04_resource *res = nv04_resource(so->pipe.buffer); + unsigned mode = nvc0_prim_gl(info->mode); + unsigned num_instances = info->instance_count; + + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + PUSH_SPACE(push, 2); + IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); + nvc0_query_fifo_wait(push, so->pq); + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0); + + NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, 1); + } + + while (num_instances--) { + PUSH_SPACE(push, 8); + BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (push, mode); + BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BASE), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_STRIDE), 
1); + PUSH_DATA (push, so->stride); + BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BYTES), 1); + nvc0_query_pushbuf_submit(push, so->pq, 0x4); + IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0); + + mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } +} + +void +nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ + nvc0->vb_elt_first = info->min_index + info->index_bias; + nvc0->vb_elt_limit = info->max_index - info->min_index; + nvc0->instance_off = info->start_instance; + nvc0->instance_max = info->instance_count - 1; + + /* For picking only a few vertices from a large user buffer, push is better, + * if index count is larger and we expect repeated vertices, suggest upload. + */ + nvc0->vbo_push_hint = + info->indexed && (nvc0->vb_elt_limit >= (info->count * 2)); + + /* Check whether we want to switch vertex-submission mode. */ + if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_ARRAYS | NVC0_NEW_VERTEX))) { + if (nvc0->vbo_push_hint != !!nvc0->state.vbo_mode) + if (nvc0->state.vbo_mode != 3) + nvc0->dirty |= NVC0_NEW_ARRAYS; + + if (!(nvc0->dirty & NVC0_NEW_ARRAYS) && nvc0->state.vbo_mode == 0) { + if (nvc0->vertex->shared_slots) + nvc0_update_user_vbufs_shared(nvc0); + else + nvc0_update_user_vbufs(nvc0); + } + } + + /* 8 as minimum to avoid immediate double validation of new buffers */ + nvc0_state_validate(nvc0, ~0, 8); + + push->kick_notify = nvc0_draw_vbo_kick_notify; + + if (nvc0->state.vbo_mode) { + nvc0_push_vbo(nvc0, info); + push->kick_notify = nvc0_default_kick_notify; + return; + } + + /* space for base instance, flush, and prim restart */ + PUSH_SPACE(push, 8); + + if (nvc0->state.instance_base != info->start_instance) { + nvc0->state.instance_base = info->start_instance; + /* NOTE: this does not affect the shader input, should it ? 
*/ + BEGIN_NVC0(push, NVC0_3D(VB_INSTANCE_BASE), 1); + PUSH_DATA (push, info->start_instance); + } + + if (nvc0->base.vbo_dirty) { + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0); + nvc0->base.vbo_dirty = FALSE; + } + + if (info->indexed) { + boolean shorten = info->max_index <= 65535; + + if (info->primitive_restart != nvc0->state.prim_restart) { + if (info->primitive_restart) { + BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 2); + PUSH_DATA (push, 1); + PUSH_DATA (push, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } else { + IMMED_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 0); + } + nvc0->state.prim_restart = info->primitive_restart; + } else + if (info->primitive_restart) { + BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_INDEX), 1); + PUSH_DATA (push, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; + } + + nvc0_draw_elements(nvc0, shorten, + info->mode, info->start, info->count, + info->instance_count, info->index_bias); + } else + if (unlikely(info->count_from_stream_output)) { + nvc0_draw_stream_output(nvc0, info); + } else { + nvc0_draw_arrays(nvc0, + info->mode, info->start, info->count, + info->instance_count); + } + push->kick_notify = nvc0_default_kick_notify; + + nvc0_release_user_vbufs(nvc0); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c new file mode 100644 index 00000000000..51e751cfa57 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c @@ -0,0 +1,649 @@ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "translate/translate.h" + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_resource.h" + +#include "nvc0/nvc0_3d.xml.h" + +struct push_context { + struct nouveau_pushbuf *push; + + struct translate *translate; + void *dest; + const void *idxbuf; + + uint32_t vertex_size; + uint32_t restart_index; + uint32_t 
instance_id; + + boolean prim_restart; + boolean need_vertex_id; + + struct { + boolean enabled; + boolean value; + unsigned stride; + const uint8_t *data; + } edgeflag; +}; + +static void nvc0_push_upload_vertex_ids(struct push_context *, + struct nvc0_context *, + const struct pipe_draw_info *); + +static void +nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx) +{ + ctx->push = nvc0->base.pushbuf; + + ctx->translate = nvc0->vertex->translate; + ctx->vertex_size = nvc0->vertex->size; + + ctx->need_vertex_id = + nvc0->vertprog->vp.need_vertex_id && (nvc0->vertex->num_elements < 32); + + ctx->edgeflag.value = TRUE; + ctx->edgeflag.enabled = nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS; + + /* silence warnings */ + ctx->edgeflag.data = NULL; + ctx->edgeflag.stride = 0; +} + +static INLINE void +nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias) +{ + struct translate *translate = nvc0->vertex->translate; + unsigned i; + + for (i = 0; i < nvc0->num_vtxbufs; ++i) { + const uint8_t *map; + const struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; + + if (likely(!vb->buffer)) + map = (const uint8_t *)vb->user_buffer; + else + map = nouveau_resource_map_offset(&nvc0->base, + nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD); + + if (index_bias && !unlikely(nvc0->vertex->instance_bufs & (1 << i))) + map += (intptr_t)index_bias * vb->stride; + + translate->set_buffer(translate, i, map, vb->stride, ~0); + } +} + +static INLINE void +nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0) +{ + if (nvc0->idxbuf.buffer) { + struct nv04_resource *buf = nv04_resource(nvc0->idxbuf.buffer); + ctx->idxbuf = nouveau_resource_map_offset(&nvc0->base, + buf, nvc0->idxbuf.offset, NOUVEAU_BO_RD); + } else { + ctx->idxbuf = nvc0->idxbuf.user_buffer; + } +} + +static INLINE void +nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0, + int32_t index_bias) +{ + unsigned attr = 
nvc0->vertprog->vp.edgeflag; + struct pipe_vertex_element *ve = &nvc0->vertex->element[attr].pipe; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; + struct nv04_resource *buf = nv04_resource(vb->buffer); + unsigned offset = vb->buffer_offset + ve->src_offset; + + ctx->edgeflag.stride = vb->stride; + ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base, + buf, offset, NOUVEAU_BO_RD); + if (index_bias) + ctx->edgeflag.data += (intptr_t)index_bias * vb->stride; +} + +static INLINE unsigned +prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index) +{ + unsigned i; + for (i = 0; i < push && elts[i] != index; ++i); + return i; +} + +static INLINE unsigned +prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index) +{ + unsigned i; + for (i = 0; i < push && elts[i] != index; ++i); + return i; +} + +static INLINE unsigned +prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index) +{ + unsigned i; + for (i = 0; i < push && elts[i] != index; ++i); + return i; +} + +static INLINE boolean +ef_value(const struct push_context *ctx, uint32_t index) +{ + float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride]; + return *pf ? 
TRUE : FALSE; +} + +static INLINE boolean +ef_toggle(struct push_context *ctx) +{ + ctx->edgeflag.value = !ctx->edgeflag.value; + return ctx->edgeflag.value; +} + +static INLINE unsigned +ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n) +{ + unsigned i; + for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i); + return i; +} + +static INLINE unsigned +ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n) +{ + unsigned i; + for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i); + return i; +} + +static INLINE unsigned +ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n) +{ + unsigned i; + for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i); + return i; +} + +static INLINE unsigned +ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n) +{ + unsigned i; + for (i = 0; i < n && ef_value(ctx, start++) == ctx->edgeflag.value; ++i); + return i; +} + +static INLINE void * +nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nouveau_bo *bo; + uint64_t va; + const unsigned size = count * nvc0->vertex->size; + + void *const dest = nouveau_scratch_get(&nvc0->base, size, &va, &bo); + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2); + PUSH_DATAh(push, va); + PUSH_DATA (push, va); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); + PUSH_DATAh(push, va + size - 1); + PUSH_DATA (push, va + size - 1); + + BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, + bo); + nouveau_pushbuf_validate(push); + + return dest; +} + +static void +disp_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) +{ + struct nouveau_pushbuf *push = ctx->push; + struct translate *translate = ctx->translate; + const uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start; + unsigned pos = 0; + + do { 
+ unsigned nR = count; + + if (unlikely(ctx->prim_restart)) + nR = prim_restart_search_i08(elts, nR, ctx->restart_index); + + translate->run_elts8(translate, elts, nR, 0, ctx->instance_id, ctx->dest); + count -= nR; + ctx->dest += nR * ctx->vertex_size; + + while (nR) { + unsigned nE = nR; + + if (unlikely(ctx->edgeflag.enabled)) + nE = ef_toggle_search_i08(ctx, elts, nR); + + PUSH_SPACE(push, 4); + if (likely(nE >= 2)) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, pos); + PUSH_DATA (push, nE); + } else + if (nE) { + if (pos <= 0xff) { + IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos); + } else { + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, pos); + } + } + if (unlikely(nE != nR)) + IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx)); + + pos += nE; + elts += nE; + nR -= nE; + } + if (count) { + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, ctx->restart_index); + ++elts; + ctx->dest += ctx->vertex_size; + ++pos; + --count; + } + } while (count); +} + +static void +disp_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) +{ + struct nouveau_pushbuf *push = ctx->push; + struct translate *translate = ctx->translate; + const uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start; + unsigned pos = 0; + + do { + unsigned nR = count; + + if (unlikely(ctx->prim_restart)) + nR = prim_restart_search_i16(elts, nR, ctx->restart_index); + + translate->run_elts16(translate, elts, nR, 0, ctx->instance_id, ctx->dest); + count -= nR; + ctx->dest += nR * ctx->vertex_size; + + while (nR) { + unsigned nE = nR; + + if (unlikely(ctx->edgeflag.enabled)) + nE = ef_toggle_search_i16(ctx, elts, nR); + + PUSH_SPACE(push, 4); + if (likely(nE >= 2)) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, pos); + PUSH_DATA (push, nE); + } else + if (nE) { + if (pos <= 0xff) { + IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos); + } else { + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + 
PUSH_DATA (push, pos); + } + } + if (unlikely(nE != nR)) + IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx)); + + pos += nE; + elts += nE; + nR -= nE; + } + if (count) { + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, ctx->restart_index); + ++elts; + ctx->dest += ctx->vertex_size; + ++pos; + --count; + } + } while (count); +} + +static void +disp_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) +{ + struct nouveau_pushbuf *push = ctx->push; + struct translate *translate = ctx->translate; + const uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start; + unsigned pos = 0; + + do { + unsigned nR = count; + + if (unlikely(ctx->prim_restart)) + nR = prim_restart_search_i32(elts, nR, ctx->restart_index); + + translate->run_elts(translate, elts, nR, 0, ctx->instance_id, ctx->dest); + count -= nR; + ctx->dest += nR * ctx->vertex_size; + + while (nR) { + unsigned nE = nR; + + if (unlikely(ctx->edgeflag.enabled)) + nE = ef_toggle_search_i32(ctx, elts, nR); + + PUSH_SPACE(push, 4); + if (likely(nE >= 2)) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, pos); + PUSH_DATA (push, nE); + } else + if (nE) { + if (pos <= 0xff) { + IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos); + } else { + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, pos); + } + } + if (unlikely(nE != nR)) + IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx)); + + pos += nE; + elts += nE; + nR -= nE; + } + if (count) { + BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); + PUSH_DATA (push, ctx->restart_index); + ++elts; + ctx->dest += ctx->vertex_size; + ++pos; + --count; + } + } while (count); +} + +static void +disp_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) +{ + struct nouveau_pushbuf *push = ctx->push; + struct translate *translate = ctx->translate; + unsigned pos = 0; + + translate->run(translate, start, count, 0, ctx->instance_id, ctx->dest); + do { + unsigned nr = count; + + if 
(unlikely(ctx->edgeflag.enabled)) + nr = ef_toggle_search_seq(ctx, start + pos, nr); + + PUSH_SPACE(push, 4); + if (likely(nr)) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, pos); + PUSH_DATA (push, nr); + } + if (unlikely(nr != count)) + IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx)); + + pos += nr; + count -= nr; + } while (count); +} + + +#define NVC0_PRIM_GL_CASE(n) \ + case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n + +static INLINE unsigned +nvc0_prim_gl(unsigned prim) +{ + switch (prim) { + NVC0_PRIM_GL_CASE(POINTS); + NVC0_PRIM_GL_CASE(LINES); + NVC0_PRIM_GL_CASE(LINE_LOOP); + NVC0_PRIM_GL_CASE(LINE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLES); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP); + NVC0_PRIM_GL_CASE(TRIANGLE_FAN); + NVC0_PRIM_GL_CASE(QUADS); + NVC0_PRIM_GL_CASE(QUAD_STRIP); + NVC0_PRIM_GL_CASE(POLYGON); + NVC0_PRIM_GL_CASE(LINES_ADJACENCY); + NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY); + NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY); + /* + NVC0_PRIM_GL_CASE(PATCHES); */ + default: + return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS; + } +} + +void +nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) +{ + struct push_context ctx; + unsigned i, index_size; + unsigned inst_count = info->instance_count; + unsigned vert_count = info->count; + unsigned prim; + + nvc0_push_context_init(nvc0, &ctx); + + nvc0_vertex_configure_translate(nvc0, info->index_bias); + + if (unlikely(ctx.edgeflag.enabled)) + nvc0_push_map_edgeflag(&ctx, nvc0, info->index_bias); + + ctx.prim_restart = info->primitive_restart; + ctx.restart_index = info->restart_index; + + if (info->indexed) { + nvc0_push_map_idxbuf(&ctx, nvc0); + index_size = nvc0->idxbuf.index_size; + + if (info->primitive_restart) { + BEGIN_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 2); + PUSH_DATA (ctx.push, 1); + PUSH_DATA (ctx.push, info->restart_index); + } else + if (nvc0->state.prim_restart) { + 
IMMED_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 0); + } + nvc0->state.prim_restart = info->primitive_restart; + } else { + if (unlikely(info->count_from_stream_output)) { + struct pipe_context *pipe = &nvc0->base.pipe; + struct nvc0_so_target *targ; + targ = nvc0_so_target(info->count_from_stream_output); + pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count); + vert_count /= targ->stride; + } + ctx.idxbuf = NULL; /* shut up warnings */ + index_size = 0; + } + + ctx.instance_id = info->start_instance; + + prim = nvc0_prim_gl(info->mode); + do { + PUSH_SPACE(ctx.push, 9); + + ctx.dest = nvc0_push_setup_vertex_array(nvc0, vert_count); + if (unlikely(!ctx.dest)) + break; + + if (unlikely(ctx.need_vertex_id)) + nvc0_push_upload_vertex_ids(&ctx, nvc0, info); + + IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0); + BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (ctx.push, prim); + switch (index_size) { + case 1: + disp_vertices_i08(&ctx, info->start, vert_count); + break; + case 2: + disp_vertices_i16(&ctx, info->start, vert_count); + break; + case 4: + disp_vertices_i32(&ctx, info->start, vert_count); + break; + default: + assert(index_size == 0); + disp_vertices_seq(&ctx, info->start, vert_count); + break; + } + PUSH_SPACE(ctx.push, 1); + IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0); + + if (--inst_count) { + prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + ++ctx.instance_id; + } + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); + nouveau_scratch_done(&nvc0->base); + } while (inst_count); + + + /* reset state and unmap buffers (no-op) */ + + if (unlikely(!ctx.edgeflag.value)) { + PUSH_SPACE(ctx.push, 1); + IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1); + } + + if (unlikely(ctx.need_vertex_id)) { + PUSH_SPACE(ctx.push, 4); + IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0); + BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(1)), 1); + PUSH_DATA (ctx.push, + NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST | + 
NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | + NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32); + IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 0); + } + + if (info->indexed) + nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer)); + for (i = 0; i < nvc0->num_vtxbufs; ++i) + nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer)); + + NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1); +} + +static INLINE void +copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n) +{ + unsigned i; + for (i = 0; i < n; ++i) + dst[i] = elts[i] + bias; +} + +static INLINE void +copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n) +{ + unsigned i; + for (i = 0; i < n; ++i) + dst[i] = elts[i] + bias; +} + +static INLINE void +copy_indices_u32(uint32_t *dst, const uint32_t *elts, uint32_t bias, unsigned n) +{ + unsigned i; + for (i = 0; i < n; ++i) + dst[i] = elts[i] + bias; +} + +static void +nvc0_push_upload_vertex_ids(struct push_context *ctx, + struct nvc0_context *nvc0, + const struct pipe_draw_info *info) + +{ + struct nouveau_pushbuf *push = ctx->push; + struct nouveau_bo *bo; + uint64_t va; + uint32_t *data; + uint32_t format; + unsigned index_size = nvc0->idxbuf.index_size; + unsigned i; + unsigned a = nvc0->vertex->num_elements; + + if (!index_size || info->index_bias) + index_size = 4; + data = (uint32_t *)nouveau_scratch_get(&nvc0->base, + info->count * index_size, &va, &bo); + + BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, + bo); + nouveau_pushbuf_validate(push); + + if (info->indexed) { + if (!info->index_bias) { + memcpy(data, ctx->idxbuf, info->count * index_size); + } else { + switch (nvc0->idxbuf.index_size) { + case 1: + copy_indices_u8(data, ctx->idxbuf, info->index_bias, info->count); + break; + case 2: + copy_indices_u16(data, ctx->idxbuf, info->index_bias, info->count); + break; + default: + copy_indices_u32(data, ctx->idxbuf, info->index_bias, info->count); + 
break; + } + } + } else { + for (i = 0; i < info->count; ++i) + data[i] = i + (info->start + info->index_bias); + } + + format = (1 << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) | + NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT; + + switch (index_size) { + case 1: + format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8; + break; + case 2: + format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16; + break; + default: + format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32; + break; + } + + PUSH_SPACE(push, 12); + + if (unlikely(nvc0->state.instance_elts & 2)) { + nvc0->state.instance_elts &= ~2; + IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(1)), 0); + } + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1); + PUSH_DATA (push, format); + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 3); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size); + PUSH_DATAh(push, va); + PUSH_DATA (push, va); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2); + PUSH_DATAh(push, va + info->count * index_size - 1); + PUSH_DATA (push, va + info->count * index_size - 1); + +#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) \ + (((0x80 + (a) * 0x10) / 4) << NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT) + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_REPLACE), 1); + PUSH_DATA (push, NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) | 1); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c new file mode 100644 index 00000000000..5871f590e0e --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c @@ -0,0 +1,331 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + 
* Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nvc0/nvc0_video.h" + +#include "util/u_sampler.h" +#include "util/u_format.h" + +static void +nvc0_decoder_decode_bitstream(struct pipe_video_codec *decoder, + struct pipe_video_buffer *video_target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void *const *data, + const unsigned *num_bytes) +{ + struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder; + struct nouveau_vp3_video_buffer *target = (struct nouveau_vp3_video_buffer *)video_target; + uint32_t comm_seq = ++dec->fence_seq; + union pipe_desc desc; + + unsigned vp_caps, is_ref, ret; + struct nouveau_vp3_video_buffer *refs[16] = {}; + + desc.base = picture; + + assert(target->base.buffer_format == PIPE_FORMAT_NV12); + + ret = nvc0_decoder_bsp(dec, desc, target, comm_seq, + num_buffers, data, num_bytes, + &vp_caps, &is_ref, refs); + + /* did we decode bitstream correctly? 
*/ + assert(ret == 2); + + nvc0_decoder_vp(dec, desc, target, comm_seq, vp_caps, is_ref, refs); + nvc0_decoder_ppp(dec, desc, target, comm_seq); +} + +struct pipe_video_codec * +nvc0_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ) +{ + struct nouveau_screen *screen = &((struct nvc0_context *)context)->screen->base; + struct nouveau_vp3_decoder *dec; + struct nouveau_pushbuf **push; + union nouveau_bo_config cfg; + bool kepler = screen->device->chipset >= 0xe0; + + cfg.nvc0.tile_mode = 0x10; + cfg.nvc0.memtype = 0xfe; + + int ret, i; + uint32_t codec = 1, ppp_codec = 3; + uint32_t timeout; + u32 tmp_size = 0; + + if (getenv("XVMC_VL")) + return vl_create_decoder(context, templ); + + if (templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + debug_printf("%x\n", templ->entrypoint); + return NULL; + } + + dec = CALLOC_STRUCT(nouveau_vp3_decoder); + if (!dec) + return NULL; + dec->client = screen->client; + dec->base = *templ; + nouveau_vp3_decoder_init_common(&dec->base); + + if (!kepler) { + dec->bsp_idx = 5; + dec->vp_idx = 6; + dec->ppp_idx = 7; + } else { + dec->bsp_idx = 2; + dec->vp_idx = 2; + dec->ppp_idx = 2; + } + + for (i = 0; i < 3; ++i) + if (i && !kepler) { + dec->channel[i] = dec->channel[0]; + dec->pushbuf[i] = dec->pushbuf[0]; + } else { + void *data; + u32 size; + struct nvc0_fifo nvc0_args = {}; + struct nve0_fifo nve0_args = {}; + + if (!kepler) { + size = sizeof(nvc0_args); + data = &nvc0_args; + } else { + unsigned engine[] = { + NVE0_FIFO_ENGINE_BSP, + NVE0_FIFO_ENGINE_VP, + NVE0_FIFO_ENGINE_PPP + }; + + nve0_args.engine = engine[i]; + size = sizeof(nve0_args); + data = &nve0_args; + } + + ret = nouveau_object_new(&screen->device->object, 0, + NOUVEAU_FIFO_CHANNEL_CLASS, + data, size, &dec->channel[i]); + + if (!ret) + ret = nouveau_pushbuf_new(screen->client, dec->channel[i], 4, + 32 * 1024, true, &dec->pushbuf[i]); + if (ret) + break; + } + push = dec->pushbuf; + + if (!kepler) { + if (!ret) + ret = 
nouveau_object_new(dec->channel[0], 0x390b1, 0x90b1, NULL, 0, &dec->bsp); + if (!ret) + ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x90b2, NULL, 0, &dec->vp); + if (!ret) + ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x90b3, NULL, 0, &dec->ppp); + } else { + if (!ret) + ret = nouveau_object_new(dec->channel[0], 0x95b1, 0x95b1, NULL, 0, &dec->bsp); + if (!ret) + ret = nouveau_object_new(dec->channel[1], 0x95b2, 0x95b2, NULL, 0, &dec->vp); + if (!ret) + ret = nouveau_object_new(dec->channel[2], 0x90b3, 0x90b3, NULL, 0, &dec->ppp); + } + if (ret) + goto fail; + + BEGIN_NVC0(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[0], dec->bsp->handle); + + BEGIN_NVC0(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[1], dec->vp->handle); + + BEGIN_NVC0(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[2], dec->ppp->handle); + + dec->base.context = context; + dec->base.decode_bitstream = nvc0_decoder_decode_bitstream; + + for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i) + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0, 1 << 20, &cfg, &dec->bsp_bo[i]); + if (!ret) + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0x100, 4 << 20, &cfg, &dec->inter_bo[0]); + if (!ret) { + if (!kepler) + nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]); + else + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0x100, dec->inter_bo[0]->size, &cfg, + &dec->inter_bo[1]); + } + if (ret) + goto fail; + + switch (u_reduce_video_profile(templ->profile)) { + case PIPE_VIDEO_FORMAT_MPEG12: { + codec = 1; + assert(templ->max_references <= 2); + break; + } + case PIPE_VIDEO_FORMAT_MPEG4: { + codec = 4; + tmp_size = mb(templ->height)*16 * mb(templ->width)*16; + assert(templ->max_references <= 2); + break; + } + case PIPE_VIDEO_FORMAT_VC1: { + ppp_codec = codec = 2; + tmp_size = mb(templ->height)*16 * mb(templ->width)*16; + assert(templ->max_references <= 2); + break; + } + case PIPE_VIDEO_FORMAT_MPEG4_AVC: { + codec = 3; + 
dec->tmp_stride = 16 * mb_half(templ->width) * nouveau_vp3_video_align(templ->height) * 3 / 2; + tmp_size = dec->tmp_stride * (templ->max_references + 1); + assert(templ->max_references <= 16); + break; + } + default: + fprintf(stderr, "invalid codec\n"); + goto fail; + } + + if (screen->device->chipset < 0xd0) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + 0x4000, &cfg, &dec->fw_bo); + if (ret) + goto fail; + + ret = nouveau_vp3_load_firmware(dec, templ->profile, screen->device->chipset); + if (ret) + goto fw_fail; + } + + if (codec != 3) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + 0x400, &cfg, &dec->bitplane_bo); + if (ret) + goto fail; + } + + dec->ref_stride = mb(templ->width)*16 * (mb_half(templ->height)*32 + nouveau_vp3_video_align(templ->height)/2); + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + dec->ref_stride * (templ->max_references+2) + tmp_size, + &cfg, &dec->ref_bo); + if (ret) + goto fail; + + timeout = 0; + + BEGIN_NVC0(push[0], SUBC_BSP(0x200), 2); + PUSH_DATA (push[0], codec); + PUSH_DATA (push[0], timeout); + + BEGIN_NVC0(push[1], SUBC_VP(0x200), 2); + PUSH_DATA (push[1], codec); + PUSH_DATA (push[1], timeout); + + BEGIN_NVC0(push[2], SUBC_PPP(0x200), 2); + PUSH_DATA (push[2], ppp_codec); + PUSH_DATA (push[2], timeout); + + ++dec->fence_seq; + +#if NOUVEAU_VP3_DEBUG_FENCE + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP, + 0, 0x1000, NULL, &dec->fence_bo); + if (ret) + goto fail; + + nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client); + dec->fence_map = dec->fence_bo->map; + dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0; + dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map))); + + /* So lets test if the fence is working? 
*/ + nouveau_pushbuf_space(push[0], 6, 1, 0); + PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); + BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3); + PUSH_DATAh(push[0], dec->fence_bo->offset); + PUSH_DATA (push[0], dec->fence_bo->offset); + PUSH_DATA (push[0], dec->fence_seq); + + BEGIN_NVC0(push[0], SUBC_BSP(0x304), 1); + PUSH_DATA (push[0], 0); + PUSH_KICK (push[0]); + + nouveau_pushbuf_space(push[1], 6, 1, 0); + PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); + BEGIN_NVC0(push[1], SUBC_VP(0x240), 3); + PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push[1], dec->fence_seq); + + BEGIN_NVC0(push[1], SUBC_VP(0x304), 1); + PUSH_DATA (push[1], 0); + PUSH_KICK (push[1]); + + nouveau_pushbuf_space(push[2], 6, 1, 0); + PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); + BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3); + PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push[2], dec->fence_seq); + + BEGIN_NVC0(push[2], SUBC_PPP(0x304), 1); + PUSH_DATA (push[2], 0); + PUSH_KICK (push[2]); + + usleep(100); + while (dec->fence_seq > dec->fence_map[0] || + dec->fence_seq > dec->fence_map[4] || + dec->fence_seq > dec->fence_map[8]) { + debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); + usleep(100); + } + debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); +#endif + + return &dec->base; + +fw_fail: + debug_printf("Cannot create decoder without firmware..\n"); + dec->base.destroy(&dec->base); + return NULL; + +fail: + debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret); + dec->base.destroy(&dec->base); + return NULL; +} + +struct pipe_video_buffer * +nvc0_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *templat) +{ + return 
nouveau_vp3_video_buffer_create( + pipe, templat, NVC0_RESOURCE_FLAG_VIDEO); +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.h b/src/gallium/drivers/nouveau/nvc0/nvc0_video.h new file mode 100644 index 00000000000..9ee0280f8ea --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_screen.h" +#include "nouveau_vp3_video.h" + +#include "vl/vl_decoder.h" +#include "vl/vl_types.h" + +#include "util/u_video.h" + +extern unsigned +nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, + unsigned comm_seq, unsigned num_buffers, + const void *const *data, const unsigned *num_bytes, + unsigned *vp_caps, unsigned *is_ref, + struct nouveau_vp3_video_buffer *refs[16]); + +extern void +nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, unsigned comm_seq, + unsigned caps, unsigned is_ref, + struct nouveau_vp3_video_buffer *refs[16]); + +extern void +nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, unsigned comm_seq); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c new file mode 100644 index 00000000000..40696fa779f --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c @@ -0,0 +1,155 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nvc0/nvc0_video.h" + +#if NOUVEAU_VP3_DEBUG_FENCE +static void dump_comm_bsp(struct comm *comm) +{ + unsigned idx = comm->bsp_cur_index & 0xf; + debug_printf("Cur seq: %x, bsp byte ofs: %x\n", comm->bsp_cur_index, comm->byte_ofs); + debug_printf("Status: %08x, pos: %08x\n", comm->status[idx], comm->pos[idx]); +} +#endif + +unsigned +nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, + unsigned comm_seq, unsigned num_buffers, + const void *const *data, const unsigned *num_bytes, + unsigned *vp_caps, unsigned *is_ref, + struct nouveau_vp3_video_buffer *refs[16]) +{ + struct nouveau_pushbuf *push = dec->pushbuf[0]; + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); + uint32_t bsp_addr, comm_addr, inter_addr; + uint32_t slice_size, bucket_size, ring_size; + uint32_t caps; + int ret; + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH]; + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; + unsigned fence_extra = 0; + struct nouveau_pushbuf_refn bo_refs[] = { + { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, + { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, +#if NOUVEAU_VP3_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + + if (!dec->bitplane_bo) + num_refs--; + +#if NOUVEAU_VP3_DEBUG_FENCE + fence_extra = 4; 
+#endif + + ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client); + if (ret) { + debug_printf("map failed: %i %s\n", ret, strerror(-ret)); + return -1; + } + + caps = nouveau_vp3_bsp(dec, desc, target, comm_seq, + num_buffers, data, num_bytes); + + nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs); + + nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 7) + fence_extra + 2, num_refs, 0); + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + bsp_addr = bsp_bo->offset >> 8; + inter_addr = inter_bo->offset >> 8; + +#if NOUVEAU_VP3_DEBUG_FENCE + memset(dec->comm, 0, 0x200); + comm_addr = (dec->fence_bo->offset + COMM_OFFSET) >> 8; +#else + comm_addr = bsp_addr + (COMM_OFFSET>>8); +#endif + + BEGIN_NVC0(push, SUBC_BSP(0x700), 5); + PUSH_DATA (push, caps); // 700 cmd + PUSH_DATA (push, bsp_addr + 1); // 704 strparm_bsp + PUSH_DATA (push, bsp_addr + 7); // 708 str addr + PUSH_DATA (push, comm_addr); // 70c comm + PUSH_DATA (push, comm_seq); // 710 seq + + if (codec != PIPE_VIDEO_FORMAT_MPEG4_AVC) { + u32 bitplane_addr; + + bitplane_addr = dec->bitplane_bo->offset >> 8; + + nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size); + BEGIN_NVC0(push, SUBC_BSP(0x400), 6); + PUSH_DATA (push, bsp_addr); // 400 picparm addr + PUSH_DATA (push, inter_addr); // 404 interparm addr + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 408 interdata addr + PUSH_DATA (push, ring_size << 8); // 40c interdata_size + PUSH_DATA (push, bitplane_addr); // 410 BITPLANE_DATA + PUSH_DATA (push, 0x400); // 414 BITPLANE_DATA_SIZE + } else { + nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); + BEGIN_NVC0(push, SUBC_BSP(0x400), 8); + PUSH_DATA (push, bsp_addr); // 400 picparm addr + PUSH_DATA (push, inter_addr); // 404 interparm addr + PUSH_DATA (push, slice_size << 8); // 408 interparm size? 
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 40c interdata addr + PUSH_DATA (push, ring_size << 8); // 410 interdata size + PUSH_DATA (push, inter_addr + slice_size); // 414 bucket? + PUSH_DATA (push, bucket_size << 8); // 418 bucket size? unshifted.. + PUSH_DATA (push, 0); // 41c targets + // TODO: Double check 414 / 418 with nvidia trace + } + +#if NOUVEAU_VP3_DEBUG_FENCE + BEGIN_NVC0(push, SUBC_BSP(0x240), 3); + PUSH_DATAh(push, dec->fence_bo->offset); + PUSH_DATA (push, dec->fence_bo->offset); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NVC0(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK (push); + + { + unsigned spin = 0; + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) { + debug_printf("b%u: %u\n", dec->fence_seq, dec->fence_map[0]); + dump_comm_bsp(dec->comm); + } + } while (dec->fence_seq > dec->fence_map[0]); + } + + dump_comm_bsp(dec->comm); + return dec->comm->status[comm_seq & 0xf]; +#else + BEGIN_NVC0(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); + return 2; +#endif +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c new file mode 100644 index 00000000000..4ceec4fbffc --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c @@ -0,0 +1,143 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nvc0/nvc0_video.h" + +static void +nvc0_decoder_setup_ppp(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target, uint32_t low700) { + struct nouveau_pushbuf *push = dec->pushbuf[2]; + + uint32_t stride_in = mb(dec->base.width); + uint32_t stride_out = mb(target->resources[0]->width0); + uint32_t dec_h = mb(dec->base.height); + uint32_t dec_w = mb(dec->base.width); + uint64_t in_addr; + uint32_t y2, cbcr, cbcr2, i; + struct nouveau_pushbuf_refn bo_refs[] = { + { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { dec->ref_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, +#if NOUVEAU_VP3_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + }; + unsigned num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + + for (i = 0; i < 2; ++i) { + struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i]; + bo_refs[i].bo = mt->base.bo; + } + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + nouveau_vp3_ycbcr_offsets(dec, &y2, &cbcr, &cbcr2); + + BEGIN_NVC0(push, SUBC_PPP(0x700), 10); + in_addr = nouveau_vp3_video_addr(dec, target) >> 8; + + PUSH_DATA (push, (stride_out << 24) | (stride_out << 16) | low700); // 700 + PUSH_DATA (push, (stride_in << 24) | (stride_in << 16) | (dec_h << 8) | dec_w); // 704 + assert(dec_w == stride_in); + + /* Input: */ + PUSH_DATA (push, in_addr); // 708 + PUSH_DATA (push, in_addr + y2); // 70c + PUSH_DATA (push, in_addr + cbcr); // 710 + PUSH_DATA (push, in_addr + cbcr2); 
// 714 + + for (i = 0; i < 2; ++i) { + struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i]; + + PUSH_DATA (push, mt->base.address >> 8); + PUSH_DATA (push, (mt->base.address + mt->total_size/2) >> 8); + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; + } +} + +static uint32_t +nvc0_decoder_vc1_ppp(struct nouveau_vp3_decoder *dec, struct pipe_vc1_picture_desc *desc, struct nouveau_vp3_video_buffer *target) { + struct nouveau_pushbuf *push = dec->pushbuf[2]; + + nvc0_decoder_setup_ppp(dec, target, 0x1412); + assert(!desc->deblockEnable); + assert(!(dec->base.width & 0xf)); + assert(!(dec->base.height & 0xf)); + + BEGIN_NVC0(push, SUBC_PPP(0x400), 1); + PUSH_DATA (push, desc->pquant << 11); + + // 728 = wtf? + return 0x10; +} + +void +nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct nouveau_vp3_video_buffer *target, unsigned comm_seq) { + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); + struct nouveau_pushbuf *push = dec->pushbuf[2]; + unsigned ppp_caps = 0x10; + unsigned fence_extra = 0; + +#if NOUVEAU_VP3_DEBUG_FENCE + fence_extra = 4; +#endif + + nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 
2 : 0) + 3 + fence_extra + 2, 4, 0); + + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG12: { + unsigned mpeg2 = dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1; + nvc0_decoder_setup_ppp(dec, target, 0x1410 | mpeg2); + break; + } + case PIPE_VIDEO_FORMAT_MPEG4: nvc0_decoder_setup_ppp(dec, target, 0x1414); break; + case PIPE_VIDEO_FORMAT_VC1: ppp_caps = nvc0_decoder_vc1_ppp(dec, desc.vc1, target); break; + case PIPE_VIDEO_FORMAT_MPEG4_AVC: nvc0_decoder_setup_ppp(dec, target, 0x1413); break; + default: assert(0); + } + BEGIN_NVC0(push, SUBC_PPP(0x734), 2); + PUSH_DATA (push, comm_seq); + PUSH_DATA (push, ppp_caps); + +#if NOUVEAU_VP3_DEBUG_FENCE + BEGIN_NVC0(push, SUBC_PPP(0x240), 3); + PUSH_DATAh(push, (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push, (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NVC0(push, SUBC_PPP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK (push); + + { + unsigned spin = 0; + + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) + debug_printf("p%u: %u\n", dec->fence_seq, dec->fence_map[8]); + } while (dec->fence_seq > dec->fence_map[8]); + } +#else + BEGIN_NVC0(push, SUBC_PPP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); +#endif +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c new file mode 100644 index 00000000000..0d152b9624f --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c @@ -0,0 +1,202 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nvc0/nvc0_video.h" +#include <sys/mman.h> + +#if NOUVEAU_VP3_DEBUG_FENCE +static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq, + struct nouveau_bo *inter_bo, unsigned slice_size) +{ + unsigned i, idx = comm->pvp_cur_index & 0xf; + debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); +#if 0 + debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); + debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); + + for (i = 0; i != comm->irq_index; ++i) + debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); + for (i = 0; i != comm->parse_endpos_index; ++i) + debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); +#endif + debug_printf("mb_y = %u\n", comm->mb_y[idx]); + if (comm->status_vp[idx] == 1) + return; + + if ((comm->pvp_stage & 0xff) != 0xff) { + unsigned *map; + assert(nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client) >= 0); + map = inter_bo->map; + for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { + debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]); + } + munmap(inter_bo->map, inter_bo->size); + inter_bo->map = NULL; + } + assert((comm->pvp_stage & 0xff) == 0xff); +} +#endif + +static void 
+nvc0_decoder_kick_ref(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target) +{ + dec->refs[target->valid_ref].vidbuf = NULL; + dec->refs[target->valid_ref].last_used = 0; +// debug_printf("Unreffed %p\n", target); +} + +void +nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, + struct nouveau_vp3_video_buffer *target, unsigned comm_seq, + unsigned caps, unsigned is_ref, + struct nouveau_vp3_video_buffer *refs[16]) +{ + struct nouveau_pushbuf *push = dec->pushbuf[1]; + uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr; + uint32_t slice_size, bucket_size, ring_size, i; + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile); + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH]; + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; + u32 fence_extra = 0, codec_extra = 0; + struct nouveau_pushbuf_refn bo_refs[] = { + { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, + { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, +#if NOUVEAU_VP3_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo; + +#if NOUVEAU_VP3_DEBUG_FENCE + fence_extra = 4; +#endif + + if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) { + nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); + codec_extra += 2; + } else + nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size); + + if (dec->base.max_references > 2) + codec_extra += 1 + (dec->base.max_references - 2); + + pic_addr[16] = nouveau_vp3_video_addr(dec, target) >> 8; + last_addr = null_addr = nouveau_vp3_video_addr(dec, NULL) >> 8; + + for (i = 0; i < dec->base.max_references; ++i) { + if (!refs[i]) + pic_addr[i] = last_addr; + else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i]) + 
last_addr = pic_addr[i] = nouveau_vp3_video_addr(dec, refs[i]) >> 8; + else + pic_addr[i] = null_addr; + } + if (!is_ref) + nvc0_decoder_kick_ref(dec, target); + + nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) + + 6 + codec_extra + fence_extra + 2, num_refs, 0); + + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + bsp_addr = bsp_bo->offset >> 8; +#if NOUVEAU_VP3_DEBUG_FENCE + comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8; +#else + comm_addr = bsp_addr + (COMM_OFFSET>>8); +#endif + inter_addr = inter_bo->offset >> 8; + if (dec->fw_bo) + ucode_addr = dec->fw_bo->offset >> 8; + else + ucode_addr = 0; + + BEGIN_NVC0(push, SUBC_VP(0x700), 7); + PUSH_DATA (push, caps); // 700 + PUSH_DATA (push, comm_seq); // 704 + PUSH_DATA (push, 0); // 708 fuc targets, ignored for nvc0 + PUSH_DATA (push, dec->fw_sizes); // 70c + PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr + PUSH_DATA (push, inter_addr); // 714 inter_parm + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs + + if (bucket_size) { + uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2); + + BEGIN_NVC0(push, SUBC_VP(0x71c), 2); + PUSH_DATA (push, tmpimg_addr >> 8); // 71c + PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs + } + + BEGIN_NVC0(push, SUBC_VP(0x724), 5); + PUSH_DATA (push, comm_addr); // 724 + PUSH_DATA (push, ucode_addr); // 728 + PUSH_DATA (push, pic_addr[16]); // 734 + PUSH_DATA (push, pic_addr[0]); // 72c + PUSH_DATA (push, pic_addr[1]); // 730 + + if (dec->base.max_references > 2) { + int i; + + BEGIN_NVC0(push, SUBC_VP(0x400), dec->base.max_references - 2); + for (i = 2; i < dec->base.max_references; ++i) { + assert(0x400 + (i - 2) * 4 < 0x438); + PUSH_DATA (push, pic_addr[i]); + } + } + + if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) { + BEGIN_NVC0(push, SUBC_VP(0x438), 1); + PUSH_DATA (push, desc.h264->slice_count); + } + + //debug_printf("Decoding %08lx with %08lx and 
%08lx\n", pic_addr[16], pic_addr[0], pic_addr[1]); + +#if NOUVEAU_VP3_DEBUG_FENCE + BEGIN_NVC0(push, SUBC_VP(0x240), 3); + PUSH_DATAh(push, (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push, (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NVC0(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK(push); + + { + unsigned spin = 0; + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) { + debug_printf("v%u: %u\n", dec->fence_seq, dec->fence_map[4]); + dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8); + } + } while (dec->fence_seq > dec->fence_map[4]); + } + dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8); +#else + BEGIN_NVC0(push, SUBC_VP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); +#endif +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h new file mode 100644 index 00000000000..3514d9dc3d0 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h @@ -0,0 +1,144 @@ + +#ifndef __NVC0_WINSYS_H__ +#define __NVC0_WINSYS_H__ + +#include <stdint.h> +#include <unistd.h> + +#include "pipe/p_defines.h" + +#include "nouveau_winsys.h" +#include "nouveau_buffer.h" + +#ifndef NV04_PFIFO_MAX_PACKET_LEN +#define NV04_PFIFO_MAX_PACKET_LEN 2047 +#endif + + +static INLINE void +nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin, + unsigned flags, struct nouveau_bo *bo) +{ + nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL; +} + +static INLINE void +nvc0_add_resident(struct nouveau_bufctx *bufctx, int bin, + struct nv04_resource *res, unsigned flags) +{ + struct nouveau_bufref *ref = + nouveau_bufctx_refn(bufctx, bin, res->bo, flags | res->domain); + ref->priv = res; + ref->priv_data = flags; +} + +#define BCTX_REFN_bo(ctx, bin, fl, bo) \ + nv50_add_bufctx_resident_bo(ctx, NVC0_BIND_##bin, fl, bo); + +#define BCTX_REFN(bctx, bin, res, acc) \ + nvc0_add_resident(bctx, NVC0_BIND_##bin, res, NOUVEAU_BO_##acc) + 
+static INLINE void +PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) +{ + struct nouveau_pushbuf_refn ref = { bo, flags }; + nouveau_pushbuf_refn(push, &ref, 1); +} + + +#define SUBC_3D(m) 0, (m) +#define NVC0_3D(n) SUBC_3D(NVC0_3D_##n) +#define NVE4_3D(n) SUBC_3D(NVE4_3D_##n) + +#define SUBC_COMPUTE(m) 1, (m) +#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n) +#define NVE4_COMPUTE(n) SUBC_COMPUTE(NVE4_COMPUTE_##n) + +#define SUBC_M2MF(m) 2, (m) +#define SUBC_P2MF(m) 2, (m) +#define NVC0_M2MF(n) SUBC_M2MF(NVC0_M2MF_##n) +#define NVE4_P2MF(n) SUBC_P2MF(NVE4_P2MF_##n) + +#define SUBC_2D(m) 3, (m) +#define NVC0_2D(n) SUBC_2D(NVC0_2D_##n) + +#define SUBC_COPY(m) 4, (m) +#define NVE4_COPY(m) SUBC_COPY(NVE4_COPY_##n) + +#define SUBC_SW(m) 7, (m) + +static INLINE uint32_t +NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size) +{ + return 0x20000000 | (size << 16) | (subc << 13) | (mthd >> 2); +} + +static INLINE uint32_t +NVC0_FIFO_PKHDR_NI(int subc, int mthd, unsigned size) +{ + return 0x60000000 | (size << 16) | (subc << 13) | (mthd >> 2); +} + +static INLINE uint32_t +NVC0_FIFO_PKHDR_IL(int subc, int mthd, uint8_t data) +{ + return 0x80000000 | (data << 16) | (subc << 13) | (mthd >> 2); +} + +static INLINE uint32_t +NVC0_FIFO_PKHDR_1I(int subc, int mthd, unsigned size) +{ + return 0xa0000000 | (size << 16) | (subc << 13) | (mthd >> 2); +} + + +static INLINE uint8_t +nouveau_bo_memtype(const struct nouveau_bo *bo) +{ + return bo->config.nvc0.memtype; +} + + +static INLINE void +PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data) +{ + *push->cur++ = (uint32_t)(data >> 32); +} + +static INLINE void +BEGIN_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size) +{ +#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING + PUSH_SPACE(push, size + 1); +#endif + PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(subc, mthd, size)); +} + +static INLINE void +BEGIN_NIC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size) +{ +#ifndef 
NVC0_PUSH_EXPLICIT_SPACE_CHECKING + PUSH_SPACE(push, size + 1); +#endif + PUSH_DATA (push, NVC0_FIFO_PKHDR_NI(subc, mthd, size)); +} + +static INLINE void +BEGIN_1IC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size) +{ +#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING + PUSH_SPACE(push, size + 1); +#endif + PUSH_DATA (push, NVC0_FIFO_PKHDR_1I(subc, mthd, size)); +} + +static INLINE void +IMMED_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, uint8_t data) +{ +#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING + PUSH_SPACE(push, 1); +#endif + PUSH_DATA (push, NVC0_FIFO_PKHDR_IL(subc, mthd, data)); +} + +#endif /* __NVC0_WINSYS_H__ */ diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c new file mode 100644 index 00000000000..06c914fb5e6 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -0,0 +1,652 @@ +/* + * Copyright 2012 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/*
 * One-time compute setup for a screen.
 *
 * Picks the compute class from the chipset family, creates the compute
 * object on the screen's channel, allocates the parameter buffer
 * (screen->parm, NVE4_CP_PARAM_SIZE bytes in VRAM) and emits the initial
 * compute state into push.
 *
 * Returns 0 on success, -1 for an unsupported chipset, or the error code
 * returned by nouveau_object_new() / nouveau_bo_new().
 */
int
nve4_screen_compute_setup(struct nvc0_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_device *dev = screen->base.device;
   struct nouveau_object *chan = screen->base.channel;
   unsigned i;
   int ret;
   uint32_t obj_class;

   /* Select the compute class by chipset family (high nibble). */
   switch (dev->chipset & 0xf0) {
   case 0xf0:
      obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
      break;
   case 0xe0:
      obj_class = NVE4_COMPUTE_CLASS; /* GK104 */
      break;
   default:
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }

   ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
      return ret;
   }

   /* Parameter buffer; note that this failure path returns without logging. */
   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, NVE4_CP_PARAM_SIZE, NULL,
                        &screen->parm);
   if (ret)
      return ret;

   /* Bind the new object to the compute subchannel. */
   BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->oclass);

   BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls->offset);
   PUSH_DATA (push, screen->tls->offset);
   /* No idea why there are 2. Divide size by 2 to be safe.
    * Actually this might be per-MP TEMP size and looks like I'm only using
    * 2 MPs instead of all 8.
    *
    * NOTE(review): the code below divides by screen->mp_count, not by 2,
    * while the DEBUG upload further down still uses tls->size / 2. Confirm
    * which of the two is intended.
    */
   BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(0)), 3);
   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
   PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
   PUSH_DATA (push, 0xff);
   BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(1)), 3);
   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
   PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
   PUSH_DATA (push, 0xff);

   /* Unified address space ? Who needs that ? Certainly not OpenCL.
    *
    * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
    * accessible. We cannot prevent that at the moment, so expect failure.
    */
   BEGIN_NVC0(push, NVE4_COMPUTE(LOCAL_BASE), 1);
   PUSH_DATA (push, 1 << 24);
   BEGIN_NVC0(push, NVE4_COMPUTE(SHARED_BASE), 1);
   PUSH_DATA (push, 2 << 24);

   BEGIN_NVC0(push, NVE4_COMPUTE(CODE_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->text->offset);
   PUSH_DATA (push, screen->text->offset);

   /* Unnamed method 0x0310; the value differs between the two classes. */
   BEGIN_NVC0(push, SUBC_COMPUTE(0x0310), 1);
   PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);

   /* NOTE: these do not affect the state used by the 3D object */
   BEGIN_NVC0(push, NVE4_COMPUTE(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
   /* The TSC table is placed 64 KiB into the same txc buffer. */
   BEGIN_NVC0(push, NVE4_COMPUTE(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);

   /* NVF0-class only: write 0x100, then the 63 values 0x38000 | 63 down to
    * 0x38000 | 1, to unnamed method 0x0248 (non-incrementing packet). */
   if (obj_class >= NVF0_COMPUTE_CLASS) {
      BEGIN_NVC0(push, SUBC_COMPUTE(0x0248), 1);
      PUSH_DATA (push, 0x100);
      BEGIN_NIC0(push, SUBC_COMPUTE(0x0248), 63);
      for (i = 63; i >= 1; --i)
         PUSH_DATA(push, 0x38000 | i);
      IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
      IMMED_NVC0(push, SUBC_COMPUTE(0x518), 0);
   }

   BEGIN_NVC0(push, NVE4_COMPUTE(TEX_CB_INDEX), 1);
   PUSH_DATA (push, 0); /* does not interfere with 3D */

   if (obj_class >= NVF0_COMPUTE_CLASS)
      IMMED_NVC0(push, SUBC_COMPUTE(0x02c4), 1);

   /* MS sample coordinate offsets: these do not work with _ALT modes !
    *
    * Uploads 64 bytes (16 dwords: eight pairs, numbered 0-7 below) to
    * NVE4_CP_INPUT_MS_OFFSETS inside the parameter buffer. UPLOAD_EXEC
    * takes one control word followed by the 16 data words, hence 17.
    */
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, 64);
   PUSH_DATA (push, 1);
   BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
   PUSH_DATA (push, 0); /* 0 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 1); /* 1 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0); /* 2 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 1); /* 3 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 2); /* 4 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 3); /* 5 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 2); /* 6 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 3); /* 7 */
   PUSH_DATA (push, 1);

#ifdef DEBUG
   /* Debug builds: upload 28 bytes (7 dwords) starting at
    * NVE4_CP_INPUT_TRAP_INFO_PTR: trap-info address (low, high), TEMP
    * address (low, high), then the three sizes commented below. The
    * control word here is the bare value 1, without the (0x20 << 1) field
    * used by the other uploads in this function.
    */
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, 28);
   PUSH_DATA (push, 1);
   BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 8);
   PUSH_DATA (push, 1);
   PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
   PUSH_DATA (push, screen->tls->offset);
   PUSH_DATAh(push, screen->tls->offset);
   PUSH_DATA (push, screen->tls->size / 2); /* MP TEMP block size */
   PUSH_DATA (push, screen->tls->size / 2 / 64); /* warp TEMP block size */
   PUSH_DATA (push, 0); /* warp cfstack size */
#endif

   /* Flush so the uploads above are picked up through the constant buffer. */
   BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);

   return 0;
}
nv04_resource *res; + uint32_t mask; + unsigned i; + const unsigned t = 1; + + mask = nvc0->surfaces_dirty[t]; + while (mask) { + i = ffs(mask) - 1; + mask &= ~(1 << i); + + /* + * NVE4's surface load/store instructions receive all the information + * directly instead of via binding points, so we have to supply them. + */ + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i)); + PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i)); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 64); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + + nve4_set_surface_info(push, nvc0->surfaces[t][i], screen); + + sf = nv50_surface(nvc0->surfaces[t][i]); + if (sf) { + res = nv04_resource(sf->base.texture); + + if (sf->base.writable) + BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR); + else + BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD); + } + } + if (nvc0->surfaces_dirty[t]) { + BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); + } + + /* re-reference non-dirty surfaces */ + mask = nvc0->surfaces_valid[t] & ~nvc0->surfaces_dirty[t]; + while (mask) { + i = ffs(mask) - 1; + mask &= ~(1 << i); + + sf = nv50_surface(nvc0->surfaces[t][i]); + res = nv04_resource(sf->base.texture); + + if (sf->base.writable) + BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR); + else + BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD); + } + + nvc0->surfaces_dirty[t] = 0; +} + + +/* Thankfully, textures with samplers follow the normal rules. */ +static void +nve4_compute_validate_samplers(struct nvc0_context *nvc0) +{ + boolean need_flush = nve4_validate_tsc(nvc0, 5); + if (need_flush) { + BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, 0); + } +} +/* (Code duplicated at bottom for various non-convincing reasons. + * E.g. 
we might want to use the COMPUTE subchannel to upload TIC/TSC + * entries to avoid a subchannel switch. + * Same for texture cache flushes. + * Also, the bufctx differs, and more IFs in the 3D version looks ugly.) + */ +static void nve4_compute_validate_textures(struct nvc0_context *); + +static void +nve4_compute_set_tex_handles(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + uint64_t address; + const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE); + unsigned i, n; + uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s]; + + if (!dirty) + return; + i = ffs(dirty) - 1; + n = util_logbase2(dirty) + 1 - i; + assert(n); + + address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i); + + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, n * 4); + PUSH_DATA (push, 0x1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + n); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + PUSH_DATAp(push, &nvc0->tex_handles[s][i], n); + + BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); + + nvc0->textures_dirty[s] = 0; + nvc0->samplers_dirty[s] = 0; +} + + +static boolean +nve4_compute_state_validate(struct nvc0_context *nvc0) +{ + if (!nvc0_compute_validate_program(nvc0)) + return FALSE; + if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES) + nve4_compute_validate_textures(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS) + nve4_compute_validate_samplers(nvc0); + if (nvc0->dirty_cp & (NVC0_NEW_CP_TEXTURES | NVC0_NEW_CP_SAMPLERS)) + nve4_compute_set_tex_handles(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_SURFACES) + nve4_compute_validate_surfaces(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS) + nvc0_validate_global_residents(nvc0, + nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL); + + nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE); + + 
nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp); + if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf))) + return FALSE; + if (unlikely(nvc0->state.flushed)) + nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE); + + return TRUE; +} + + +static void +nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input, + const uint *block_layout, + const uint *grid_layout) +{ + struct nvc0_screen *screen = nvc0->screen; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *cp = nvc0->compprog; + + if (cp->parm_size) { + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->parm->offset); + PUSH_DATA (push, screen->parm->offset); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, cp->parm_size); + PUSH_DATA (push, 0x1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4)); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + PUSH_DATAp(push, input, cp->parm_size / 4); + } + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); + PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 7 * 4); + PUSH_DATA (push, 0x1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + 7); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + PUSH_DATAp(push, block_layout, 3); + PUSH_DATAp(push, grid_layout, 3); + PUSH_DATA (push, 0); + + BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); +} + +static INLINE uint8_t +nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size) +{ + if (shared_size > (32 << 10)) + return NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1; + if (shared_size > (16 << 10)) + return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1; + return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1; +} + +static void 
+nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, + struct nve4_cp_launch_desc *desc, + uint32_t label, + const uint *block_layout, + const uint *grid_layout) +{ + const struct nvc0_screen *screen = nvc0->screen; + const struct nvc0_program *cp = nvc0->compprog; + unsigned i; + + nve4_cp_launch_desc_init_default(desc); + + desc->entry = nvc0_program_symbol_offset(cp, label); + + desc->griddim_x = grid_layout[0]; + desc->griddim_y = grid_layout[1]; + desc->griddim_z = grid_layout[2]; + desc->blockdim_x = block_layout[0]; + desc->blockdim_y = block_layout[1]; + desc->blockdim_z = block_layout[2]; + + desc->shared_size = align(cp->cp.smem_size, 0x100); + desc->local_size_p = align(cp->cp.lmem_size, 0x10); + desc->local_size_n = 0; + desc->cstack_size = 0x800; + desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size); + + desc->gpr_alloc = cp->num_gprs; + desc->bar_alloc = cp->num_barriers; + + for (i = 0; i < 7; ++i) { + const unsigned s = 5; + if (nvc0->constbuf[s][i].u.buf) + nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]); + } + nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE); +} + +static INLINE struct nve4_cp_launch_desc * +nve4_compute_alloc_launch_desc(struct nouveau_context *nv, + struct nouveau_bo **pbo, uint64_t *pgpuaddr) +{ + uint8_t *ptr = nouveau_scratch_get(nv, 512, pgpuaddr, pbo); + if (!ptr) + return NULL; + if (*pgpuaddr & 255) { + unsigned adj = 256 - (*pgpuaddr & 255); + ptr += adj; + *pgpuaddr += adj; + } + return (struct nve4_cp_launch_desc *)ptr; +} + +void +nve4_launch_grid(struct pipe_context *pipe, + const uint *block_layout, const uint *grid_layout, + uint32_t label, + const void *input) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nve4_cp_launch_desc *desc; + uint64_t desc_gpuaddr; + struct nouveau_bo *desc_bo; + int ret; + + desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, 
&desc_gpuaddr); + if (!desc) { + ret = -1; + goto out; + } + BCTX_REFN_bo(nvc0->bufctx_cp, CP_DESC, NOUVEAU_BO_GART | NOUVEAU_BO_RD, + desc_bo); + + ret = !nve4_compute_state_validate(nvc0); + if (ret) + goto out; + + nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout); +#ifdef DEBUG + if (debug_get_num_option("NV50_PROG_DEBUG", 0)) + nve4_compute_dump_launch_desc(desc); +#endif + + nve4_compute_upload_input(nvc0, input, block_layout, grid_layout); + + /* upload descriptor and flush */ +#if 0 + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, desc_gpuaddr); + PUSH_DATA (push, desc_gpuaddr); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 256); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (256 / 4)); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1)); + PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4); + BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE); +#endif + BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH_DESC_ADDRESS), 1); + PUSH_DATA (push, desc_gpuaddr >> 8); + BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH), 1); + PUSH_DATA (push, 0x3); + BEGIN_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + +out: + if (ret) + NOUVEAU_ERR("Failed to launch grid !\n"); + nouveau_scratch_done(&nvc0->base); + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC); +} + + +#define NVE4_TIC_ENTRY_INVALID 0x000fffff + +static void +nve4_compute_validate_textures(struct nvc0_context *nvc0) +{ + struct nouveau_bo *txc = nvc0->screen->txc; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const unsigned s = 5; + unsigned i; + uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX]; + unsigned n[2] = { 0, 0 }; + + for (i = 0; i < nvc0->num_textures[s]; ++i) { + struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); + struct nv04_resource *res; + const boolean dirty = 
!!(nvc0->textures_dirty[s] & (1 << i)); + + if (!tic) { + nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; + continue; + } + res = nv04_resource(tic->pipe.texture); + + if (tic->id < 0) { + tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); + + PUSH_SPACE(push, 16); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, txc->offset + (tic->id * 32)); + PUSH_DATA (push, txc->offset + (tic->id * 32)); + BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 32); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 9); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + PUSH_DATAp(push, &tic->tic[0], 8); + + commands[0][n[0]++] = (tic->id << 4) | 1; + } else + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + commands[1][n[1]++] = (tic->id << 4) | 1; + } + nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID; + nvc0->tex_handles[s][i] |= tic->id; + if (dirty) + BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD); + } + for (; i < nvc0->state.num_textures[s]; ++i) + nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; + + if (n[0]) { + BEGIN_NIC0(push, NVE4_COMPUTE(TIC_FLUSH), n[0]); + PUSH_DATAp(push, commands[0], n[0]); + } + if (n[1]) { + BEGIN_NIC0(push, NVE4_COMPUTE(TEX_CACHE_CTL), n[1]); + PUSH_DATAp(push, commands[1], n[1]); + } + + nvc0->state.num_textures[s] = nvc0->num_textures[s]; +} + + +#ifdef DEBUG +static const char *nve4_cache_split_name(unsigned value) +{ + switch (value) { + case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1"; + case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1"; + case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1"; + default: + return "(invalid)"; + } +} + +static void +nve4_compute_dump_launch_desc(const struct 
nve4_cp_launch_desc *desc) +{ + const uint32_t *data = (const uint32_t *)desc; + unsigned i; + boolean zero = FALSE; + + debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n"); + + for (i = 0; i < sizeof(*desc); i += 4) { + if (data[i / 4]) { + debug_printf("[%x]: 0x%08x\n", i, data[i / 4]); + zero = FALSE; + } else + if (!zero) { + debug_printf("...\n"); + zero = TRUE; + } + } + + debug_printf("entry = 0x%x\n", desc->entry); + debug_printf("grid dimensions = %ux%ux%u\n", + desc->griddim_x, desc->griddim_y, desc->griddim_z); + debug_printf("block dimensions = %ux%ux%u\n", + desc->blockdim_x, desc->blockdim_y, desc->blockdim_z); + debug_printf("s[] size: 0x%x\n", desc->shared_size); + debug_printf("l[] size: -0x%x / +0x%x\n", + desc->local_size_n, desc->local_size_p); + debug_printf("stack size: 0x%x\n", desc->cstack_size); + debug_printf("barrier count: %u\n", desc->bar_alloc); + debug_printf("$r count: %u\n", desc->gpr_alloc); + debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split)); + + for (i = 0; i < 8; ++i) { + uint64_t address; + uint32_t size = desc->cb[i].size; + boolean valid = !!(desc->cb_mask & (1 << i)); + + address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l; + + if (!valid && !address && !size) + continue; + debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n", + i, address, size, valid ? 
"" : " (invalid)"); + } +} +#endif + +#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER +static void +nve4_compute_trap_info(struct nvc0_context *nvc0) +{ + struct nvc0_screen *screen = nvc0->screen; + struct nouveau_bo *bo = screen->parm; + int ret, i; + volatile struct nve4_mp_trap_info *info; + uint8_t *map; + + ret = nouveau_bo_map(bo, NOUVEAU_BO_RDWR, nvc0->base.client); + if (ret) + return; + map = (uint8_t *)bo->map; + info = (volatile struct nve4_mp_trap_info *)(map + NVE4_CP_PARAM_TRAP_INFO); + + if (info->lock) { + debug_printf("trapstat = %08x\n", info->trapstat); + debug_printf("warperr = %08x\n", info->warperr); + debug_printf("PC = %x\n", info->pc); + debug_printf("tid = %u %u %u\n", + info->tid[0], info->tid[1], info->tid[2]); + debug_printf("ctaid = %u %u %u\n", + info->ctaid[0], info->ctaid[1], info->ctaid[2]); + for (i = 0; i <= 63; ++i) + debug_printf("$r%i = %08x\n", i, info->r[i]); + for (i = 0; i <= 6; ++i) + debug_printf("$p%i = %i\n", i, (info->flags >> i) & 1); + debug_printf("$c = %x\n", info->flags >> 12); + } + info->lock = 0; +} +#endif diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h new file mode 100644 index 00000000000..79862b7dcd8 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h @@ -0,0 +1,131 @@ + +#ifndef NVE4_COMPUTE_H +#define NVE4_COMPUTE_H + +#include "nv50/nv50_defs.xml.h" +#include "nvc0/nve4_compute.xml.h" + +/* Input space is implemented as c0[], to which we bind the screen->parm bo. 
+ */ +#define NVE4_CP_INPUT_USER 0x0000 +#define NVE4_CP_INPUT_USER_LIMIT 0x1000 +#define NVE4_CP_INPUT_GRID_INFO(i) (0x1000 + (i) * 4) +#define NVE4_CP_INPUT_NTID(i) (0x1000 + (i) * 4) +#define NVE4_CP_INPUT_NCTAID(i) (0x100c + (i) * 4) +#define NVE4_CP_INPUT_GRIDID 0x1018 +#define NVE4_CP_INPUT_TEX(i) (0x1040 + (i) * 4) +#define NVE4_CP_INPUT_TEX_STRIDE 4 +#define NVE4_CP_INPUT_TEX_MAX 32 +#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0 +#define NVE4_CP_INPUT_SUF_STRIDE 64 +#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE) +#define NVE4_CP_INPUT_SUF_MAX 32 +#define NVE4_CP_INPUT_TRAP_INFO_PTR 0x1900 +#define NVE4_CP_INPUT_TEMP_PTR 0x1908 +#define NVE4_CP_INPUT_MP_TEMP_SIZE 0x1910 +#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914 +#define NVE4_CP_INPUT_CSTACK_SIZE 0x1918 +#define NVE4_CP_INPUT_SIZE 0x1a00 +#define NVE4_CP_PARAM_TRAP_INFO 0x2000 +#define NVE4_CP_PARAM_TRAP_INFO_SZ (1 << 16) +#define NVE4_CP_PARAM_SIZE (NVE4_CP_PARAM_TRAP_INFO + (1 << 16)) + +struct nve4_cp_launch_desc +{ + u32 unk0[8]; + u32 entry; + u32 unk9[3]; + u32 griddim_x : 31; + u32 unk12 : 1; + u16 griddim_y; + u16 griddim_z; + u32 unk14[3]; + u16 shared_size; /* must be aligned to 0x100 */ + u16 unk15; + u16 unk16; + u16 blockdim_x; + u16 blockdim_y; + u16 blockdim_z; + u32 cb_mask : 8; + u32 unk20_8 : 21; + u32 cache_split : 2; + u32 unk20_31 : 1; + u32 unk21[8]; + struct { + u32 address_l; + u32 address_h : 8; + u32 reserved : 7; + u32 size : 17; + } cb[8]; + u32 local_size_p : 20; + u32 unk45_20 : 7; + u32 bar_alloc : 5; + u32 local_size_n : 20; + u32 unk46_20 : 4; + u32 gpr_alloc : 8; + u32 cstack_size : 20; + u32 unk47_20 : 12; + u32 unk48[16]; +}; + +static INLINE void +nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc) +{ + memset(desc, 0, sizeof(*desc)); + + desc->unk0[7] = 0xbc000000; + desc->unk9[2] = 0x44014000; + desc->unk47_20 = 0x300; +} + +static INLINE void +nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc, + unsigned index, + struct 
nouveau_bo *bo, + uint32_t base, uint16_t size) +{ + uint64_t address = bo->offset + base; + + assert(index < 8); + assert(!(base & 0xff)); + assert(size <= 65536); + + desc->cb[index].address_l = address; + desc->cb[index].address_h = address >> 32; + desc->cb[index].size = size; + + desc->cb_mask |= 1 << index; +} + +static INLINE void +nve4_cp_launch_desc_set_ctx_cb(struct nve4_cp_launch_desc *desc, + unsigned index, + const struct nvc0_constbuf *cb) +{ + assert(index < 8); + + if (!cb->u.buf) { + desc->cb_mask &= ~(1 << index); + } else { + const struct nv04_resource *buf = nv04_resource(cb->u.buf); + assert(!cb->user); + nve4_cp_launch_desc_set_cb(desc, index, + buf->bo, buf->offset + cb->offset, cb->size); + } +} + +struct nve4_mp_trap_info { + u32 lock; + u32 pc; + u32 trapstat; + u32 warperr; + u32 tid[3]; + u32 ctaid[3]; + u32 pad028[2]; + u32 r[64]; + u32 flags; + u32 pad134[3]; + u32 s[0x3000]; +}; + +#endif /* NVE4_COMPUTE_H */ diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h new file mode 100644 index 00000000000..e971fc1ac6b --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h @@ -0,0 +1,429 @@ +#ifndef NVE4_COMPUTE_XML +#define NVE4_COMPUTE_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- nve4_compute.xml ( 10168 bytes, from 2013-06-04 13:57:02) +- copyright.xml ( 6452 bytes, from 2012-04-16 22:51:01) +- nvchipsets.xml ( 3954 bytes, from 2013-06-04 13:57:02) +- nv_object.xml ( 14395 bytes, from 2013-06-04 13:57:02) +- nv_defs.xml ( 4437 bytes, from 2012-04-16 22:51:01) +- nv50_defs.xml ( 16877 bytes, from 2013-07-17 09:10:01) +- nve4_p2mf.xml ( 2373 bytes, from 2013-06-04 13:57:02) + +Copyright (C) 2006-2013 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) 
+- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +/* NVE4_COMPUTE_* constants. For every FOO__MASK / FOO__SHIFT pair below, __SHIFT is the index of the lowest set bit of __MASK (e.g. UPLOAD_EXEC_UNK1: mask 0x7e, shift 1). */ +#define NVE4_COMPUTE_UNK0144 0x00000144 +/* NVE4_COMPUTE_UPLOAD itself is 0; the UPLOAD_* names that follow carry the values 0x180-0x1fc. NOTE(review): presumably a group base that the members were expanded against -- confirm against the generator input. */ +#define NVE4_COMPUTE_UPLOAD 0x00000000 + +#define NVE4_COMPUTE_UPLOAD_LINE_LENGTH_IN 0x00000180 + +#define NVE4_COMPUTE_UPLOAD_LINE_COUNT 0x00000184 + +#define NVE4_COMPUTE_UPLOAD_DST_ADDRESS_HIGH 0x00000188 + +#define NVE4_COMPUTE_UPLOAD_DST_ADDRESS_LOW 0x0000018c + +#define NVE4_COMPUTE_UPLOAD_DST_PITCH 0x00000190 + +#define NVE4_COMPUTE_UPLOAD_DST_TILE_MODE 0x00000194 + +#define NVE4_COMPUTE_UPLOAD_DST_WIDTH 0x00000198 + +#define NVE4_COMPUTE_UPLOAD_DST_HEIGHT 0x0000019c + +#define NVE4_COMPUTE_UPLOAD_DST_DEPTH 0x000001a0 + +#define NVE4_COMPUTE_UPLOAD_DST_Z 0x000001a4 + +#define NVE4_COMPUTE_UPLOAD_DST_X 0x000001a8 + +#define NVE4_COMPUTE_UPLOAD_DST_Y 0x000001ac + +#define NVE4_COMPUTE_UPLOAD_EXEC 0x000001b0 +#define NVE4_COMPUTE_UPLOAD_EXEC_LINEAR 0x00000001 +#define NVE4_COMPUTE_UPLOAD_EXEC_UNK1__MASK 0x0000007e +#define NVE4_COMPUTE_UPLOAD_EXEC_UNK1__SHIFT 1 +#define NVE4_COMPUTE_UPLOAD_EXEC_BUF_NOTIFY 0x00000300 +#define NVE4_COMPUTE_UPLOAD_EXEC_UNK12__MASK 0x0000f000 +#define NVE4_COMPUTE_UPLOAD_EXEC_UNK12__SHIFT 12 + +#define NVE4_COMPUTE_UPLOAD_DATA 0x000001b4 + +#define NVE4_COMPUTE_UPLOAD_QUERY_ADDRESS_HIGH 0x000001dc + +#define NVE4_COMPUTE_UPLOAD_QUERY_ADDRESS_LOW 0x000001e0 + +#define NVE4_COMPUTE_UPLOAD_QUERY_SEQUENCE 0x000001e4 + +#define NVE4_COMPUTE_UPLOAD_UNK01F0 0x000001f0 + +#define NVE4_COMPUTE_UPLOAD_UNK01F4 0x000001f4 + +#define NVE4_COMPUTE_UPLOAD_UNK01F8 0x000001f8 + +#define NVE4_COMPUTE_UPLOAD_UNK01FC 0x000001fc + +#define NVE4_COMPUTE_SHARED_BASE 0x00000214 + +#define NVE4_COMPUTE_MEM_BARRIER 0x0000021c +#define NVE4_COMPUTE_MEM_BARRIER_UNK0__MASK 0x00000007 +#define NVE4_COMPUTE_MEM_BARRIER_UNK0__SHIFT 0 +#define NVE4_COMPUTE_MEM_BARRIER_UNK4 0x00000010 +#define NVE4_COMPUTE_MEM_BARRIER_UNK12 0x00001000 + +#define NVE4_COMPUTE_UNK0240 0x00000240 + +#define NVE4_COMPUTE_UNK244_TIC_FLUSH 0x00000244 + +#define NVE4_COMPUTE_UNK0248 0x00000248 +#define NVE4_COMPUTE_UNK0248_UNK0__MASK
0x0000003f +#define NVE4_COMPUTE_UNK0248_UNK0__SHIFT 0 +#define NVE4_COMPUTE_UNK0248_UNK8__MASK 0x00ffff00 +#define NVE4_COMPUTE_UNK0248_UNK8__SHIFT 8 + +#define NVE4_COMPUTE_UNK0274 0x00000274 + +#define NVE4_COMPUTE_UNK0278 0x00000278 + +#define NVE4_COMPUTE_UNK027C 0x0000027c + +#define NVE4_COMPUTE_UNK0280 0x00000280 + +#define NVE4_COMPUTE_UNK0284 0x00000284 + +#define NVE4_COMPUTE_UNK0288 0x00000288 + +#define NVE4_COMPUTE_UNK0290 0x00000290 + +#define NVE4_COMPUTE_UNK02B0 0x000002b0 +/* NOTE(review): the __SHR 8 suffix presumably means the descriptor address is supplied shifted right by 8 bits -- confirm against the code that emits LAUNCH_DESC_ADDRESS. */ +#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS 0x000002b4 +#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS__SHR 8 + +#define NVE4_COMPUTE_UNK02B8 0x000002b8 + +#define NVE4_COMPUTE_LAUNCH 0x000002bc +/* Indexed entries: NAME(i0) is a base plus a stride times i0, and NAME__ESIZE repeats that stride. MP_TEMP_SIZE_HIGH, _LOW and _MASK sit at +0x0, +0x4 and +0x8 inside each 0xc-sized element. NOTE(review): __LEN is presumably the number of valid i0 values -- confirm. */ +#define NVE4_COMPUTE_MP_TEMP_SIZE(i0) (0x000002e4 + 0xc*(i0)) +#define NVE4_COMPUTE_MP_TEMP_SIZE__ESIZE 0x0000000c +#define NVE4_COMPUTE_MP_TEMP_SIZE__LEN 0x00000002 + +#define NVE4_COMPUTE_MP_TEMP_SIZE_HIGH(i0) (0x000002e4 + 0xc*(i0)) + +#define NVE4_COMPUTE_MP_TEMP_SIZE_LOW(i0) (0x000002e8 + 0xc*(i0)) + +#define NVE4_COMPUTE_MP_TEMP_SIZE_MASK(i0) (0x000002ec + 0xc*(i0)) + +#define NVE4_COMPUTE_UNK0310 0x00000310 + +#define NVE4_COMPUTE_FIRMWARE(i0) (0x00000500 + 0x4*(i0)) +#define NVE4_COMPUTE_FIRMWARE__ESIZE 0x00000004 +#define NVE4_COMPUTE_FIRMWARE__LEN 0x00000020 + +#define NVE4_COMPUTE_LOCAL_BASE 0x0000077c + +#define NVE4_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790 + +#define NVE4_COMPUTE_TEMP_ADDRESS_LOW 0x00000794 + +#define NVE4_COMPUTE_UNK0D94 0x00000d94 + +#define NVE4_COMPUTE_WATCHDOG_TIMER 0x00000de4 + +#define NVE4_COMPUTE_UNK0F44(i0) (0x00000f44 + 0x4*(i0)) +#define NVE4_COMPUTE_UNK0F44__ESIZE 0x00000004 +#define NVE4_COMPUTE_UNK0F44__LEN 0x00000004 + +#define NVE4_COMPUTE_UNK1040(i0) (0x00001040 + 0x4*(i0)) +#define NVE4_COMPUTE_UNK1040__ESIZE 0x00000004 +#define NVE4_COMPUTE_UNK1040__LEN 0x0000000c + +#define NVE4_COMPUTE_UNK1288_TIC_FLUSH 0x00001288 + +#define NVE4_COMPUTE_TSC_FLUSH 0x00001330 +#define NVE4_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001 +#define
NVE4_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVE4_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NVE4_COMPUTE_TIC_FLUSH 0x00001334 +#define NVE4_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001 +#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NVE4_COMPUTE_TEX_CACHE_CTL 0x00001338 +#define NVE4_COMPUTE_TEX_CACHE_CTL_UNK0 0x00000001 +#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0 +#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4 + +#define NVE4_COMPUTE_UNK1424_TSC_FLUSH 0x00001424 + +#define NVE4_COMPUTE_COND_ADDRESS_HIGH 0x00001550 + +#define NVE4_COMPUTE_COND_ADDRESS_LOW 0x00001554 +/* COND_MODE_* are consecutive enumerated values 0-4, not bit flags. */ +#define NVE4_COMPUTE_COND_MODE 0x00001558 +#define NVE4_COMPUTE_COND_MODE_NEVER 0x00000000 +#define NVE4_COMPUTE_COND_MODE_ALWAYS 0x00000001 +#define NVE4_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002 +#define NVE4_COMPUTE_COND_MODE_EQUAL 0x00000003 +#define NVE4_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVE4_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c + +#define NVE4_COMPUTE_TSC_ADDRESS_LOW 0x00001560 + +#define NVE4_COMPUTE_TSC_LIMIT 0x00001564 + +#define NVE4_COMPUTE_TIC_ADDRESS_HIGH 0x00001574 + +#define NVE4_COMPUTE_TIC_ADDRESS_LOW 0x00001578 + +#define NVE4_COMPUTE_TIC_LIMIT 0x0000157c + +#define NVE4_COMPUTE_CODE_ADDRESS_HIGH 0x00001608 + +#define NVE4_COMPUTE_CODE_ADDRESS_LOW 0x0000160c + +#define NVE4_COMPUTE_UNK1690 0x00001690 + +#define NVE4_COMPUTE_FLUSH 0x00001698 +#define NVE4_COMPUTE_FLUSH_CODE 0x00000001 +#define NVE4_COMPUTE_FLUSH_GLOBAL 0x00000010 +#define NVE4_COMPUTE_FLUSH_CB 0x00001000 + +#define NVE4_COMPUTE_UNK1944 0x00001944 + +#define NVE4_COMPUTE_DELAY 0x00001a24 + +#define NVE4_COMPUTE_UNK1A2C(i0) (0x00001a2c + 0x4*(i0)) +#define NVE4_COMPUTE_UNK1A2C__ESIZE 0x00000004 +#define NVE4_COMPUTE_UNK1A2C__LEN 0x00000005 + +#define NVE4_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00 + +#define NVE4_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04 + +#define NVE4_COMPUTE_QUERY_SEQUENCE 0x00001b08 +/* QUERY_GET: MODE is the 2-bit field at bits 0-1 (mask 0x3); INTR and SHORT are the single bits 20 and 28. */ +#define
NVE4_COMPUTE_QUERY_GET 0x00001b0c +#define NVE4_COMPUTE_QUERY_GET_MODE__MASK 0x00000003 +#define NVE4_COMPUTE_QUERY_GET_MODE__SHIFT 0 +#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000 +#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003 +#define NVE4_COMPUTE_QUERY_GET_INTR 0x00100000 +#define NVE4_COMPUTE_QUERY_GET_SHORT 0x10000000 + +#define NVE4_COMPUTE_TEX_CB_INDEX 0x00002608 + +#define NVE4_COMPUTE_UNK260C 0x0000260c + +#define NVE4_COMPUTE_MP_PM_SET(i0) (0x0000335c + 0x4*(i0)) +#define NVE4_COMPUTE_MP_PM_SET__ESIZE 0x00000004 +#define NVE4_COMPUTE_MP_PM_SET__LEN 0x00000008 + +#define NVE4_COMPUTE_MP_PM_A_SIGSEL(i0) (0x0000337c + 0x4*(i0)) +#define NVE4_COMPUTE_MP_PM_A_SIGSEL__ESIZE 0x00000004 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL__LEN 0x00000004 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_NONE 0x00000000 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_USER 0x00000001 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_LAUNCH 0x00000003 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_EXEC 0x00000004 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_ISSUE 0x00000005 +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_LDST 0x0000001b +#define NVE4_COMPUTE_MP_PM_A_SIGSEL_BRANCH 0x0000001c + +#define NVE4_COMPUTE_MP_PM_B_SIGSEL(i0) (0x0000338c + 0x4*(i0)) +#define NVE4_COMPUTE_MP_PM_B_SIGSEL__ESIZE 0x00000004 +#define NVE4_COMPUTE_MP_PM_B_SIGSEL__LEN 0x00000004 +#define NVE4_COMPUTE_MP_PM_B_SIGSEL_NONE 0x00000000 +#define NVE4_COMPUTE_MP_PM_B_SIGSEL_WARP 0x00000002 +#define NVE4_COMPUTE_MP_PM_B_SIGSEL_REPLAY 0x00000008 +#define NVE4_COMPUTE_MP_PM_B_SIGSEL_TRANSACTION 0x0000000e +#define NVE4_COMPUTE_MP_PM_B_SIGSEL_L1 0x00000010 +#define NVE4_COMPUTE_MP_PM_B_SIGSEL_MEM 0x00000011 +/* SRCSEL packs six GRPn/SIGn pairs, five bits per pair: a 2-bit GRPn field at bit 5n followed by a 3-bit SIGn field at bit 5n+2 (n = 0..5, bits 0-29). */ +#define NVE4_COMPUTE_MP_PM_SRCSEL(i0) (0x0000339c + 0x4*(i0)) +#define NVE4_COMPUTE_MP_PM_SRCSEL__ESIZE 0x00000004 +#define NVE4_COMPUTE_MP_PM_SRCSEL__LEN 0x00000008 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP0__MASK 0x00000003 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP0__SHIFT 0 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG0__MASK 0x0000001c +#define
NVE4_COMPUTE_MP_PM_SRCSEL_SIG0__SHIFT 2 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP1__MASK 0x00000060 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP1__SHIFT 5 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG1__MASK 0x00000380 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG1__SHIFT 7 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP2__MASK 0x00000c00 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP2__SHIFT 10 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG2__MASK 0x00007000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG2__SHIFT 12 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP3__MASK 0x00018000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP3__SHIFT 15 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG3__MASK 0x000e0000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG3__SHIFT 17 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP4__MASK 0x00300000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP4__SHIFT 20 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG4__MASK 0x01c00000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG4__SHIFT 22 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP5__MASK 0x06000000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP5__SHIFT 25 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG5__MASK 0x38000000 +#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG5__SHIFT 27 +/* FUNC: MODE is the low nibble (named values 0-8 below); FUNC is the 16-bit field directly above it, bits 4-19. */ +#define NVE4_COMPUTE_MP_PM_FUNC(i0) (0x000033bc + 0x4*(i0)) +#define NVE4_COMPUTE_MP_PM_FUNC__ESIZE 0x00000004 +#define NVE4_COMPUTE_MP_PM_FUNC__LEN 0x00000008 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE__MASK 0x0000000f +#define NVE4_COMPUTE_MP_PM_FUNC_MODE__SHIFT 0 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP 0x00000000 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP_PULSE 0x00000001 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_B6 0x00000002 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK3 0x00000003 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP_B6 0x00000004 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP_B6_PULSE 0x00000005 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK6 0x00000006 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK7 0x00000007 +#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK8 0x00000008 +#define NVE4_COMPUTE_MP_PM_FUNC_FUNC__MASK 0x000ffff0 +#define NVE4_COMPUTE_MP_PM_FUNC_FUNC__SHIFT 4 + +#define
NVE4_COMPUTE_MP_PM_UNK33DC 0x000033dc +/* LAUNCH_DESC_*: every value below is smaller than LAUNCH_DESC__SIZE (0x100), and the numeric suffix in LAUNCH_DESC_<n> equals the value divided by 4 (e.g. _12 is 0x30, _45 is 0xb4). NOTE(review): presumably byte offsets into an in-memory launch descriptor rather than method offsets -- confirm against the launch code. */ +#define NVE4_COMPUTE_LAUNCH_DESC__SIZE 0x00000100 +#define NVE4_COMPUTE_LAUNCH_DESC_6 0x00000018 +#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__MASK 0x00000c00 +#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__SHIFT 10 + +#define NVE4_COMPUTE_LAUNCH_DESC_PROG_START 0x00000020 + +#define NVE4_COMPUTE_LAUNCH_DESC_12 0x00000030 +#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__MASK 0x7fffffff +#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__SHIFT 0 + +#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ 0x00000034 +#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__MASK 0x0000ffff +#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__SHIFT 0 +#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__MASK 0xffff0000 +#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__SHIFT 16 + +#define NVE4_COMPUTE_LAUNCH_DESC_17 0x00000044 +#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__MASK 0x0000ffff +#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__SHIFT 0 + +#define NVE4_COMPUTE_LAUNCH_DESC_18 0x00000048 +#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__MASK 0xffff0000 +#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__SHIFT 16 + +#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ 0x0000004c +#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__MASK 0x0000ffff +#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__SHIFT 0 +#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__MASK 0xffff0000 +#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__SHIFT 16 +/* The CACHE_SPLIT_* values are already positioned inside the 0x60000000 mask: they are 1, 2 and 3 placed in bits 29-30. */ +#define NVE4_COMPUTE_LAUNCH_DESC_20 0x00000050 +#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__MASK 0x000000ff +#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__SHIFT 0 +#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__MASK 0x60000000 +#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__SHIFT 29 +#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_16K_SHARED_48K_L1 0x20000000 +#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_32K_SHARED_32K_L1 0x40000000 +#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_48K_SHARED_16K_L1 0x60000000 +/* CB_CONFIG_0(i0) and CB_CONFIG_1(i0) interleave: both use an 8-byte stride, starting at 0x74 and 0x78 respectively. */ +#define
NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0(i0) (0x00000074 + 0x8*(i0)) +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__ESIZE 0x00000008 +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__LEN 0x00000008 +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__MASK 0xffffffff +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__SHIFT 0 + +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1(i0) (0x00000078 + 0x8*(i0)) +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__ESIZE 0x00000008 +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__LEN 0x00000008 +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__MASK 0x000000ff +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__SHIFT 0 +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__MASK 0xffff8000 +#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__SHIFT 15 + +#define NVE4_COMPUTE_LAUNCH_DESC_45 0x000000b4 +#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__MASK 0x000fffff +#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__SHIFT 0 +#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__MASK 0xf8000000 +#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__SHIFT 27 + +#define NVE4_COMPUTE_LAUNCH_DESC_46 0x000000b8 +#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__MASK 0x000fffff +#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__SHIFT 0 +#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__MASK 0x3f000000 +#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__SHIFT 24 + +#define NVE4_COMPUTE_LAUNCH_DESC_47 0x000000bc +#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__MASK 0x000fffff +#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__SHIFT 0 + + +#endif /* NVE4_COMPUTE_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h b/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h new file mode 100644 index 00000000000..68a742fadfe --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h @@ -0,0 +1,107 @@ +#ifndef RNNDB_NVE4_P2MF_XML +#define RNNDB_NVE4_P2MF_XML + +/* Autogenerated file, DO NOT EDIT manually!
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- rnndb/nve4_p2mf.xml ( 1400 bytes, from 2012-04-14 21:29:11) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv_object.xml ( 12736 bytes, from 2012-04-14 21:30:24) +- ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59) +- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12) + +Copyright (C) 2006-2012 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- imirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane 
Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +/* NVE4_P2MF_* constants: one value per name, plus the two EXEC flag bits (0x1 and 0x1000). */ +#define NVE4_P2MF_LINE_LENGTH_IN 0x00000180 + +#define NVE4_P2MF_LINE_COUNT 0x00000184 + +#define NVE4_P2MF_DST_ADDRESS_HIGH 0x00000188 + +#define NVE4_P2MF_DST_ADDRESS_LOW 0x0000018c +/* NOTE(review): nothing is defined at 0x190 here and DST_PITCH is 0x198, whereas the NVE4_COMPUTE_UPLOAD_* set uses 0x190 for DST_PITCH and 0x198 for DST_WIDTH while matching every other value from 0x180 to 0x1b4 that both sets define. Confirm the name at 0x198 against the hardware database before relying on NVE4_P2MF_DST_PITCH. */ +#define NVE4_P2MF_DST_TILE_MODE 0x00000194 + +#define NVE4_P2MF_DST_PITCH 0x00000198 + +#define NVE4_P2MF_DST_HEIGHT 0x0000019c + +#define NVE4_P2MF_DST_DEPTH 0x000001a0 + +#define NVE4_P2MF_DST_Z 0x000001a4 + +#define NVE4_P2MF_DST_X 0x000001a8 + +#define NVE4_P2MF_DST_Y 0x000001ac + +#define NVE4_P2MF_EXEC 0x000001b0 +#define NVE4_P2MF_EXEC_LINEAR 0x00000001 +#define NVE4_P2MF_EXEC_UNK12 0x00001000 + +#define NVE4_P2MF_DATA 0x000001b4 + + +#endif /* RNNDB_NVE4_P2MF_XML */ |