summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nvc0
diff options
context:
space:
mode:
authorChristian König <[email protected]>2011-02-24 22:02:42 +0100
committerChristian König <[email protected]>2011-02-24 22:02:42 +0100
commitb922a0ce12916a91cfc3e56714913fcf63279ff2 (patch)
treee24fa039925220882d155ccb987f7914e83f4372 /src/gallium/drivers/nvc0
parentf013b4f8f1329982727691a55cc263e3011d02bf (diff)
parentc0ad70ae31ee5501281b434d56e389fc92b13a3a (diff)
Merge remote branch 'origin/master' into pipe-video
Conflicts: configure.ac src/gallium/auxiliary/Makefile src/gallium/auxiliary/SConscript src/gallium/drivers/r600/r600_asm.c src/gallium/drivers/r600/r600_asm.h src/gallium/drivers/r600/r600_shader.c src/gallium/drivers/r600/r600_state_inlines.h src/gallium/drivers/r600/r600_texture.c
Diffstat (limited to 'src/gallium/drivers/nvc0')
-rw-r--r--src/gallium/drivers/nvc0/nvc0_3d.xml.h130
-rw-r--r--src/gallium/drivers/nvc0/nvc0_buffer.c25
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.c32
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.h12
-rw-r--r--src/gallium/drivers/nvc0/nvc0_fence.c37
-rw-r--r--src/gallium/drivers/nvc0/nvc0_fence.h3
-rw-r--r--src/gallium/drivers/nvc0/nvc0_miptree.c6
-rw-r--r--src/gallium/drivers/nvc0/nvc0_mm.c4
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc.c35
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc.h19
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_emit.c76
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_optimize.c370
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_print.c40
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_regalloc.c276
-rw-r--r--src/gallium/drivers/nvc0/nvc0_program.c45
-rw-r--r--src/gallium/drivers/nvc0/nvc0_program.h5
-rw-r--r--src/gallium/drivers/nvc0/nvc0_push.c4
-rw-r--r--src/gallium/drivers/nvc0/nvc0_query.c2
-rw-r--r--src/gallium/drivers/nvc0/nvc0_resource.h8
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.c27
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.h14
-rw-r--r--src/gallium/drivers/nvc0/nvc0_shader_state.c73
-rw-r--r--src/gallium/drivers/nvc0/nvc0_state.c153
-rw-r--r--src/gallium/drivers/nvc0/nvc0_state_validate.c155
-rw-r--r--src/gallium/drivers/nvc0/nvc0_stateobj.h11
-rw-r--r--src/gallium/drivers/nvc0/nvc0_surface.c46
-rw-r--r--src/gallium/drivers/nvc0/nvc0_tex.c7
-rw-r--r--src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c141
-rw-r--r--src/gallium/drivers/nvc0/nvc0_transfer.c44
-rw-r--r--src/gallium/drivers/nvc0/nvc0_vbo.c95
-rw-r--r--src/gallium/drivers/nvc0/nvc0_winsys.h2
31 files changed, 1292 insertions, 605 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
index 61932ff2b6a..73a605f94e1 100644
--- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
@@ -84,6 +84,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210
+#define NVC0_3D_MEM_BARRIER 0x0000021c
+#define NVC0_3D_MEM_BARRIER_UNK0 0x00000001
+#define NVC0_3D_MEM_BARRIER_UNK1 0x00000002
+#define NVC0_3D_MEM_BARRIER_UNK2 0x00000004
+#define NVC0_3D_MEM_BARRIER_UNK4 0x00000010
+#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100
+#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000
+
#define NVC0_3D_TESS_MODE 0x00000320
#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f
#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0
@@ -122,11 +130,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0))
-#define NVC0_3D_TFB_UNK0700(i0) (0x00000700 + 0x10*(i0))
+#define NVC0_3D_TFB_UNK07X0(i0) (0x00000700 + 0x10*(i0))
+#define NVC0_3D_TFB_UNK07X0__ESIZE 0x00000010
+#define NVC0_3D_TFB_UNK07X0__LEN 0x00000004
#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0))
+#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010
+#define NVC0_3D_TFB_VARYING_COUNT__LEN 0x00000004
#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0))
+#define NVC0_3D_TFB_BUFFER_STRIDE__ESIZE 0x00000010
+#define NVC0_3D_TFB_BUFFER_STRIDE__LEN 0x00000004
#define NVC0_3D_TFB_ENABLE 0x00000744
@@ -144,29 +158,29 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_RT__ESIZE 0x00000020
#define NVC0_3D_RT__LEN 0x00000008
-#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x20*(i0))
+#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x40*(i0))
-#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x20*(i0))
+#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x40*(i0))
-#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x20*(i0))
+#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x40*(i0))
-#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x20*(i0))
+#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x40*(i0))
-#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x20*(i0))
+#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x40*(i0))
-#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x20*(i0))
+#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x40*(i0))
#define NVC0_3D_RT_TILE_MODE_UNK0 0x00000001
#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070
#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4
#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700
#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8
-#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x20*(i0))
+#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x40*(i0))
#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff
#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0
#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000
-#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x20*(i0))
+#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x40*(i0))
#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0))
#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020
@@ -216,21 +230,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010
#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0) (0x00000d00 + 0x8*(i0))
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK 0x0000ffff
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT 0
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK 0xffff0000
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT 16
-
-#define NVC0_3D_VIEWPORT_CLIP_VERT(i0) (0x00000d04 + 0x8*(i0))
-#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008
-#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008
-#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK 0x0000ffff
-#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT 0
-#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK 0xffff0000
-#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT 16
+#define NVC0_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0))
+#define NVC0_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008
+#define NVC0_3D_CLIP_RECT_HORIZ__LEN 0x00000008
+#define NVC0_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff
+#define NVC0_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0
+#define NVC0_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000
+#define NVC0_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16
+
+#define NVC0_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0))
+#define NVC0_3D_CLIP_RECT_VERT__ESIZE 0x00000008
+#define NVC0_3D_CLIP_RECT_VERT__LEN 0x00000008
+#define NVC0_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff
+#define NVC0_3D_CLIP_RECT_VERT_MIN__SHIFT 0
+#define NVC0_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000
+#define NVC0_3D_CLIP_RECT_VERT_MAX__SHIFT 16
#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0))
#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008
@@ -356,6 +370,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff
#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0
+#define NVC0_3D_CLEAR_FLAGS 0x000010f8
+#define NVC0_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001
+#define NVC0_3D_CLEAR_FLAGS_UNK4 0x00000010
+#define NVC0_3D_CLEAR_FLAGS_SCISSOR 0x00000100
+#define NVC0_3D_CLEAR_FLAGS_VIEWPORT 0x00001000
+
#define NVC0_3D_VERTEX_ID 0x00001118
#define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c
@@ -561,7 +581,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004
#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008
-#define NVC0_3D_STENCIL_FRONT_ENABLE 0x00001380
+#define NVC0_3D_STENCIL_ENABLE 0x00001380
#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384
#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
@@ -605,9 +625,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394
-#define NVC0_3D_STENCIL_FRONT_MASK 0x00001398
+#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x00001398
-#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c
+#define NVC0_3D_STENCIL_FRONT_MASK 0x0000139c
#define NVC0_3D_DRAW_TFB_BASE 0x000013a4
@@ -642,6 +662,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_CLIPID_HEIGHT 0x00001504
#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ 0x00001508
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16
+
+#define NVC0_3D_CLIPID_FILL_RECT_VERT 0x0000150c
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16
+
#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510
#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001
#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002
@@ -814,8 +846,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8
#define NVC0_3D_POINT_COORD_REPLACE 0x00001604
-#define NVC0_3D_POINT_COORD_REPLACE_BITS__MASK 0x00001fff
-#define NVC0_3D_POINT_COORD_REPLACE_BITS__SHIFT 0
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__MASK 0x00000004
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__SHIFT 2
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT 0x00000000
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT 0x00000004
+#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__MASK 0x000007f8
+#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__SHIFT 3
#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608
@@ -864,8 +900,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000
#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001
-#define NVC0_3D_POINT_SPRITE_CTRL 0x00001660
-
#define NVC0_3D_TEX_MISC 0x00001664
#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004
@@ -928,22 +962,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3 0x00000008
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4 0x00000010
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 0x00000002
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK2 0x00000004
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000
-#define NVC0_3D_VIEWPORT_CLIP_RECTS_EN 0x0000194c
+#define NVC0_3D_CLIP_RECTS_EN 0x0000194c
-#define NVC0_3D_VIEWPORT_CLIP_MODE 0x00001950
-#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY 0x00000000
-#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL 0x00000001
-#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER 0x00000002
+#define NVC0_3D_CLIP_RECTS_MODE 0x00001950
+#define NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000
+#define NVC0_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001
+#define NVC0_3D_CLIP_RECTS_MODE_NEVER 0x00000002
#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c
#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001
@@ -996,6 +1036,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00
#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10
+#define NVC0_3D_CLIPID_FILL 0x000019d4
+
#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0))
#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004
#define NVC0_3D_COLOR_MASK__LEN 0x00000008
@@ -1155,9 +1197,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600
-#define NVC0_3D_TFB_VARYING_LOCS(i0) (0x00002800 + 0x4*(i0))
+#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1))
#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004
-#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000080
+#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020
#define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808
diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c
index ea3e642a448..aa949bdfa36 100644
--- a/src/gallium/drivers/nvc0/nvc0_buffer.c
+++ b/src/gallium/drivers/nvc0/nvc0_buffer.c
@@ -59,15 +59,23 @@ release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence)
(*mm) = NULL;
}
-static INLINE boolean
-nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
- unsigned domain)
+INLINE void
+nvc0_buffer_release_gpu_storage(struct nvc0_resource *buf)
{
nouveau_bo_ref(NULL, &buf->bo);
if (buf->mm)
release_allocation(&buf->mm, buf->fence);
+ buf->domain = 0;
+}
+
+static INLINE boolean
+nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
+ unsigned domain)
+{
+ nvc0_buffer_release_gpu_storage(buf);
+
return nvc0_buffer_allocate(screen, buf, domain);
}
@@ -77,10 +85,7 @@ nvc0_buffer_destroy(struct pipe_screen *pscreen,
{
struct nvc0_resource *res = nvc0_resource(presource);
- nouveau_bo_ref(NULL, &res->bo);
-
- if (res->mm)
- release_allocation(&res->mm, res->fence);
+ nvc0_buffer_release_gpu_storage(res);
if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY))
FREE(res->data);
@@ -112,7 +117,7 @@ nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf,
memcpy(buf->data + start, bounce->map, size);
nouveau_bo_unmap(bounce);
- buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
+ buf->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING;
nouveau_bo_ref(NULL, &bounce);
if (mm)
@@ -151,7 +156,7 @@ nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf,
release_allocation(&mm, nvc0->screen->fence.current);
if (start == 0 && size == buf->base.width0)
- buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
+ buf->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING;
return TRUE;
}
@@ -174,7 +179,7 @@ nvc0_buffer_transfer_get(struct pipe_context *pipe,
if (buf->domain == NOUVEAU_BO_VRAM) {
if (usage & PIPE_TRANSFER_READ) {
- if (buf->status & NVC0_BUFFER_STATUS_DIRTY)
+ if (buf->status & NVC0_BUFFER_STATUS_GPU_WRITING)
nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0);
}
}
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 2118abb5d5d..f02de4d044a 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -41,17 +41,18 @@ nvc0_flush(struct pipe_context *pipe, unsigned flags,
OUT_RING (chan, 0);
BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1);
OUT_RING (chan, 0x00);
+ } else
+ if ((flags & PIPE_FLUSH_RENDER_CACHE) && !(flags & PIPE_FLUSH_FRAME)) {
+ BEGIN_RING(chan, RING_3D(SERIALIZE), 1);
+ OUT_RING (chan, 0);
}
- if (fence) {
- nvc0_screen_fence_new(nvc0->screen, (struct nvc0_fence **)fence, TRUE);
- }
+ if (fence)
+ nvc0_fence_reference((struct nvc0_fence **)fence,
+ nvc0->screen->fence.current);
- if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) {
+ if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME))
FIRE_RING(chan);
-
- nvc0_screen_fence_next(nvc0->screen);
- }
}
static void
@@ -67,6 +68,16 @@ nvc0_destroy(struct pipe_context *pipe)
FREE(nvc0);
}
+void
+nvc0_default_flush_notify(struct nouveau_channel *chan)
+{
+ struct nvc0_context *nvc0 = chan->user_private;
+
+ nvc0_screen_fence_update(nvc0->screen, TRUE);
+
+ nvc0_screen_fence_next(nvc0->screen);
+}
+
struct pipe_context *
nvc0_create(struct pipe_screen *pscreen, void *priv)
{
@@ -91,6 +102,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
nvc0->pipe.flush = nvc0_flush;
screen->base.channel->user_private = nvc0;
+ screen->base.channel->flush_notify = nvc0_default_flush_notify;
nvc0_init_query_functions(nvc0);
nvc0_init_surface_functions(nvc0);
@@ -148,12 +160,14 @@ nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0)
{
struct resident *rsd;
struct util_dynarray *array;
- unsigned ctx, i;
+ unsigned ctx, i, n;
for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) {
array = &nvc0->residents[ctx];
- for (i = 0; i < array->size / sizeof(struct resident); ++i) {
+ n = array->size / sizeof(struct resident);
+ MARK_RING(nvc0->screen->base.channel, n, n);
+ for (i = 0; i < n; ++i) {
rsd = util_dynarray_element(array, struct resident, i);
nvc0_resource_validate(rsd->res, rsd->flags);
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index 94117988e50..1ce5554f7b7 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -54,6 +54,8 @@
#define NVC0_NEW_CONSTBUF (1 << 18)
#define NVC0_NEW_TEXTURES (1 << 19)
#define NVC0_NEW_SAMPLERS (1 << 20)
+#define NVC0_NEW_TFB (1 << 21)
+#define NVC0_NEW_TFB_BUFFERS (1 << 22)
#define NVC0_BUFCTX_CONSTANT 0
#define NVC0_BUFCTX_FRAME 1
@@ -79,6 +81,7 @@ struct nvc0_context {
uint8_t num_vtxelts;
uint8_t num_textures[5];
uint8_t num_samplers[5];
+ uint8_t tls_required; /* bitmask of shader types using l[] */
uint16_t scissor;
uint32_t uniform_buffer_bound[5];
} state;
@@ -123,6 +126,11 @@ struct nvc0_context {
boolean vbo_dirty;
boolean vbo_push_hint;
+ struct nvc0_transform_feedback_state *tfb;
+ struct pipe_resource *tfbbuf[4];
+ unsigned num_tfbbufs;
+ unsigned tfb_offset[4];
+
struct draw_context *draw;
};
@@ -149,6 +157,8 @@ nvc0_surface(struct pipe_surface *ps)
/* nvc0_context.c */
struct pipe_context *nvc0_create(struct pipe_screen *, void *);
+void nvc0_default_flush_notify(struct nouveau_channel *);
+
void nvc0_bufctx_emit_relocs(struct nvc0_context *);
void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx,
struct nvc0_resource *, uint32_t flags);
@@ -177,6 +187,8 @@ void nvc0_tevlprog_validate(struct nvc0_context *);
void nvc0_gmtyprog_validate(struct nvc0_context *);
void nvc0_fragprog_validate(struct nvc0_context *);
+void nvc0_tfb_validate(struct nvc0_context *);
+
/* nvc0_state.c */
extern void nvc0_init_state_functions(struct nvc0_context *);
diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c
index 9d2c48cf14d..f2d4b1451bf 100644
--- a/src/gallium/drivers/nvc0/nvc0_fence.c
+++ b/src/gallium/drivers/nvc0/nvc0_fence.c
@@ -55,6 +55,7 @@ nvc0_fence_emit(struct nvc0_fence *fence)
assert(fence->state == NVC0_FENCE_STATE_AVAILABLE);
+ MARK_RING (chan, 5, 2);
BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
@@ -83,7 +84,8 @@ nvc0_fence_del(struct nvc0_fence *fence)
struct nvc0_fence *it;
struct nvc0_screen *screen = fence->screen;
- if (fence->state == NVC0_FENCE_STATE_EMITTED) {
+ if (fence->state == NVC0_FENCE_STATE_EMITTED ||
+ fence->state == NVC0_FENCE_STATE_FLUSHED) {
if (fence == screen->fence.head) {
screen->fence.head = fence->next;
if (!screen->fence.head)
@@ -118,8 +120,8 @@ nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence)
fence->buffers = NULL;
}
-static void
-nvc0_screen_fence_update(struct nvc0_screen *screen)
+void
+nvc0_screen_fence_update(struct nvc0_screen *screen, boolean flushed)
{
struct nvc0_fence *fence;
struct nvc0_fence *next = NULL;
@@ -146,38 +148,43 @@ nvc0_screen_fence_update(struct nvc0_screen *screen)
screen->fence.head = next;
if (!next)
screen->fence.tail = NULL;
-}
-#define NVC0_FENCE_MAX_SPINS (1 << 17)
+ if (flushed) {
+ for (fence = next; fence; fence = fence->next)
+ fence->state = NVC0_FENCE_STATE_FLUSHED;
+ }
+}
boolean
nvc0_fence_signalled(struct nvc0_fence *fence)
{
struct nvc0_screen *screen = fence->screen;
- if (fence->state == NVC0_FENCE_STATE_EMITTED)
- nvc0_screen_fence_update(screen);
+ if (fence->state >= NVC0_FENCE_STATE_EMITTED)
+ nvc0_screen_fence_update(screen, FALSE);
return fence->state == NVC0_FENCE_STATE_SIGNALLED;
}
+#define NVC0_FENCE_MAX_SPINS (1 << 31)
+
boolean
nvc0_fence_wait(struct nvc0_fence *fence)
{
struct nvc0_screen *screen = fence->screen;
- int spins = 0;
+ uint32_t spins = 0;
- if (fence->state == NVC0_FENCE_STATE_AVAILABLE) {
+ if (fence->state < NVC0_FENCE_STATE_EMITTED) {
nvc0_fence_emit(fence);
- FIRE_RING(screen->base.channel);
-
if (fence == screen->fence.current)
nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
}
+ if (fence->state < NVC0_FENCE_STATE_FLUSHED)
+ FIRE_RING(screen->base.channel);
do {
- nvc0_screen_fence_update(screen);
+ nvc0_screen_fence_update(screen, FALSE);
if (fence->state == NVC0_FENCE_STATE_SIGNALLED)
return TRUE;
@@ -188,8 +195,9 @@ nvc0_fence_wait(struct nvc0_fence *fence)
#endif
} while (spins < NVC0_FENCE_MAX_SPINS);
- if (spins > 9000)
- NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence);
+ debug_printf("Wait on fence %u (ack = %u, next = %u) timed out !\n",
+ fence->sequence,
+ screen->fence.sequence_ack, screen->fence.sequence);
return FALSE;
}
@@ -199,5 +207,4 @@ nvc0_screen_fence_next(struct nvc0_screen *screen)
{
nvc0_fence_emit(screen->fence.current);
nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
- nvc0_screen_fence_update(screen);
}
diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h
index e63c164bda4..3d8c3f8ba60 100644
--- a/src/gallium/drivers/nvc0/nvc0_fence.h
+++ b/src/gallium/drivers/nvc0/nvc0_fence.h
@@ -7,7 +7,8 @@
#define NVC0_FENCE_STATE_AVAILABLE 0
#define NVC0_FENCE_STATE_EMITTED 1
-#define NVC0_FENCE_STATE_SIGNALLED 2
+#define NVC0_FENCE_STATE_FLUSHED 2
+#define NVC0_FENCE_STATE_SIGNALLED 3
struct nvc0_mm_allocation;
diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c
index 7c7e134146e..ea3ed9e0225 100644
--- a/src/gallium/drivers/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nvc0/nvc0_miptree.c
@@ -143,8 +143,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
switch (pt->format) {
case PIPE_FORMAT_Z16_UNORM:
tile_flags = 0x0700; /* COMPRESSED */
- tile_flags = 0x0200; /* NORMAL ? */
- tile_flags = 0x0100; /* NORMAL ? */
+ tile_flags = 0x0100; /* NORMAL */
break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
tile_flags = 0x5300; /* MSAA 4, COMPRESSED */
@@ -170,6 +169,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
break;
case PIPE_FORMAT_R16G16B16A16_UNORM:
tile_flags = 0xe900; /* COMPRESSED */
+ tile_flags = 0xfe00; /* NORMAL */
break;
default:
tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */
@@ -283,7 +283,7 @@ nvc0_miptree_surface_new(struct pipe_context *pipe,
pipe_reference_init(&ps->reference, 1);
pipe_resource_reference(&ps->texture, pt);
ps->context = pipe;
- ps->format = pt->format;
+ ps->format = templ->format;
ps->usage = templ->usage;
ps->u.tex.level = templ->u.tex.level;
ps->u.tex.first_layer = templ->u.tex.first_layer;
diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c
index 0629dad19c9..516d2e31b55 100644
--- a/src/gallium/drivers/nvc0/nvc0_mm.c
+++ b/src/gallium/drivers/nvc0/nvc0_mm.c
@@ -96,13 +96,13 @@ mm_bucket_by_size(struct nvc0_mman *cache, unsigned size)
static INLINE uint32_t
mm_default_slab_size(unsigned chunk_order)
{
- assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER);
-
static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] =
{
12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22
};
+ assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER);
+
return 1 << slab_order[chunk_order - MM_MIN_ORDER];
}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c
index 304a1919768..f51d289e8cd 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc.c
@@ -44,6 +44,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s,
if (ld->indirect >= 0)
return FALSE;
+ /* a few ops can use g[] sources directly, but we don't support g[] yet */
+ if (ld->src[0]->value->reg.file == NV_FILE_MEM_L ||
+ ld->src[0]->value->reg.file == NV_FILE_MEM_G)
+ return FALSE;
+
for (i = 0; i < 3 && nvi->src[i]; ++i)
if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
return FALSE;
@@ -55,15 +60,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s,
boolean
nvc0_insn_is_predicateable(struct nv_instruction *nvi)
{
- int s;
-
- if (!nv_op_predicateable(nvi->opcode))
+ if (nvi->predicate >= 0) /* already predicated */
return FALSE;
- if (nvi->predicate >= 0)
+ if (!nvc0_op_info_table[nvi->opcode].predicate &&
+ !nvc0_op_info_table[nvi->opcode].pseudo)
return FALSE;
- for (s = 0; s < 4 && nvi->src[s]; ++s)
- if (nvi->src[s]->value->reg.file == NV_FILE_IMM)
- return FALSE;
return TRUE;
}
@@ -103,6 +104,12 @@ nvc0_pc_replace_value(struct nv_pc *pc,
return n;
}
+static INLINE boolean
+is_gpr63(struct nv_value *val)
+{
+ return (val->reg.file == NV_FILE_GPR && val->reg.id == 63);
+}
+
struct nv_value *
nvc0_pc_find_constant(struct nv_ref *ref)
{
@@ -116,7 +123,7 @@ nvc0_pc_find_constant(struct nv_ref *ref)
assert(!src->insn->src[0]->mod);
src = src->insn->src[0]->value;
}
- if ((src->reg.file == NV_FILE_IMM) ||
+ if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) ||
(src->insn &&
src->insn->opcode == NV_OP_LD &&
src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
@@ -130,7 +137,7 @@ nvc0_pc_find_immediate(struct nv_ref *ref)
{
struct nv_value *src = nvc0_pc_find_constant(ref);
- return (src && src->reg.file == NV_FILE_IMM) ? src : NULL;
+ return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? src : NULL;
}
static void
@@ -187,7 +194,10 @@ nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f,
bb[p++] = b->out[j];
break;
case CFG_EDGE_LOOP_LEAVE:
- bbb[pp++] = b->out[j];
+ if (!b->out[j]->priv) {
+ bbb[pp++] = b->out[j];
+ b->out[j]->priv = 1;
+ }
break;
default:
assert(0);
@@ -499,6 +509,9 @@ nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)
i->bb = b;
b->num_instructions++;
+
+ if (i->prev && i->prev->terminator)
+ nvc0_insns_permute(i->prev, i);
}
void
@@ -512,6 +525,8 @@ nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
ni->prev = at;
ni->next->prev = ni;
ni->prev->next = ni;
+ ni->bb = at->bb;
+ ni->bb->num_instructions++;
}
void
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
index 969cc68c596..efa073a9201 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.h
+++ b/src/gallium/drivers/nvc0/nvc0_pc.h
@@ -53,7 +53,8 @@
/**
* BIND forces source operand i into the same register as destination operand i,
- * and the operands will be assigned consecutive registers (needed for TEX)
+ * and the operands will be assigned consecutive registers (needed for TEX).
+ * Beware conflicts !
* SELECT forces its multiple source operands and its destination operand into
* one and the same register.
*/
@@ -204,6 +205,10 @@
#define NV_CC_C 0x11
#define NV_CC_A 0x12
#define NV_CC_S 0x13
+#define NV_CC_INVERSE(cc) ((cc) ^ 0x7)
+/* for 1 bit predicates: */
+#define NV_CC_P 0
+#define NV_CC_NOT_P 1
#define NV_PC_MAX_INSTRUCTIONS 2048
#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
@@ -259,12 +264,6 @@ nv_op_supported_src_mods(uint opcode)
return nvc0_op_info_table[opcode].mods;
}
-static INLINE boolean
-nv_op_predicateable(uint opcode)
-{
- return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE;
-}
-
static INLINE uint
nv_type_order(ubyte type)
{
@@ -310,7 +309,7 @@ struct nv_reg {
int32_t s32;
int64_t s64;
uint64_t u64;
- uint32_t u32;
+ uint32_t u32; /* expected to be 0 for $r63 */
float f32;
double f64;
} imm;
@@ -344,6 +343,8 @@ struct nv_ref {
uint8_t flags;
};
+#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0)
+
struct nv_basic_block;
struct nv_instruction {
@@ -485,7 +486,7 @@ nv_alloc_instruction(struct nv_pc *pc, uint opcode)
assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);
insn->opcode = opcode;
- insn->cc = 0;
+ insn->cc = NV_CC_P;
insn->indirect = -1;
insn->predicate = -1;
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
index db8055d91cd..c10f920e6f1 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
@@ -236,7 +236,7 @@ emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op)
*/
pc->emit[0] |= (pcrel & 0x3f) << 26;
- pc->emit[1] |= (pcrel >> 6) & 0x1ffff;
+ pc->emit[1] |= (pcrel >> 6) & 0x3ffff;
}
}
@@ -393,6 +393,8 @@ emit_tex(struct nv_pc *pc, struct nv_instruction *i)
{
int src1 = i->tex_array + i->tex_dim + i->tex_cube;
+ assert(src1 < 6);
+
pc->emit[0] = 0x00000086;
pc->emit[1] = 0x80000000;
@@ -446,7 +448,7 @@ emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op)
pc->emit[0] |= op << 26;
- if (op >= 4) {
+ if (op >= 3) {
if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9;
if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7;
} else {
@@ -477,6 +479,7 @@ emit_ddx(struct nv_pc *pc, struct nv_instruction *i)
{
i->quadop = 0x99;
i->lanes = 4;
+ i->src[1] = i->src[0];
emit_quadop(pc, i);
}
@@ -485,6 +488,7 @@ emit_ddy(struct nv_pc *pc, struct nv_instruction *i)
{
i->quadop = 0xa5;
i->lanes = 5;
+ i->src[1] = i->src[0];
emit_quadop(pc, i);
}
@@ -629,25 +633,28 @@ emit_slct(struct nv_pc *pc, struct nv_instruction *i)
static void
emit_cvt(struct nv_pc *pc, struct nv_instruction *i)
{
+ uint32_t rint;
+
pc->emit[0] = 0x00000004;
pc->emit[1] = 0x10000000;
- if (i->opcode != NV_OP_CVT)
+ /* if no type conversion specified, get type from opcode */
+ if (i->opcode != NV_OP_CVT && i->ext.cvt.d == i->ext.cvt.s)
i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode);
switch (i->ext.cvt.d) {
case NV_TYPE_F32:
switch (i->ext.cvt.s) {
case NV_TYPE_F32: pc->emit[1] = 0x10000000; break;
- case NV_TYPE_S32: pc->emit[0] |= 0x200;
+ case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */
case NV_TYPE_U32: pc->emit[1] = 0x18000000; break;
}
break;
- case NV_TYPE_S32: pc->emit[0] |= 0x80;
+ case NV_TYPE_S32: pc->emit[0] |= 0x80; /* fall through */
case NV_TYPE_U32:
switch (i->ext.cvt.s) {
case NV_TYPE_F32: pc->emit[1] = 0x14000000; break;
- case NV_TYPE_S32: pc->emit[0] |= 0x200;
+ case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */
case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break;
}
break;
@@ -656,14 +663,20 @@ emit_cvt(struct nv_pc *pc, struct nv_instruction *i)
break;
}
- if (i->opcode == NV_OP_FLOOR)
- pc->emit[1] |= 0x00020000;
- else
- if (i->opcode == NV_OP_CEIL)
- pc->emit[1] |= 0x00040000;
- else
- if (i->opcode == NV_OP_TRUNC)
- pc->emit[1] |= 0x00060000;
+ rint = (i->ext.cvt.d == NV_TYPE_F32) ? 1 << 7 : 0;
+
+ if (i->opcode == NV_OP_FLOOR) {
+ pc->emit[0] |= rint;
+ pc->emit[1] |= 2 << 16;
+ } else
+ if (i->opcode == NV_OP_CEIL) {
+ pc->emit[0] |= rint;
+ pc->emit[1] |= 4 << 16;
+ } else
+ if (i->opcode == NV_OP_TRUNC) {
+ pc->emit[0] |= rint;
+ pc->emit[1] |= 6 << 16;
+ }
if (i->saturate || i->opcode == NV_OP_SAT)
pc->emit[0] |= 0x20;
@@ -793,11 +806,8 @@ emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i)
}
static void
-emit_ld_const(struct nv_pc *pc, struct nv_instruction *i)
+emit_ld_common(struct nv_pc *pc, struct nv_instruction *i)
{
- pc->emit[0] = 0x00000006;
- pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10);
-
emit_ldst_size(pc, i);
set_pred(pc, i);
@@ -808,6 +818,15 @@ emit_ld_const(struct nv_pc *pc, struct nv_instruction *i)
}
static void
+emit_ld_const(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000006;
+ pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10);
+
+ emit_ld_common(pc, i);
+}
+
+static void
emit_ld(struct nv_pc *pc, struct nv_instruction *i)
{
if (SFILE(i, 0) >= NV_FILE_MEM_C(0) &&
@@ -818,6 +837,12 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i)
} else {
emit_ld_const(pc, i);
}
+ } else
+ if (SFILE(i, 0) == NV_FILE_MEM_L) {
+ pc->emit[0] = 0x00000005;
+ pc->emit[1] = 0xc0000000;
+
+ emit_ld_common(pc, i);
} else {
NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0));
abort();
@@ -827,8 +852,19 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i)
static void
emit_st(struct nv_pc *pc, struct nv_instruction *i)
{
- NOUVEAU_ERR("emit_st: not handled yet\n");
- abort();
+ if (SFILE(i, 0) != NV_FILE_MEM_L)
+ NOUVEAU_ERR("emit_st(%u): file not handled yet\n", SFILE(i, 0));
+
+ pc->emit[0] = 0x00000005 | (0 << 8); /* write-back caching */
+ pc->emit[1] = 0xc8000000;
+
+ emit_ldst_size(pc, i);
+
+ set_pred(pc, i);
+ set_address_16(pc, i->src[0]);
+
+ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
+ DID(pc, i->src[1]->value, 14);
}
void
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
index acc72bff14c..c5a7367a5fd 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
@@ -99,6 +99,7 @@ inst_removable(struct nv_instruction *nvi)
nvc0_insn_refcount(nvi)));
}
+/* Check if we do not actually have to emit this instruction. */
static INLINE boolean
inst_is_noop(struct nv_instruction *nvi)
{
@@ -141,9 +142,10 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
struct nv_instruction *nvi, *next;
int j;
+ /* find first non-empty block emitted before b */
for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
- if (j >= 0) {
+ for (; j >= 0; --j) {
in = pc->bb_list[j];
/* check for no-op branches (BRA $PC+8) */
@@ -157,6 +159,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
nvc0_insn_delete(in->exit);
}
b->emit_pos = in->emit_pos + in->emit_size;
+
+ if (in->emit_size) /* no more no-op branches to b */
+ break;
}
pc->bb_list[pc->num_blocks++] = b;
@@ -240,10 +245,15 @@ check_swap_src_0_1(struct nv_instruction *nvi)
struct nv_ref *src0 = nvi->src[0];
struct nv_ref *src1 = nvi->src[1];
- if (!nv_op_commutative(nvi->opcode))
+ if (!nv_op_commutative(nvi->opcode) &&
+ NV_BASEOP(nvi->opcode) != NV_OP_SET &&
+ NV_BASEOP(nvi->opcode) != NV_OP_SLCT)
return;
assert(src0 && src1 && src0->value && src1->value);
+ if (src1->value->reg.file != NV_FILE_GPR)
+ return;
+
if (is_cspace_load(src0->value->insn)) {
if (!is_cspace_load(src1->value->insn)) {
nvi->src[0] = src1;
@@ -258,8 +268,13 @@ check_swap_src_0_1(struct nv_instruction *nvi)
}
}
- if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET)
- nvi->set_cond = cc_swapped[nvi->set_cond];
+ if (nvi->src[0] != src0) {
+ if (NV_BASEOP(nvi->opcode) == NV_OP_SET)
+ nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7];
+ else
+ if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT)
+ nvi->set_cond = NV_CC_INVERSE(nvi->set_cond);
+ }
}
static void
@@ -543,6 +558,7 @@ constant_operand(struct nv_pc *pc,
nv_reference(pc, nvi, s, nvi->src[t]->value);
nvi->src[s]->mod = nvi->src[t]->mod;
}
+ break;
case NV_OP_ADD_F32:
if (u.u32 == 0) {
switch (nvi->src[t]->mod) {
@@ -562,6 +578,7 @@ constant_operand(struct nv_pc *pc,
if (nvi->opcode != NV_OP_CVT)
nvi->src[0]->mod = 0;
}
+ break;
case NV_OP_ADD_B32:
if (u.u32 == 0) {
assert(nvi->src[t]->mod == 0);
@@ -582,7 +599,7 @@ constant_operand(struct nv_pc *pc,
} else
if (u.s32 > 0 && u.s32 == (1 << shift)) {
nvi->opcode = NV_OP_SHL;
- (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift;
+ (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift;
nv_reference(pc, nvi, 0, nvi->src[t]->value);
nv_reference(pc, nvi, 1, val);
break;
@@ -590,14 +607,14 @@ constant_operand(struct nv_pc *pc,
break;
case NV_OP_RCP:
u.f32 = 1.0f / u.f32;
- (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+ (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
nvi->opcode = NV_OP_MOV;
assert(s == 0);
nv_reference(pc, nvi, 0, val);
break;
case NV_OP_RSQ:
u.f32 = 1.0f / sqrtf(u.f32);
- (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+ (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
nvi->opcode = NV_OP_MOV;
assert(s == 0);
nv_reference(pc, nvi, 0, val);
@@ -607,25 +624,83 @@ constant_operand(struct nv_pc *pc,
}
}
+static void
+handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
+{
+ struct nv_value *src0 = nvi->src[0]->value;
+ struct nv_value *src1 = nvi->src[1]->value;
+
+ if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
+ return;
+ if (src0->reg.file != NV_FILE_GPR)
+ return;
+ nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
+ nvc0_insn_delete(nvi);
+}
+
+/* check if we can MUL + ADD -> MAD/FMA */
+static void
+handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
+{
+ struct nv_value *src0 = nvi->src[0]->value;
+ struct nv_value *src1 = nvi->src[1]->value;
+ struct nv_value *src;
+ int s;
+ uint8_t mod[4];
+
+ if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
+ else
+ if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
+ else
+ return;
+
+ if ((src0->insn && src0->insn->bb != nvi->bb) ||
+ (src1->insn && src1->insn->bb != nvi->bb))
+ return;
+
+ /* check for immediates from prior constant folding */
+ if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
+ return;
+ src = nvi->src[s]->value;
+
+ mod[0] = nvi->src[0]->mod;
+ mod[1] = nvi->src[1]->mod;
+ mod[2] = src->insn->src[0]->mod;
+ mod[3] = src->insn->src[1]->mod;
+
+ if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
+ return;
+
+ nvi->opcode = NV_OP_MAD_F32;
+
+ nv_reference(ctx->pc, nvi, s, NULL);
+ nvi->src[2] = nvi->src[!s];
+ nvi->src[!s] = NULL;
+
+ nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
+ nvi->src[0]->mod = mod[2] ^ mod[s];
+ nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
+ nvi->src[1]->mod = mod[3];
+}
+
static int
-nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
+nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *nvi, *next;
int j;
for (nvi = b->entry; nvi; nvi = next) {
- struct nv_value *src0, *src1, *src;
- int s;
- uint8_t mod[4];
+ struct nv_value *src0, *src1;
+ uint baseop = NV_BASEOP(nvi->opcode);
next = nvi->next;
src0 = nvc0_pc_find_immediate(nvi->src[0]);
src1 = nvc0_pc_find_immediate(nvi->src[1]);
- if (src0 && src1)
+ if (src0 && src1) {
constant_expression(ctx->pc, nvi, src0, src1);
- else {
+ } else {
if (src0)
constant_operand(ctx->pc, nvi, src0, 0);
else
@@ -633,44 +708,13 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
constant_operand(ctx->pc, nvi, src1, 1);
}
- /* check if we can MUL + ADD -> MAD/FMA */
- if (nvi->opcode != NV_OP_ADD)
- continue;
-
- src0 = nvi->src[0]->value;
- src1 = nvi->src[1]->value;
-
- if (SRC_IS_MUL(src0) && src0->refc == 1)
- src = src0;
+ if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
+ handle_min_max(ctx, nvi);
else
- if (SRC_IS_MUL(src1) && src1->refc == 1)
- src = src1;
- else
- continue;
-
- /* could have an immediate from above constant_* */
- if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
- continue;
- s = (src == src0) ? 0 : 1;
-
- mod[0] = nvi->src[0]->mod;
- mod[1] = nvi->src[1]->mod;
- mod[2] = src->insn->src[0]->mod;
- mod[3] = src->insn->src[0]->mod;
-
- if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
- continue;
-
- nvi->opcode = NV_OP_MAD;
- nv_reference(ctx->pc, nvi, s, NULL);
- nvi->src[2] = nvi->src[!s];
-
- nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
- nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
- nvi->src[0]->mod = mod[2] ^ mod[s];
- nvi->src[1]->mod = mod[3];
+ if (nvi->opcode == NV_OP_ADD_F32)
+ handle_add_mul(ctx, nvi);
}
- DESCEND_ARBITRARY(j, nv_pass_lower_arith);
+ DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);
return 0;
}
@@ -700,8 +744,12 @@ struct pass_reld_elim {
int alloc;
};
+/* Extend the load operation in @rec to also cover the data loaded by @ld.
+ * The two loads may not overlap but reference adjacent memory locations.
+ */
static void
-combine_load(struct mem_record *rec, struct nv_instruction *ld)
+combine_load(struct nv_pc *pc, struct mem_record *rec,
+ struct nv_instruction *ld)
{
struct nv_instruction *fv = rec->insn;
struct nv_value *mem = ld->src[0]->value;
@@ -716,7 +764,7 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld)
return;
rec->ofst = mem->reg.address;
for (j = 0; j < d; ++j)
- fv->def[d + j] = fv->def[j];
+ fv->def[mem->reg.size / 4 + j] = fv->def[j];
d = 0;
} else
if ((size == 8 && rec->ofst & 3) ||
@@ -729,6 +777,9 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld)
fv->def[d++]->insn = fv;
}
+ if (fv->src[0]->value->refc > 1)
+ nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value));
+ fv->src[0]->value->reg.address = rec->ofst;
fv->src[0]->value->reg.size = rec->size = size;
nvc0_insn_delete(ld);
@@ -793,6 +844,7 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
((it->ofst >> 4) == (ofst >> 4)) &&
((it->ofst + it->size == ofst) ||
(it->ofst - mem->reg.size == ofst))) {
+ /* only NV_OP_VFETCH can load exactly 12 bytes */
if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
continue;
if (it->ofst < ofst) {
@@ -808,7 +860,7 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
switch (ld->opcode) {
case NV_OP_EXPORT: combine_export(it, ld); break;
default:
- combine_load(it, ld);
+ combine_load(ctx->pc, it, ld);
break;
}
} else
@@ -817,6 +869,11 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
}
}
+ ctx->alloc = 0;
+ ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
+ for (s = 0; s < 16; ++s)
+ ctx->mem_c[s] = NULL;
+
DESCEND_ARBITRARY(s, nv_pass_mem_opt);
return 0;
}
@@ -1006,7 +1063,6 @@ nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
return 0;
}
-#if 0
/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
* Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
* BREAK and dummy ELSE block.
@@ -1027,61 +1083,187 @@ bb_is_if_else_endif(struct nv_basic_block *bb)
}
}
-/* predicate instructions and remove branch at the end */
+/* Predicate instructions and delete any branch at the end if it is
+ * not a break from a loop.
+ */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
- struct nv_value *p, ubyte cc)
+ struct nv_value *pred, uint8_t cc)
{
+ struct nv_instruction *nvi, *prev;
+ int s;
+ if (!b->entry)
+ return;
+ for (nvi = b->entry; nvi; nvi = nvi->next) {
+ prev = nvi;
+ if (inst_is_noop(nvi))
+ continue;
+ for (s = 0; nvi->src[s]; ++s);
+ assert(s < 6);
+ nvi->predicate = s;
+ nvi->cc = cc;
+ nv_reference(pc, nvi, nvi->predicate, pred);
+ }
+ if (prev->opcode == NV_OP_BRA &&
+ b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
+ b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
+ nvc0_insn_delete(prev);
}
-#endif
-/* NOTE: Run this after register allocation, we can just cut out the cflow
- * instructions and hook the predicates to the conditional OPs if they are
- * not using immediates; better than inserting SELECT to join definitions.
- *
- * NOTE: Should adapt prior optimization to make this possible more often.
+static INLINE boolean
+may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
+{
+ if (nvi->def[0] && values_equal(nvi->def[0], pred))
+ return FALSE;
+ return nvc0_insn_is_predicateable(nvi);
+}
+
+/* Transform IF/ELSE/ENDIF constructs into predicated instructions
+ * where feasible.
*/
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
+ struct nv_instruction *nvi;
+ struct nv_value *pred;
+ int k;
+ int n0, n1; /* instruction counts of outgoing blocks */
+
+ if (bb_is_if_else_endif(b)) {
+ assert(b->exit && b->exit->opcode == NV_OP_BRA);
+
+ assert(b->exit->predicate >= 0);
+ pred = b->exit->src[b->exit->predicate]->value;
+
+ n1 = n0 = 0;
+ for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
+ if (!may_predicate_insn(nvi, pred))
+ break;
+ if (!nvi) {
+ /* we're after register allocation, so there always is an ELSE block */
+ for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
+ if (!may_predicate_insn(nvi, pred))
+ break;
+ }
+
+ /* 12 is an arbitrary limit */
+ if (!nvi && n0 < 12 && n1 < 12) {
+ predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
+ predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);
+
+ nvc0_insn_delete(b->exit); /* delete the branch */
+
+ /* and a potential joinat before it */
+ if (b->exit && b->exit->opcode == NV_OP_JOINAT)
+ nvc0_insn_delete(b->exit);
+
+ /* remove join operations at the end of the conditional */
+ k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
+ if ((nvi = b->out[0]->out[k]->entry)) {
+ nvi->join = 0;
+ if (nvi->opcode == NV_OP_JOIN)
+ nvc0_insn_delete(nvi);
+ }
+ }
+ }
+ DESCEND_ARBITRARY(k, nv_pass_flatten);
+
return 0;
}
+/* Tests instructions for equality, but independently of sources. */
+static boolean
+is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
+{
+ if (a->opcode != b->opcode)
+ return FALSE;
+ if (nv_is_texture_op(a->opcode)) {
+ if (a->ext.tex.t != b->ext.tex.t ||
+ a->ext.tex.s != b->ext.tex.s)
+ return FALSE;
+ if (a->tex_dim != b->tex_dim ||
+ a->tex_array != b->tex_array ||
+ a->tex_cube != b->tex_cube ||
+ a->tex_shadow != b->tex_shadow ||
+ a->tex_live != b->tex_live)
+ return FALSE;
+ } else
+ if (a->opcode == NV_OP_CVT) {
+ if (a->ext.cvt.s != b->ext.cvt.s ||
+ a->ext.cvt.d != b->ext.cvt.d)
+ return FALSE;
+ } else
+ if (NV_BASEOP(a->opcode) == NV_OP_SET ||
+ NV_BASEOP(a->opcode) == NV_OP_SLCT) {
+ if (a->set_cond != b->set_cond)
+ return FALSE;
+ } else
+ if (a->opcode == NV_OP_LINTERP ||
+ a->opcode == NV_OP_PINTERP) {
+ if (a->centroid != b->centroid ||
+ a->flat != b->flat)
+ return FALSE;
+ }
+ if (a->cc != b->cc)
+ return FALSE;
+ if (a->lanes != b->lanes ||
+ a->patch != b->patch ||
+ a->saturate != b->saturate)
+ return FALSE;
+ if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
+ return FALSE;
+ return TRUE;
+}
+
/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *ir, *ik, *next;
struct nv_instruction *entry = b->phi ? b->phi : b->entry;
- int s;
+ int s, d;
unsigned int reps;
do {
reps = 0;
for (ir = entry; ir; ir = next) {
next = ir->next;
+ if (ir->fixed)
+ continue;
for (ik = entry; ik != ir; ik = ik->next) {
- if (ir->opcode != ik->opcode || ir->fixed)
+ if (!is_operation_equal(ir, ik))
continue;
-
- if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1])
+ if (!ir->def[0] || !ik->def[0])
continue;
if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
continue;
- if (!values_equal(ik->def[0], ir->def[0]))
+ for (d = 0; d < 4; ++d) {
+ if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
+ break;
+ if (ir->def[d]) {
+ if (!values_equal(ik->def[0], ir->def[0]))
+ break;
+ } else {
+ d = 4;
+ break;
+ }
+ }
+ if (d != 4)
continue;
- for (s = 0; s < 3; ++s) {
+ for (s = 0; s < 5; ++s) {
struct nv_value *a, *b;
- if (!ik->src[s]) {
- if (ir->src[s])
- break;
- continue;
+ if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
+ break;
+ if (!ir->src[s]) {
+ s = 5;
+ break;
}
+
if (ik->src[s]->mod != ir->src[s]->mod)
break;
a = ik->src[s]->value;
@@ -1089,14 +1271,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
if (a == b)
continue;
if (a->reg.file != b->reg.file ||
- a->reg.id < 0 ||
+ a->reg.id < 0 || /* this excludes memory loads/stores */
a->reg.id != b->reg.id)
break;
}
- if (s == 3) {
+ if (s == 5) {
nvc0_insn_delete(ir);
+ for (d = 0; d < 4 && ir->def[d]; ++d)
+ nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
++reps;
- nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]);
break;
}
}
@@ -1110,13 +1293,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
* neighbouring registers. CSE might have messed this up.
+ * Just generate a MOV for each source to avoid conflicts if they're used in
+ * multiple NV_OP_BIND at different positions.
*/
static int
nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
{
struct nv_value *val;
struct nv_instruction *bnd, *nvi, *next;
- int s, t;
+ int s;
for (bnd = b->entry; bnd; bnd = next) {
next = bnd->next;
@@ -1124,20 +1309,17 @@ nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
continue;
for (s = 0; s < 4 && bnd->src[s]; ++s) {
val = bnd->src[s]->value;
- for (t = s + 1; t < 4 && bnd->src[t]; ++t) {
- if (bnd->src[t]->value != val)
- continue;
- nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
- nvi->def[0] = new_value_like(ctx->pc, val);
- nvi->def[0]->insn = nvi;
- nv_reference(ctx->pc, nvi, 0, val);
- nvc0_insn_insert_before(bnd, nvi);
- nv_reference(ctx->pc, bnd, t, nvi->def[0]);
- }
+ nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
+ nvi->def[0] = new_value_like(ctx->pc, val);
+ nvi->def[0]->insn = nvi;
+ nv_reference(ctx->pc, nvi, 0, val);
+ nv_reference(ctx->pc, bnd, s, nvi->def[0]);
+
+ nvc0_insn_insert_before(bnd, nvi);
}
}
- DESCEND_ARBITRARY(t, nv_pass_fix_bind);
+ DESCEND_ARBITRARY(s, nv_pass_fix_bind);
return 0;
}
@@ -1153,11 +1335,17 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
pass.n = 0;
pass.pc = pc;
+ /* Do CSE so we can just compare values by pointer in subsequent passes. */
+ pc->pass_seq++;
+ ret = nv_pass_cse(&pass, root);
+ if (ret)
+ return ret;
+
/* Do this first, so we don't have to pay attention
* to whether sources are supported memory loads.
*/
pc->pass_seq++;
- ret = nv_pass_lower_arith(&pass, root);
+ ret = nv_pass_algebraic_opt(&pass, root);
if (ret)
return ret;
@@ -1185,11 +1373,9 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
reldelim->pc = pc;
}
- pc->pass_seq++;
- ret = nv_pass_cse(&pass, root);
- if (ret)
- return ret;
-
+ /* May run DCE before load-combining since that pass will clean up
+ * after itself.
+ */
dce.pc = pc;
do {
dce.removed = 0;
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c
index b03826484e4..90c669cc4b8 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_print.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c
@@ -225,7 +225,7 @@ nvc0_print_instruction(struct nv_instruction *i)
PRINT("%s", gree);
if (NV_BASEOP(i->opcode) == NV_OP_SET)
- PRINT("set %s", nv_cond_name(i->set_cond));
+ PRINT("%s %s", nvc0_opcode_name(i->opcode), nv_cond_name(i->set_cond));
else
if (i->saturate)
PRINT("sat %s", nvc0_opcode_name(i->opcode));
@@ -278,10 +278,10 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
{ NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
{ NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
{ NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
- { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 },
{ NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
{ NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
@@ -302,7 +302,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
{ NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_TRUNC, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
@@ -343,18 +343,18 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
{ NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
{ NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
{ NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
- { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
- { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 },
{ NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 },
- { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 },
+ { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 },
{ NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
@@ -363,13 +363,13 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
{ NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SLCT, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SLCT, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SLCT, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 },
{ NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 },
- { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
+ { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
{ NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
index d24f09a1507..f4afe083e2d 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
@@ -39,6 +39,30 @@ struct register_set {
struct nv_pc *pc;
};
+/* aliasing is allowed */
+static void
+intersect_register_sets(struct register_set *dst,
+ struct register_set *src1, struct register_set *src2)
+{
+ int i;
+
+ for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) {
+ dst->bits[i][0] = src1->bits[i][0] | src2->bits[i][0];
+ dst->bits[i][1] = src1->bits[i][1] | src2->bits[i][1];
+ }
+}
+
+static void
+mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask)
+{
+ int i;
+
+ for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) {
+ set->bits[i][0] = (set->bits[i][0] | mask) & umask;
+ set->bits[i][1] = (set->bits[i][1] | mask) & umask;
+ }
+}
+
struct nv_pc_pass {
struct nv_pc *pc;
struct nv_instruction **insns;
@@ -63,6 +87,9 @@ add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
{
struct nv_range *range, **nextp = &val->livei;
+ if (bgn == end) /* [a, a) is invalid / empty */
+ return TRUE;
+
for (range = val->livei; range; range = range->next) {
if (end < range->bgn)
break; /* insert before */
@@ -327,14 +354,14 @@ do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
assert(b->join == a->join);
}
-static INLINE void
+static INLINE boolean
try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
if (!join_allowed(ctx, a, b)) {
#ifdef NVC0_RA_DEBUG_JOIN
debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
#endif
- return;
+ return FALSE;
}
if (livei_have_overlap(a->join, b->join)) {
#ifdef NVC0_RA_DEBUG_JOIN
@@ -342,10 +369,27 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
livei_print(a);
livei_print(b);
#endif
- return;
+ return FALSE;
}
do_join_values(ctx, a, b);
+
+ return TRUE;
+}
+
+static void
+join_values_nofail(struct nv_pc_pass *ctx,
+ struct nv_value *a, struct nv_value *b, boolean type_only)
+{
+ if (type_only) {
+ assert(join_allowed(ctx, a, b));
+ do_join_values(ctx, a, b);
+ } else {
+ boolean ok = try_join_values(ctx, a, b);
+ if (!ok) {
+ NOUVEAU_ERR("failed to coalesce values\n");
+ }
+ }
}
static INLINE boolean
@@ -360,20 +404,32 @@ need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
return (b->num_in > 1) && (n == 2);
}
+/* Look for the @phi's operand whose definition reaches @b. */
static int
phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
struct nv_basic_block *tb)
{
+ struct nv_ref *srci, *srcj;
int i, j;
for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
- if (!nvc0_bblock_reachable_by(b, phi->src[i]->value->insn->bb, tb))
+ srci = phi->src[i];
+ /* if already replaced, check with original source first */
+ if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV)
+ srci = srci->value->insn->src[0];
+ if (!nvc0_bblock_reachable_by(b, srci->value->insn->bb, NULL))
continue;
/* NOTE: back-edges are ignored by the reachable-by check */
- if (j < 0 || !nvc0_bblock_reachable_by(phi->src[j]->value->insn->bb,
- phi->src[i]->value->insn->bb, tb))
+ if (j < 0 || !nvc0_bblock_reachable_by(srcj->value->insn->bb,
+ srci->value->insn->bb, NULL)) {
j = i;
+ srcj = srci;
+ }
}
+ if (j >= 0 && nvc0_bblock_reachable_by(b, phi->def[0]->insn->bb, NULL))
+ if (!nvc0_bblock_reachable_by(srcj->value->insn->bb,
+ phi->def[0]->insn->bb, NULL))
+ j = -1;
return j;
}
@@ -420,21 +476,23 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
ctx->pc->current_block = pn;
for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
- if ((j = phi_opnd_for_bb(i, p, b)) < 0)
- continue;
- val = i->src[j]->value;
-
- if (i->src[j]->flags) {
- /* value already encountered from a different in-block */
- val = val->insn->src[0]->value;
- while (j < 6 && i->src[j])
- ++j;
- assert(j < 6);
+ j = phi_opnd_for_bb(i, p, b);
+
+ if (j < 0) {
+ val = i->def[0];
+ } else {
+ val = i->src[j]->value;
+ if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) {
+ j = -1;
+ /* use original value, we already encountered & replaced it */
+ val = val->insn->src[0]->value;
+ }
}
+ if (j < 0) /* need an additional source ? */
+ for (j = 0; j < 6 && i->src[j] && i->src[j]->value != val; ++j);
+ assert(j < 6); /* XXX: really ugly shaders */
ni = new_instruction(ctx->pc, NV_OP_MOV);
-
- /* TODO: insert instruction at correct position in the first place */
if (ni->prev && ni->prev->target)
nvc0_insns_permute(ni->prev, ni);
@@ -442,7 +500,7 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
ni->def[0]->insn = ni;
nv_reference(ctx->pc, ni, 0, val);
nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */
- i->src[j]->flags = 1;
+ i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV;
}
if (pn != p && pn->exit) {
@@ -460,8 +518,13 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
return 0;
}
+#define JOIN_MASK_PHI (1 << 0)
+#define JOIN_MASK_SELECT (1 << 1)
+#define JOIN_MASK_MOV (1 << 2)
+#define JOIN_MASK_BIND (1 << 3)
+
static int
-pass_join_values(struct nv_pc_pass *ctx, int iter)
+pass_join_values(struct nv_pc_pass *ctx, unsigned mask)
{
int c, n;
@@ -470,36 +533,33 @@ pass_join_values(struct nv_pc_pass *ctx, int iter)
switch (i->opcode) {
case NV_OP_PHI:
- if (iter != 2)
+ if (!(mask & JOIN_MASK_PHI))
break;
for (c = 0; c < 6 && i->src[c]; ++c)
- try_join_values(ctx, i->def[0], i->src[c]->value);
+ join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE);
break;
case NV_OP_MOV:
- if ((iter == 2) && i->src[0]->value->insn &&
- !nv_is_texture_op(i->src[0]->value->join->insn->opcode))
+ if (!(mask & JOIN_MASK_MOV))
+ break;
+ if (i->src[0]->value->insn && !i->src[0]->value->insn->def[1])
try_join_values(ctx, i->def[0], i->src[0]->value);
break;
case NV_OP_SELECT:
- if (iter != 1)
+ if (!(mask & JOIN_MASK_SELECT))
break;
- for (c = 0; c < 6 && i->src[c]; ++c) {
- assert(join_allowed(ctx, i->def[0], i->src[c]->value));
- do_join_values(ctx, i->def[0], i->src[c]->value);
- }
- break;
- case NV_OP_TEX:
- case NV_OP_TXB:
- case NV_OP_TXL:
- case NV_OP_TXQ:
- /* on nvc0, TEX src and dst can differ */
+ for (c = 0; c < 6 && i->src[c]; ++c)
+ join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE);
break;
case NV_OP_BIND:
- if (iter)
+ if (!(mask & JOIN_MASK_BIND))
break;
- for (c = 0; c < 6 && i->src[c]; ++c)
- do_join_values(ctx, i->def[c], i->src[c]->value);
+ for (c = 0; c < 4 && i->src[c]; ++c)
+ join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE);
break;
+ case NV_OP_TEX:
+ case NV_OP_TXB:
+ case NV_OP_TXL:
+ case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */
default:
break;
}
@@ -621,15 +681,16 @@ static void collect_live_values(struct nv_basic_block *b, const int n)
{
int i;
- if (b->out[0]) {
- if (b->out[1]) { /* what to do about back-edges ? */
+ /* XXX: what to do about back/fake-edges (used to include both here) ? */
+ if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) {
+ if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
for (i = 0; i < n; ++i)
b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
} else {
memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
}
} else
- if (b->out[1]) {
+ if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
} else {
memset(b->live_set, 0, n * sizeof(uint32_t));
@@ -746,42 +807,46 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
elem->next = nval;
}
-static int
-pass_linear_scan(struct nv_pc_pass *ctx, int iter)
+static void
+collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head,
+ boolean assigned_only)
{
- struct nv_instruction *i;
- struct register_set f, free;
+ struct nv_value *val;
int k, n;
- struct nv_value *cur, *val, *tmp[2];
- struct nv_value active, inactive, handled, unhandled;
- make_empty_list(&active);
- make_empty_list(&inactive);
- make_empty_list(&handled);
- make_empty_list(&unhandled);
+ make_empty_list(head);
- nvc0_ctor_register_set(ctx->pc, &free);
-
- /* joined values should have range = NULL and thus not be added;
- * also, fixed memory values won't be added because they're not
- * def'd, just used
- */
for (n = 0; n < ctx->num_insns; ++n) {
- i = ctx->insns[n];
+ struct nv_instruction *i = ctx->insns[n];
+ /* for joined values, only the representative will have livei != NULL */
for (k = 0; k < 5; ++k) {
if (i->def[k] && i->def[k]->livei)
- insert_ordered_tail(&unhandled, i->def[k]);
- else
- if (0 && i->def[k])
- debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
+ if (!assigned_only || i->def[k]->reg.id >= 0)
+ insert_ordered_tail(head, i->def[k]);
}
}
- for (val = unhandled.next; val != unhandled.prev; val = val->next) {
+ for (val = head->next; val != head->prev; val = val->next) {
assert(val->join == val);
assert(val->livei->bgn <= val->next->livei->bgn);
}
+}
+
+static int
+pass_linear_scan(struct nv_pc_pass *ctx)
+{
+ struct register_set f, free;
+ struct nv_value *cur, *val, *tmp[2];
+ struct nv_value active, inactive, handled, unhandled;
+
+ make_empty_list(&active);
+ make_empty_list(&inactive);
+ make_empty_list(&handled);
+
+ nvc0_ctor_register_set(ctx->pc, &free);
+
+ collect_register_values(ctx, &unhandled, FALSE);
foreach_s(cur, tmp[0], &unhandled) {
remove_from_list(cur);
@@ -818,14 +883,7 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
reg_occupy(&f, val);
if (cur->reg.id < 0) {
- boolean mem = FALSE;
- int v = nvi_vector_size(cur->insn);
-
- if (v > 1)
- mem = !reg_assign(&f, &cur->insn->def[0], v);
- else
- if (iter)
- mem = !reg_assign(&f, &cur, 1);
+ boolean mem = !reg_assign(&f, &cur, 1);
if (mem) {
NOUVEAU_ERR("out of registers\n");
@@ -839,6 +897,68 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
return 0;
}
+/* Allocate values defined by instructions such as TEX, which have to be
+ * assigned to consecutive registers.
+ * Linear scan doesn't really work here since the values can have different
+ * live intervals.
+ */
+static int
+pass_allocate_constrained_values(struct nv_pc_pass *ctx)
+{
+ struct nv_value regvals, *val;
+ struct nv_instruction *i;
+ struct nv_value *defs[4];
+ struct register_set regs[4];
+ int n, vsize, c;
+ uint32_t mask;
+ boolean mem;
+
+ collect_register_values(ctx, &regvals, TRUE);
+
+ for (n = 0; n < ctx->num_insns; ++n) {
+ i = ctx->insns[n];
+ vsize = nvi_vector_size(i);
+ if (!(vsize > 1))
+ continue;
+ assert(vsize <= 4);
+
+ for (c = 0; c < vsize; ++c)
+ defs[c] = i->def[c]->join;
+
+ if (defs[0]->reg.id >= 0) {
+ for (c = 1; c < vsize; ++c)
+ assert(defs[c]->reg.id >= 0);
+ continue;
+ }
+
+ for (c = 0; c < vsize; ++c) {
+ nvc0_ctor_register_set(ctx->pc, &regs[c]);
+
+ foreach(val, &regvals) {
+ if (val->reg.id >= 0 && livei_have_overlap(val, defs[c]))
+ reg_occupy(&regs[c], val);
+ }
+ mask = 0x11111111;
+ if (vsize == 2) /* granularity is 2 and not 4 */
+ mask |= 0x11111111 << 2;
+ mask_register_set(&regs[c], 0, mask << c);
+
+ if (defs[c]->livei)
+ insert_ordered_tail(&regvals, defs[c]);
+ }
+ for (c = 1; c < vsize; ++c)
+ intersect_register_sets(&regs[0], &regs[0], &regs[c]);
+
+ mem = !reg_assign(&regs[0], &defs[0], vsize);
+
+ if (mem) {
+ NOUVEAU_ERR("out of registers\n");
+ abort();
+ }
+ }
+ return 0;
+}
+
static int
nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
{
@@ -862,6 +982,10 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
ret = pass_generate_phi_movs(ctx, root);
assert(!ret);
+#ifdef NVC0_RA_DEBUG_LIVEI
+ nvc0_print_function(root);
+#endif
+
for (i = 0; i < pc->loop_nesting_bound; ++i) {
pc->pass_seq++;
ret = pass_build_live_sets(ctx, root);
@@ -888,19 +1012,19 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
livei_print(&pc->values[i]);
#endif
- ret = pass_join_values(ctx, 0);
+ ret = pass_join_values(ctx, JOIN_MASK_PHI);
if (ret)
goto out;
- ret = pass_linear_scan(ctx, 0);
+ ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_BIND);
if (ret)
goto out;
- ret = pass_join_values(ctx, 1);
+ ret = pass_join_values(ctx, JOIN_MASK_MOV);
if (ret)
goto out;
- ret = pass_join_values(ctx, 2);
+ ret = pass_allocate_constrained_values(ctx);
if (ret)
goto out;
- ret = pass_linear_scan(ctx, 1);
+ ret = pass_linear_scan(ctx);
if (ret)
goto out;
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
index aefaf7b98ad..899fe147c6a 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -185,8 +185,17 @@ nvc0_varying_location(unsigned sn, unsigned si)
return 0x2e0;
*/
case TGSI_SEMANTIC_GENERIC:
+ /* We'd really like to distinguish between TEXCOORD and GENERIC here,
+ * since only 0x300 to 0x37c can be replaced by sprite coordinates.
+ * Also, gl_PointCoord should be a system value and must be assigned to
+ * address 0x2e0. For now, let's cheat:
+ */
assert(si < 31);
- return 0x80 + (si * 16);
+ if (si <= 7)
+ return 0x300 + si * 16;
+ if (si == 9)
+ return 0x2e0;
+ return 0x80 + ((si - 8) * 16);
case TGSI_SEMANTIC_NORMAL:
return 0x360;
case TGSI_SEMANTIC_PRIMID:
@@ -256,12 +265,14 @@ prog_decl(struct nvc0_translation_info *ti,
case TGSI_FILE_INPUT:
for (i = first; i <= last; ++i) {
if (ti->prog->type == PIPE_SHADER_VERTEX) {
- sn = TGSI_SEMANTIC_GENERIC;
- si = i;
+ for (c = 0; c < 4; ++c)
+ ti->input_loc[i][c] = 0x80 + i * 16 + c * 4;
+ } else {
+ for (c = 0; c < 4; ++c)
+ ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
+ /* for sprite coordinates: */
+ ti->prog->fp.in_pos[i] = ti->input_loc[i][0] / 4;
}
- for (c = 0; c < 4; ++c)
- ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
-
if (ti->prog->type == PIPE_SHADER_FRAGMENT)
ti->interp_mode[i] = nvc0_interp_mode(decl);
}
@@ -281,6 +292,8 @@ prog_decl(struct nvc0_translation_info *ti,
} else {
for (c = 0; c < 4; ++c)
ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
+ /* for TFB_VARYING_LOCS: */
+ ti->prog->vp.out_pos[i] = ti->output_loc[i][0] / 4;
}
}
break;
@@ -288,9 +301,11 @@ prog_decl(struct nvc0_translation_info *ti,
ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]);
assert(first == last);
break;
+ case TGSI_FILE_TEMPORARY:
+ ti->temp128_nr = MAX2(ti->temp128_nr, last + 1);
+ break;
case TGSI_FILE_NULL:
case TGSI_FILE_CONSTANT:
- case TGSI_FILE_TEMPORARY:
case TGSI_FILE_SAMPLER:
case TGSI_FILE_ADDRESS:
case TGSI_FILE_IMMEDIATE:
@@ -518,8 +533,13 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti)
if (!ti->input_access[i][c])
continue;
a = ti->input_loc[i][c] / 2;
- if ((a & ~7) == 0x70/2)
- fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */
+ if (ti->input_loc[i][c] >= 0x2c0)
+ a -= 32;
+ if (ti->input_loc[i][0] == 0x70)
+ fp->hdr[5] |= 1 << (28 + c); /* FRAG_COORD_UMASK */
+ else
+ if (ti->input_loc[i][0] == 0x2e0)
+ fp->hdr[14] |= 1 << (24 + c); /* POINT_COORD */
else
fp->hdr[4 + a / 32] |= m << (a % 32);
}
@@ -618,7 +638,7 @@ nvc0_prog_scan(struct nvc0_translation_info *ti)
if (ti->scan.writes_z)
prog->flags[0] = 0x11; /* ? */
else
- if (!ti->global_stores)
+ if (!ti->scan.uses_kill && !ti->global_stores)
prog->fp.early_z = 1;
ret = nvc0_fp_gen_header(prog, ti);
@@ -629,6 +649,11 @@ nvc0_prog_scan(struct nvc0_translation_info *ti)
break;
}
+ if (ti->require_stores) {
+ prog->hdr[0] |= 1 << 26;
+ prog->hdr[1] |= ti->temp128_nr * 16; /* l[] size */
+ }
+
assert(!ret);
return ret;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h
index e6b210d1355..f6fea29780b 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nvc0/nvc0_program.h
@@ -21,16 +21,18 @@ struct nvc0_program {
unsigned code_size;
unsigned parm_size;
- uint32_t hdr[20];
+ uint32_t hdr[20]; /* TODO: move this into code to save space */
uint32_t flags[2];
struct {
uint8_t edgeflag;
uint8_t num_ucps;
+ uint8_t out_pos[PIPE_MAX_SHADER_OUTPUTS];
} vp;
struct {
uint8_t early_z;
+ uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
} fp;
void *relocs;
@@ -74,6 +76,7 @@ struct nvc0_translation_info {
uint32_t *immd32;
ubyte *immd32_ty;
unsigned immd32_nr;
+ unsigned temp128_nr;
ubyte edgeflag_out;
struct nvc0_subroutine *subr;
unsigned num_subrs;
diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c
index 74c3451c19a..fcbb7da41a3 100644
--- a/src/gallium/drivers/nvc0/nvc0_push.c
+++ b/src/gallium/drivers/nvc0/nvc0_push.c
@@ -217,6 +217,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
struct push_context ctx;
unsigned i, index_size;
unsigned inst = info->instance_count;
+ boolean apply_bias = info->indexed && info->index_bias;
ctx.chan = nvc0->screen->base.channel;
ctx.translate = nvc0->vertex->translate;
@@ -230,7 +231,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
data = nvc0_resource_map_offset(nvc0, res,
vb->buffer_offset, NOUVEAU_BO_RD);
- if (info->indexed)
+
+ if (apply_bias && likely(!(nvc0->vertex->instance_bufs & (1 << i))))
data += info->index_bias * vb->stride;
ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c
index cc83fbe771c..e5e43c0e7a5 100644
--- a/src/gallium/drivers/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nvc0/nvc0_query.c
@@ -312,6 +312,7 @@ nvc0_render_condition(struct pipe_context *pipe,
if (mode == PIPE_RENDER_COND_WAIT ||
mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+ MARK_RING (chan, 5, 2);
BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
@@ -319,6 +320,7 @@ nvc0_render_condition(struct pipe_context *pipe,
OUT_RING (chan, 0x00001001);
}
+ MARK_RING (chan, 4, 2);
BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h
index 17e79642a6d..599823c0dc9 100644
--- a/src/gallium/drivers/nvc0/nvc0_resource.h
+++ b/src/gallium/drivers/nvc0/nvc0_resource.h
@@ -24,7 +24,8 @@ struct nvc0_context;
* USER_MEMORY: resource->data is a pointer to client memory and may change
* between GL calls
*/
-#define NVC0_BUFFER_STATUS_DIRTY (1 << 0)
+#define NVC0_BUFFER_STATUS_GPU_READING (1 << 0)
+#define NVC0_BUFFER_STATUS_GPU_WRITING (1 << 1)
#define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7)
/* Resources, if mapped into the GPU's address space, are guaranteed to
@@ -51,6 +52,9 @@ struct nvc0_resource {
struct nvc0_mm_allocation *mm;
};
+void
+nvc0_buffer_release_gpu_storage(struct nvc0_resource *);
+
boolean
nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *,
unsigned start, unsigned size);
@@ -87,7 +91,7 @@ nvc0_resource_map_offset(struct nvc0_context *nvc0,
nvc0_buffer_adjust_score(nvc0, res, -250);
if ((res->domain == NOUVEAU_BO_VRAM) &&
- (res->status & NVC0_BUFFER_STATUS_DIRTY))
+ (res->status & NVC0_BUFFER_STATUS_GPU_WRITING))
nvc0_buffer_download(nvc0, res, 0, res->base.width0);
if ((res->domain != NOUVEAU_BO_GART) ||
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index f608b32e1cb..f7f1fd09a12 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -75,6 +75,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 10;
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return 13;
+ case PIPE_CAP_ARRAY_TEXTURES:
+ return 1;
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
case PIPE_CAP_TEXTURE_SWIZZLE:
@@ -281,9 +283,6 @@ nvc0_magic_3d_init(struct nouveau_channel *chan)
BEGIN_RING(chan, RING_3D_(0x074c), 1);
OUT_RING (chan, 0x3f);
- BEGIN_RING(chan, RING_3D_(0x10f8), 1);
- OUT_RING (chan, 0x0101);
-
BEGIN_RING(chan, RING_3D_(0x16a8), 1);
OUT_RING (chan, (3 << 16) | 3);
BEGIN_RING(chan, RING_3D_(0x1794), 1);
@@ -476,7 +475,7 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
OUT_RING (chan, (15 << 4) | 1);
}
- screen->tls_size = 4 * 4 * 32 * 128 * 4;
+ screen->tls_size = (16 * 32) * (NVC0_CAP_MAX_PROGRAM_TEMPS * 16);
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
screen->tls_size, &screen->tls);
if (ret)
@@ -490,6 +489,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
OUT_RING (chan, screen->tls_size >> 32);
OUT_RING (chan, screen->tls_size);
+ BEGIN_RING(chan, RING_3D_(0x07a0), 1);
+ OUT_RING (chan, 0);
BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1);
OUT_RING (chan, 0);
@@ -532,16 +533,27 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */
OUT_RING (chan, 0x3f);
- BEGIN_RING(chan, RING_3D(VIEWPORT_CLIP_RECTS_EN), 1);
+ BEGIN_RING(chan, RING_3D(CLIP_RECTS_MODE), 1);
+ OUT_RING (chan, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY);
+ BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
+ for (i = 0; i < 8 * 2; ++i)
+ OUT_RING(chan, 0);
+ BEGIN_RING(chan, RING_3D(CLIP_RECTS_EN), 1);
OUT_RING (chan, 0);
BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1);
OUT_RING (chan, 0);
+ /* neither scissors, viewport nor stencil mask should affect clears */
+ BEGIN_RING(chan, RING_3D(CLEAR_FLAGS), 1);
+ OUT_RING (chan, 0);
+
BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
OUT_RINGf (chan, 0.0f);
OUT_RINGf (chan, 1.0f);
+ BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+ OUT_RING (chan, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1);
/* We use scissors instead of exact view volume clipping,
* so they're always enabled.
@@ -628,11 +640,14 @@ nvc0_screen_make_buffers_resident(struct nvc0_screen *screen)
const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+ MARK_RING(chan, 5, 5);
nouveau_bo_validate(chan, screen->text, flags);
nouveau_bo_validate(chan, screen->uniforms, flags);
nouveau_bo_validate(chan, screen->txc, flags);
- nouveau_bo_validate(chan, screen->tls, flags);
nouveau_bo_validate(chan, screen->mp_stack_bo, flags);
+
+ if (screen->cur_ctx && screen->cur_ctx->state.tls_required)
+ nouveau_bo_validate(chan, screen->tls, flags);
}
int
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index 1fac142e2be..d952ff1f9b1 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -128,17 +128,25 @@ nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags)
{
struct nvc0_screen *screen = nvc0_screen(res->base.screen);
- nouveau_bo_validate(screen->base.channel, res->bo, flags);
+ if (likely(res->bo)) {
+ nouveau_bo_validate(screen->base.channel, res->bo, flags);
- nvc0_resource_fence(res, flags);
+ if (flags & NOUVEAU_BO_WR)
+ res->status |= NVC0_BUFFER_STATUS_GPU_WRITING;
+ if (flags & NOUVEAU_BO_RD)
+ res->status |= NVC0_BUFFER_STATUS_GPU_READING;
+
+ nvc0_resource_fence(res, flags);
+ }
}
boolean
nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit);
-
void
nvc0_screen_fence_next(struct nvc0_screen *);
+void
+nvc0_screen_fence_update(struct nvc0_screen *, boolean flushed);
static INLINE boolean
nvc0_screen_fence_emit(struct nvc0_screen *screen)
diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
index 981b5488d08..357f8b80deb 100644
--- a/src/gallium/drivers/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c
@@ -27,6 +27,16 @@
#include "nvc0_context.h"
+static INLINE void
+nvc0_program_update_context_state(struct nvc0_context *nvc0,
+ struct nvc0_program *prog, int stage)
+{
+ if (prog->hdr[1])
+ nvc0->state.tls_required |= 1 << stage;
+ else
+ nvc0->state.tls_required &= ~(1 << stage);
+}
+
static boolean
nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
@@ -55,7 +65,7 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
prog->code_base + NVC0_SHADER_HEADER_SIZE,
prog->code_size, prog->code);
- BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1);
+ BEGIN_RING(nvc0->screen->base.channel, RING_3D(MEM_BARRIER), 1);
OUT_RING (nvc0->screen->base.channel, 0x1111);
return TRUE;
@@ -77,6 +87,7 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0)
if (!nvc0_program_validate(nvc0, vp))
return;
+ nvc0_program_update_context_state(nvc0, vp, 0);
BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2);
OUT_RING (chan, 0x11);
@@ -98,6 +109,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
if (!nvc0_program_validate(nvc0, fp))
return;
+ nvc0_program_update_context_state(nvc0, fp, 4);
BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1);
OUT_RING (chan, fp->fp.early_z);
@@ -127,6 +139,7 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
}
if (!nvc0_program_validate(nvc0, tp))
return;
+ nvc0_program_update_context_state(nvc0, tp, 1);
BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2);
OUT_RING (chan, 0x21);
@@ -148,6 +161,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0)
}
if (!nvc0_program_validate(nvc0, tp))
return;
+ nvc0_program_update_context_state(nvc0, tp, 2);
BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
OUT_RING (chan, 0x31);
@@ -170,6 +184,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
}
if (!nvc0_program_validate(nvc0, gp))
return;
+ nvc0_program_update_context_state(nvc0, gp, 3);
BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
OUT_RING (chan, 0x41);
@@ -178,3 +193,59 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1);
OUT_RING (chan, gp->max_gpr);
}
+
+/* It's *is* kind of shader related. We need to inspect the program
+ * to get the output locations right.
+ */
+void
+nvc0_tfb_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *vp;
+ struct nvc0_transform_feedback_state *tfb = nvc0->tfb;
+ int b;
+
+ BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
+ if (!tfb) {
+ OUT_RING(chan, 0);
+ return;
+ }
+ OUT_RING(chan, 1);
+
+ vp = nvc0->vertprog ? nvc0->vertprog : nvc0->gmtyprog;
+
+ for (b = 0; b < nvc0->num_tfbbufs; ++b) {
+ uint8_t idx, var[128];
+ int i, n;
+ struct nvc0_resource *buf = nvc0_resource(nvc0->tfbbuf[b]);
+
+ BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5);
+ OUT_RING (chan, 1);
+ OUT_RESRCh(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR);
+ OUT_RESRCl(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR);
+ OUT_RING (chan, buf->base.width0 - nvc0->tfb_offset[b]);
+ OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID <- offset ? */
+
+ if (!(nvc0->dirty & NVC0_NEW_TFB))
+ continue;
+
+ BEGIN_RING(chan, RING_3D(TFB_UNK07X0(b)), 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, tfb->varying_count[b]);
+ OUT_RING (chan, tfb->stride[b]);
+
+ n = b ? tfb->varying_count[b - 1] : 0;
+ i = 0;
+ for (; i < tfb->varying_count[b]; ++i) {
+ idx = tfb->varying_index[n + i];
+ var[i] = vp->vp.out_pos[idx >> 2] + (idx & 3);
+ }
+ for (; i & 3; ++i)
+ var[i] = 0;
+
+ BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4);
+ OUT_RINGp (chan, var, i / 4);
+ }
+ for (; b < 4; ++b)
+ IMMED_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 0);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c
index c08f3693f5e..aa437195764 100644
--- a/src/gallium/drivers/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_state.c
@@ -22,6 +22,7 @@
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
+#include "util/u_transfer.h"
#include "tgsi/tgsi_parse.h"
@@ -50,40 +51,35 @@ nvc0_colormask(unsigned mask)
return ret;
}
+#define NVC0_BLEND_FACTOR_CASE(a, b) \
+ case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b
+
static INLINE uint32_t
nvc0_blend_fac(unsigned factor)
{
- static const uint16_t bf[] = {
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */
- NV50_3D_BLEND_FACTOR_ONE,
- NV50_3D_BLEND_FACTOR_SRC_COLOR,
- NV50_3D_BLEND_FACTOR_SRC_ALPHA,
- NV50_3D_BLEND_FACTOR_DST_ALPHA,
- NV50_3D_BLEND_FACTOR_DST_COLOR,
- NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE,
- NV50_3D_BLEND_FACTOR_CONSTANT_COLOR,
- NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA,
- NV50_3D_BLEND_FACTOR_SRC1_COLOR,
- NV50_3D_BLEND_FACTOR_SRC1_ALPHA,
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */
- NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
- };
-
- assert(factor < (sizeof(bf) / sizeof(bf[0])));
- return bf[factor];
+ switch (factor) {
+ NVC0_BLEND_FACTOR_CASE(ONE, ONE);
+ NVC0_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE);
+ NVC0_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR);
+ NVC0_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(ZERO, ZERO);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA);
+ default:
+ return NV50_3D_BLEND_FACTOR_ZERO;
+ }
}
static void *
@@ -176,9 +172,9 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe,
return NULL;
so->pipe = *cso;
-#ifndef NVC0_SCISSORS_CLIPPING
- SB_IMMED_3D(so, SCISSOR_ENABLE(0), cso->scissor);
-#endif
+ /* Scissor enables are handled in scissor state, we will not want to
+ * always emit 16 commands, one for each scissor rectangle, here.
+ */
SB_BEGIN_3D(so, SHADE_MODEL, 1);
SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
@@ -242,7 +238,7 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe,
SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
SB_DATA (so, fui(cso->offset_scale));
SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
- SB_DATA (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? */
+ SB_DATA (so, fui(cso->offset_units * 2.0f));
}
assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
@@ -283,20 +279,21 @@ nvc0_zsa_state_create(struct pipe_context *pipe,
}
if (cso->stencil[0].enabled) {
- SB_BEGIN_3D(so, STENCIL_FRONT_ENABLE, 5);
+ SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
SB_DATA (so, 1);
SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op));
SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func));
- SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2);
- SB_DATA (so, cso->stencil[0].writemask);
+ SB_BEGIN_3D(so, STENCIL_FRONT_FUNC_MASK, 2);
SB_DATA (so, cso->stencil[0].valuemask);
+ SB_DATA (so, cso->stencil[0].writemask);
} else {
- SB_IMMED_3D(so, STENCIL_FRONT_ENABLE, 0);
+ SB_IMMED_3D(so, STENCIL_ENABLE, 0);
}
if (cso->stencil[1].enabled) {
+ assert(cso->stencil[0].enabled);
SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
SB_DATA (so, 1);
SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op));
@@ -306,7 +303,8 @@ nvc0_zsa_state_create(struct pipe_context *pipe,
SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
SB_DATA (so, cso->stencil[1].writemask);
SB_DATA (so, cso->stencil[1].valuemask);
- } else {
+ } else
+ if (cso->stencil[0].enabled) {
SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0);
}
@@ -808,6 +806,74 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
nvc0->dirty |= NVC0_NEW_VERTEX;
}
+static void *
+nvc0_tfb_state_create(struct pipe_context *pipe,
+ const struct pipe_stream_output_state *pso)
+{
+ struct nvc0_transform_feedback_state *so;
+ int n = 0;
+ int i, c, b;
+
+ so = MALLOC(sizeof(*so) + pso->num_outputs * 4 * sizeof(uint8_t));
+ if (!so)
+ return NULL;
+
+ for (b = 0; b < 4; ++b) {
+ for (i = 0; i < pso->num_outputs; ++i) {
+ if (pso->output_buffer[i] != b)
+ continue;
+ for (c = 0; c < 4; ++c) {
+ if (!(pso->register_mask[i] & (1 << c)))
+ continue;
+ so->varying_count[b]++;
+ so->varying_index[n++] = (pso->register_index[i] << 2) | c;
+ }
+ }
+ so->stride[b] = so->varying_count[b] * 4;
+ }
+ if (pso->stride)
+ so->stride[0] = pso->stride;
+
+ return so;
+}
+
+static void
+nvc0_tfb_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nvc0_tfb_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ nvc0_context(pipe)->tfb = hwcso;
+ nvc0_context(pipe)->dirty |= NVC0_NEW_TFB;
+}
+
+static void
+nvc0_set_transform_feedback_buffers(struct pipe_context *pipe,
+ struct pipe_resource **buffers,
+ int *offsets,
+ int num_buffers)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ int i;
+
+ assert(num_buffers >= 0 && num_buffers <= 4); /* why signed ? */
+
+ for (i = 0; i < num_buffers; ++i) {
+ assert(offsets[i] >= 0);
+ nvc0->tfb_offset[i] = offsets[i];
+ pipe_resource_reference(&nvc0->tfbbuf[i], buffers[i]);
+ }
+ for (; i < nvc0->num_tfbbufs; ++i)
+ pipe_resource_reference(&nvc0->tfbbuf[i], NULL);
+
+ nvc0->num_tfbbufs = num_buffers;
+
+ nvc0->dirty |= NVC0_NEW_TFB_BUFFERS;
+}
+
void
nvc0_init_state_functions(struct nvc0_context *nvc0)
{
@@ -861,5 +927,12 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
nvc0->pipe.set_vertex_buffers = nvc0_set_vertex_buffers;
nvc0->pipe.set_index_buffer = nvc0_set_index_buffer;
+
+ nvc0->pipe.create_stream_output_state = nvc0_tfb_state_create;
+ nvc0->pipe.delete_stream_output_state = nvc0_tfb_state_delete;
+ nvc0->pipe.bind_stream_output_state = nvc0_tfb_state_bind;
+ nvc0->pipe.set_stream_output_buffers = nvc0_set_transform_feedback_buffers;
+
+ nvc0->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
index 25aec0244db..70c418fad9b 100644
--- a/src/gallium/drivers/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c
@@ -25,6 +25,7 @@ nvc0_validate_zcull(struct nvc0_context *nvc0)
else
width = fb->width;
+ MARK_RING (chan, 23, 4);
BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */
OUT_RING (chan, 0);
BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */
@@ -57,6 +58,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
unsigned i;
+ boolean serialize = FALSE;
nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME);
@@ -66,12 +68,14 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
OUT_RING (chan, fb->width << 16);
OUT_RING (chan, fb->height << 16);
+ MARK_RING(chan, 9 * fb->nr_cbufs, 2 * fb->nr_cbufs);
+
for (i = 0; i < fb->nr_cbufs; ++i) {
struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture);
struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]);
struct nouveau_bo *bo = mt->base.bo;
uint32_t offset = sf->offset;
-
+
BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8);
OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
@@ -83,6 +87,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
OUT_RING (chan, sf->depth);
OUT_RING (chan, mt->layer_stride >> 2);
+ if (mt->base.status & NVC0_BUFFER_STATUS_GPU_READING)
+ serialize = TRUE;
+ mt->base.status |= NVC0_BUFFER_STATUS_GPU_WRITING;
+ mt->base.status &= ~NVC0_BUFFER_STATUS_GPU_READING;
+
nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
}
@@ -93,7 +102,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
struct nouveau_bo *bo = mt->base.bo;
int unk = mt->base.base.target == PIPE_TEXTURE_2D;
uint32_t offset = sf->offset;
-
+
+ MARK_RING (chan, 12, 2);
BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
@@ -107,6 +117,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
OUT_RING (chan, sf->height);
OUT_RING (chan, (unk << 16) | sf->depth);
+ if (mt->base.status & NVC0_BUFFER_STATUS_GPU_READING)
+ serialize = TRUE;
+ mt->base.status |= NVC0_BUFFER_STATUS_GPU_WRITING;
+ mt->base.status &= ~NVC0_BUFFER_STATUS_GPU_READING;
+
nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
} else {
@@ -114,11 +129,10 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
OUT_RING (chan, 0);
}
-#ifndef NVC0_SCISSORS_CLIPPING
- BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
- OUT_RING (chan, fb->width << 16);
- OUT_RING (chan, fb->height << 16);
-#endif
+ if (serialize) {
+ BEGIN_RING(chan, RING_3D(SERIALIZE), 1);
+ OUT_RING (chan, 0);
+ }
}
static void
@@ -160,65 +174,54 @@ nvc0_validate_scissor(struct nvc0_context *nvc0)
{
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct pipe_scissor_state *s = &nvc0->scissor;
-#ifdef NVC0_SCISSORS_CLIPPING
- struct pipe_viewport_state *vp = &nvc0->viewport;
- int minx, maxx, miny, maxy;
- if (!(nvc0->dirty &
- (NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | NVC0_NEW_FRAMEBUFFER)) &&
- nvc0->state.scissor == nvc0->rast->pipe.scissor)
+ if (!(nvc0->dirty & NVC0_NEW_SCISSOR) &&
+ nvc0->rast->pipe.scissor == nvc0->state.scissor)
return;
nvc0->state.scissor = nvc0->rast->pipe.scissor;
- if (nvc0->state.scissor) {
- minx = s->minx;
- maxx = s->maxx;
- miny = s->miny;
- maxy = s->maxy;
+ BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+ if (nvc0->rast->pipe.scissor) {
+ OUT_RING(chan, (s->maxx << 16) | s->minx);
+ OUT_RING(chan, (s->maxy << 16) | s->miny);
} else {
- minx = 0;
- maxx = nvc0->framebuffer.width;
- miny = 0;
- maxy = nvc0->framebuffer.height;
+ OUT_RING(chan, (0xffff << 16) | 0);
+ OUT_RING(chan, (0xffff << 16) | 0);
}
-
- minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0])));
- maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0])));
- miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1])));
- maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1])));
-
- BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
- OUT_RING (chan, (maxx << 16) | minx);
- OUT_RING (chan, (maxy << 16) | miny);
- BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
- OUT_RING (chan, ((maxx - minx) << 16) | minx);
- OUT_RING (chan, ((maxy - miny) << 16) | miny);
-#else
- BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
- OUT_RING (chan, (s->maxx << 16) | s->minx);
- OUT_RING (chan, (s->maxy << 16) | s->miny);
-#endif
}
static void
nvc0_validate_viewport(struct nvc0_context *nvc0)
{
struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct pipe_viewport_state *vp = &nvc0->viewport;
+ int x, y, w, h;
+ float zmin, zmax;
BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3);
- OUT_RINGf (chan, nvc0->viewport.translate[0]);
- OUT_RINGf (chan, nvc0->viewport.translate[1]);
- OUT_RINGf (chan, nvc0->viewport.translate[2]);
+ OUT_RINGf (chan, vp->translate[0]);
+ OUT_RINGf (chan, vp->translate[1]);
+ OUT_RINGf (chan, vp->translate[2]);
BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3);
- OUT_RINGf (chan, nvc0->viewport.scale[0]);
- OUT_RINGf (chan, nvc0->viewport.scale[1]);
- OUT_RINGf (chan, nvc0->viewport.scale[2]);
+ OUT_RINGf (chan, vp->scale[0]);
+ OUT_RINGf (chan, vp->scale[1]);
+ OUT_RINGf (chan, vp->scale[2]);
-#ifdef NVC0_SCISSORS_CLIPPING
+ /* now set the viewport rectangle to viewport dimensions for clipping */
+
+ x = (int)(vp->translate[0] - fabsf(vp->scale[0]));
+ y = (int)(vp->translate[1] - fabsf(vp->scale[1]));
+ w = (int)fabsf(2.0f * vp->scale[0]);
+ h = (int)fabsf(2.0f * vp->scale[1]);
+ zmin = vp->translate[2] - fabsf(vp->scale[2]);
+ zmax = vp->translate[2] + fabsf(vp->scale[2]);
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+ OUT_RING (chan, (w << 16) | x);
+ OUT_RING (chan, (h << 16) | y);
BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
- OUT_RINGf (chan, nvc0->viewport.translate[2] - nvc0->viewport.scale[2]);
- OUT_RINGf (chan, nvc0->viewport.translate[2] + nvc0->viewport.scale[2]);
-#endif
+ OUT_RINGf (chan, zmin);
+ OUT_RINGf (chan, zmax);
}
static void
@@ -227,10 +230,15 @@ nvc0_validate_clip(struct nvc0_context *nvc0)
struct nouveau_channel *chan = nvc0->screen->base.channel;
uint32_t clip;
- clip = nvc0->clip.depth_clamp ? 0x201a : 0x0002;
-#ifndef NVC0_SCISSORS_CLIPPING
- clip |= 0x1080;
-#endif
+ if (nvc0->clip.depth_clamp) {
+ clip =
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2;
+ } else {
+ clip = NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1;
+ }
BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
OUT_RING (chan, clip);
@@ -238,6 +246,7 @@ nvc0_validate_clip(struct nvc0_context *nvc0)
if (nvc0->clip.nr) {
struct nouveau_bo *bo = nvc0->screen->uniforms;
+ MARK_RING (chan, 6 + nvc0->clip.nr * 4, 2);
BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
OUT_RING (chan, 256);
OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
@@ -281,6 +290,32 @@ nvc0_validate_rasterizer(struct nvc0_context *nvc0)
}
static void
+nvc0_validate_sprite_coords(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ uint32_t reg;
+
+ if (nvc0->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT)
+ reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT;
+ else
+ reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT;
+
+ if (nvc0->rast->pipe.point_quad_rasterization) {
+ uint32_t en = nvc0->rast->pipe.sprite_coord_enable;
+
+ while (en) {
+ int i = ffs(en) - 1;
+ en &= ~(1 << i);
+ if (i >= 0 && i < 8)
+ reg |= 8 << i;
+ }
+ }
+
+ BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1);
+ OUT_RING (chan, reg);
+}
+
+static void
nvc0_constbufs_validate(struct nvc0_context *nvc0)
{
struct nouveau_channel *chan = nvc0->screen->base.channel;
@@ -340,6 +375,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
if (rebind) {
+ MARK_RING (chan, 4, 2);
BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
OUT_RING (chan, align(res->base.width0, 0x100));
OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
@@ -357,6 +393,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
}
nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1);
+ MARK_RING (chan, nr + 5, 2);
BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
OUT_RING (chan, align(res->base.width0, 0x100));
OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
@@ -383,13 +420,7 @@ static struct state_validate {
{ nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR },
{ nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF },
{ nvc0_validate_stipple, NVC0_NEW_STIPPLE },
-#ifdef NVC0_SCISSORS_CLIPPING
- { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT |
- NVC0_NEW_RASTERIZER |
- NVC0_NEW_FRAMEBUFFER },
-#else
- { nvc0_validate_scissor, NVC0_NEW_SCISSOR },
-#endif
+ { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER },
{ nvc0_validate_viewport, NVC0_NEW_VIEWPORT },
{ nvc0_validate_clip, NVC0_NEW_CLIP },
{ nvc0_vertprog_validate, NVC0_NEW_VERTPROG },
@@ -397,10 +428,12 @@ static struct state_validate {
{ nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
{ nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
{ nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
+ { nvc0_validate_sprite_coords, NVC0_NEW_RASTERIZER | NVC0_NEW_FRAGPROG },
{ nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
{ nvc0_validate_textures, NVC0_NEW_TEXTURES },
{ nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
- { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }
+ { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
+ { nvc0_tfb_validate, NVC0_NEW_TFB | NVC0_NEW_TFB_BUFFERS }
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h
index 6c8028aba13..57566128ab5 100644
--- a/src/gallium/drivers/nvc0/nvc0_stateobj.h
+++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h
@@ -4,8 +4,6 @@
#include "pipe/p_state.h"
-#define NVC0_SCISSORS_CLIPPING
-
#define SB_BEGIN_3D(so, m, s) \
(so)->state[(so)->size++] = \
(0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
@@ -67,16 +65,17 @@ struct nvc0_vertex_stateobj {
unsigned num_elements;
uint32_t instance_elts;
uint32_t instance_bufs;
+ boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */
unsigned vtx_size;
unsigned vtx_per_packet_max;
- struct nvc0_vertex_element element[1];
+ struct nvc0_vertex_element element[0];
};
/* will have to lookup index -> location qualifier from nvc0_program */
-struct nvc0_tfb_state {
- uint8_t varying_count[4];
+struct nvc0_transform_feedback_state {
uint32_t stride[4];
- uint8_t varying_indices[1];
+ uint8_t varying_count[4];
+ uint8_t varying_index[0];
};
#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
index cc0a65687dc..8898bc733a3 100644
--- a/src/gallium/drivers/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
@@ -33,25 +33,15 @@
#include "nv50_defs.xml.h"
+#define NVC0_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL
+
/* return TRUE for formats that can be converted among each other by NVC0_2D */
static INLINE boolean
nvc0_2d_format_faithful(enum pipe_format format)
{
- switch (format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- case PIPE_FORMAT_B8G8R8A8_SRGB:
- case PIPE_FORMAT_B8G8R8X8_SRGB:
- case PIPE_FORMAT_B5G6R5_UNORM:
- case PIPE_FORMAT_B5G5R5A1_UNORM:
- case PIPE_FORMAT_B10G10R10A2_UNORM:
- case PIPE_FORMAT_R8_UNORM:
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- case PIPE_FORMAT_R32G32B32_FLOAT:
- return TRUE;
- default:
- return FALSE;
- }
+ uint8_t id = nvc0_format_table[format].rt;
+
+ return (id >= 0xc0) && (NVC0_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
}
static INLINE uint8_t
@@ -62,7 +52,7 @@ nvc0_2d_format(enum pipe_format format)
/* Hardware values for color formats range from 0xc0 to 0xff,
* but the 2D engine doesn't support all of them.
*/
- if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0))))
+ if (nvc0_2d_format_faithful(format))
return id;
switch (util_format_get_blocksize(format)) {
@@ -72,6 +62,10 @@ nvc0_2d_format(enum pipe_format format)
return NV50_SURFACE_FORMAT_R16_UNORM;
case 4:
return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM;
+ case 8:
+ return NV50_SURFACE_FORMAT_R16G16B16A16_UNORM;
+ case 16:
+ return NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT;
default:
return 0;
}
@@ -249,15 +243,16 @@ nvc0_clear_render_target(struct pipe_context *pipe,
OUT_RING (chan, 1);
OUT_RING (chan, 0);
- /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
-
- BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
- OUT_RING (chan, (width << 16) | dstx);
- OUT_RING (chan, (height << 16) | dsty);
+ BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2);
+ OUT_RING (chan, ((dstx + width) << 16) | dstx);
+ OUT_RING (chan, ((dsty + height) << 16) | dsty);
+ IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1);
BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
OUT_RING (chan, 0x3c);
+ IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0);
+
nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
}
@@ -306,13 +301,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
OUT_RING (chan, sf->height);
OUT_RING (chan, (1 << 16) | 1);
- BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
- OUT_RING (chan, (width << 16) | dstx);
- OUT_RING (chan, (height << 16) | dsty);
+ BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2);
+ OUT_RING (chan, ((dstx + width) << 16) | dstx);
+ OUT_RING (chan, ((dsty + height) << 16) | dsty);
+ IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1);
BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
OUT_RING (chan, mode);
+ IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0);
+
nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
index b219f82c903..968558a5869 100644
--- a/src/gallium/drivers/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nvc0/nvc0_tex.c
@@ -196,9 +196,16 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
OUT_RINGp (chan, &tic->tic[3], 5);
need_flush = TRUE;
+ } else
+ if (res->status & NVC0_BUFFER_STATUS_GPU_WRITING) {
+ BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1);
+ OUT_RING (chan, (tic->id << 4) | 1);
}
nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+ res->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NVC0_BUFFER_STATUS_GPU_READING;
+
nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res,
NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
index 950bee2eda4..a44d330c731 100644
--- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
+++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
@@ -63,7 +63,13 @@ bld_register_access(struct bld_register *reg, unsigned i)
static INLINE void
bld_register_add_val(struct bld_register *reg, struct nv_value *val)
{
- util_dynarray_append(&reg->vals, struct nv_value *, val);
+ struct nv_basic_block *bb = val->insn->bb;
+
+ if (reg->vals.size &&
+ (util_dynarray_top(&reg->vals, struct nv_value *))->insn->bb == bb)
+ *(util_dynarray_top_ptr(&reg->vals, struct nv_value *)) = val;
+ else
+ util_dynarray_append(&reg->vals, struct nv_value *, val);
}
static INLINE boolean
@@ -127,13 +133,10 @@ struct bld_context {
static INLINE ubyte
bld_register_file(struct bld_context *bld, struct bld_register *reg)
{
- if (reg < &bld->avs[0][0]) return NV_FILE_GPR;
- else
- if (reg < &bld->pvs[0][0]) return NV_FILE_GPR;
- else
- if (reg < &bld->ovs[0][0]) return NV_FILE_PRED;
- else
- return NV_FILE_MEM_V;
+ if (reg >= &bld->pvs[0][0] &&
+ reg < &bld->ovs[0][0])
+ return NV_FILE_PRED;
+ return NV_FILE_GPR;
}
static INLINE struct nv_value *
@@ -361,6 +364,9 @@ bld_loop_phi(struct bld_context *bld, struct bld_register *reg,
struct nv_basic_block *bb = bld->pc->current_block;
struct nv_value *val = NULL;
+ if (bld->ti->require_stores) /* XXX: actually only for INDEXABLE_TEMP */
+ return NULL;
+
if (bld->loop_lvl > 1) {
--bld->loop_lvl;
if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl)))
@@ -459,6 +465,7 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
reg = (struct bld_register *)phi->target;
phi->target = NULL;
+ /* start with s == 1, src[0] is from outside the loop */
for (s = 1, n = 0; n < bb->num_in; ++n) {
if (bb->in_kind[n] != CFG_EDGE_BACK)
continue;
@@ -470,8 +477,11 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
for (i = 0; i < 4; ++i)
if (phi->src[i] && phi->src[i]->value == val)
break;
- if (i == 4)
+ if (i == 4) {
+ /* skip values we do not want to replace */
+ for (; phi->src[s] && phi->src[s]->value != phi->def[0]; ++s);
nv_reference(bld->pc, phi, s++, val);
+ }
}
bld->pc->current_block = save;
@@ -563,11 +573,12 @@ bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst,
loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));
- loc->reg.id = ofst * 4;
+ loc->reg.address = ofst * 4;
nv_reference(bld->pc, insn, 0, loc);
- nv_reference(bld->pc, insn, 1, ptr);
- nv_reference(bld->pc, insn, 2, val);
+ nv_reference(bld->pc, insn, 1, val);
+ if (ptr)
+ bld_src_pointer(bld, insn, 2, ptr);
}
static struct nv_value *
@@ -579,7 +590,9 @@ bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst)
loc->reg.address = ofst * 4;
- val = bld_insn_2(bld, NV_OP_LD, loc, ptr);
+ val = bld_insn_1(bld, NV_OP_LD, loc);
+ if (ptr)
+ bld_src_pointer(bld, val->insn, 1, ptr);
return val;
}
@@ -650,7 +663,7 @@ bld_kil(struct bld_context *bld, struct nv_value *src)
static void
bld_flow(struct bld_context *bld, uint opcode,
- struct nv_value *src, struct nv_basic_block *target,
+ struct nv_value *pred, uint8_t cc, struct nv_basic_block *target,
boolean reconverge)
{
struct nv_instruction *nvi;
@@ -661,8 +674,10 @@ bld_flow(struct bld_context *bld, uint opcode,
nvi = new_instruction(bld->pc, opcode);
nvi->target = target;
nvi->terminator = 1;
- if (src)
- bld_src_predicate(bld, nvi, 0, src);
+ if (pred) {
+ nvi->cc = cc;
+ bld_src_predicate(bld, nvi, 0, pred);
+ }
}
static ubyte
@@ -878,11 +893,10 @@ emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
break;
case TGSI_FILE_TEMPORARY:
assert(idx < BLD_MAX_TEMPS);
- if (!res->insn)
+ if (!res->insn || res->insn->bb != bld->pc->current_block)
res = bld_insn_1(bld, NV_OP_MOV, res);
assert(res->reg.file == NV_FILE_GPR);
- assert(res->insn->bb = bld->pc->current_block);
if (bld->ti->require_stores)
bld_lmem_store(bld, ptr, idx * 4 + chan, res);
@@ -979,14 +993,6 @@ bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
}
static struct nv_value *
-bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c)
-{
- if (bld->saved_inputs[i][c])
- return bld->saved_inputs[i][c];
- return NULL;
-}
-
-static struct nv_value *
bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val)
{
unsigned cent = mode & NVC0_INTERP_CENTROID;
@@ -1053,9 +1059,9 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
case TGSI_FILE_INPUT:
assert(!src->Register.Dimension);
if (!ptr) {
- res = bld_get_saved_input(bld, idx, swz);
+ res = bld->saved_inputs[idx][swz];
if (res)
- return res;
+ break;
}
res = new_value(bld->pc, bld->ti->input_file, 4);
if (ptr)
@@ -1173,7 +1179,7 @@ static INLINE void
describe_texture_target(unsigned target, int *dim,
int *array, int *cube, int *shadow)
{
- *array = *cube = *shadow = 0;
+ *dim = *array = *cube = *shadow = 0;
switch (target) {
case TGSI_TEXTURE_1D:
@@ -1199,11 +1205,6 @@ describe_texture_target(unsigned target, int *dim,
*dim = 2;
*cube = 1;
break;
- /*
- case TGSI_TEXTURE_CUBE_ARRAY:
- *dim = 2;
- *cube = *array = 1;
- break;
case TGSI_TEXTURE_1D_ARRAY:
*dim = *array = 1;
break;
@@ -1211,6 +1212,7 @@ describe_texture_target(unsigned target, int *dim,
*dim = 2;
*array = 1;
break;
+ /*
case TGSI_TEXTURE_SHADOW1D_ARRAY:
*dim = *array = *shadow = 1;
break;
@@ -1220,7 +1222,7 @@ describe_texture_target(unsigned target, int *dim,
break;
case TGSI_TEXTURE_CUBE_ARRAY:
*dim = 2;
- *array = *cube = 1;
+ *cube = *array = 1;
break;
*/
default:
@@ -1338,10 +1340,6 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
if (array)
arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]);
- /* ensure that all inputs reside in a GPR */
- for (c = 0; c < dim + array + cube + shadow; ++c)
- (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1;
-
/* bind { layer x y z } and { lod/bias shadow } to adjacent regs */
bnd = new_instruction(bld->pc, NV_OP_BIND);
@@ -1383,21 +1381,12 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
nvi->tex_dim = dim;
nvi->tex_cube = cube;
nvi->tex_shadow = shadow;
+ nvi->tex_array = array;
nvi->tex_live = 0;
return nvi;
}
-/*
-static boolean
-bld_is_constant(struct nv_value *val)
-{
- if (val->reg.file == NV_FILE_IMM)
- return TRUE;
- return val->insn && nvCG_find_constant(val->insn->src[0]);
-}
-*/
-
static void
bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
const struct tgsi_full_instruction *insn)
@@ -1413,7 +1402,7 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
assert(dim + array + shadow + lodbias <= 5);
- if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
+ if (!cube && !array && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
load_proj_tex_coords(bld, t, dim, shadow, insn);
else {
for (c = 0; c < dim + cube + array; ++c)
@@ -1504,10 +1493,10 @@ bld_instruction(struct bld_context *bld,
case TGSI_OPCODE_CMP:
FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
src0 = emit_fetch(bld, insn, 0, c);
- src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero);
src1 = emit_fetch(bld, insn, 1, c);
src2 = emit_fetch(bld, insn, 2, c);
- dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0);
+ dst0[c] = bld_insn_3(bld, NV_OP_SLCT_F32, src1, src2, src0);
+ dst0[c]->insn->set_cond = NV_CC_LT;
}
break;
case TGSI_OPCODE_COS:
@@ -1601,6 +1590,7 @@ bld_instruction(struct bld_context *bld,
case TGSI_OPCODE_IF:
{
struct nv_basic_block *b = new_basic_block(bld->pc);
+ struct nv_value *pred = emit_fetch(bld, insn, 0, 0);
assert(bld->cond_lvl < BLD_MAX_COND_NESTING);
@@ -1609,10 +1599,19 @@ bld_instruction(struct bld_context *bld,
bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
- src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ,
- emit_fetch(bld, insn, 0, 0), bld->zero);
+ if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) {
+ pred = bld_clone(bld, pred->insn);
+ pred->reg.size = 1;
+ pred->reg.file = NV_FILE_PRED;
+ if (pred->insn->opcode == NV_OP_FSET_F32)
+ pred->insn->opcode = NV_OP_SET_F32;
+ } else {
+ pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U,
+ pred, bld->zero);
+ }
+ assert(!mask);
- bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0));
+ bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0));
++bld->cond_lvl;
bld_new_block(bld, b);
@@ -1638,6 +1637,10 @@ bld_instruction(struct bld_context *bld,
{
struct nv_basic_block *b = new_basic_block(bld->pc);
+ if (bld->pc->current_block->exit &&
+ !bld->pc->current_block->exit->terminator)
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, b, FALSE);
+
--bld->cond_lvl;
nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind);
nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
@@ -1678,7 +1681,7 @@ bld_instruction(struct bld_context *bld,
{
struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];
- bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);
@@ -1690,7 +1693,7 @@ bld_instruction(struct bld_context *bld,
{
struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
- bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
@@ -1705,9 +1708,11 @@ bld_instruction(struct bld_context *bld,
{
struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
- bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+ if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
- nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
+ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
+ }
bld_loop_end(bld, bb); /* replace loop-side operand of the phis */
@@ -1824,11 +1829,11 @@ bld_instruction(struct bld_context *bld,
case TGSI_OPCODE_SSG:
FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */
src0 = emit_fetch(bld, insn, 0, c);
- src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero);
- temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000));
- temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f));
- dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp);
- bld_src_predicate(bld, dst0[c]->insn, 1, src1);
+ src1 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero);
+ src2 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero);
+ src1->insn->set_cond = NV_CC_GT;
+ src2->insn->set_cond = NV_CC_LT;
+ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2);
}
break;
case TGSI_OPCODE_SUB:
@@ -1892,10 +1897,10 @@ bld_instruction(struct bld_context *bld,
}
for (c = 0; c < 4; ++c)
- if ((mask & (1 << c)) &&
- ((dst0[c]->reg.file == NV_FILE_IMM) ||
- (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR)))
- dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);
+ if (mask & (1 << c))
+ if ((dst0[c]->reg.file == NV_FILE_IMM) ||
+ (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63))
+ dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);
c = 0;
if ((mask & 0x3) == 0x3) {
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c
index 286b382f58e..b279bdc6e7d 100644
--- a/src/gallium/drivers/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nvc0/nvc0_transfer.c
@@ -10,7 +10,8 @@ struct nvc0_transfer {
struct pipe_transfer base;
struct nvc0_m2mf_rect rect[2];
uint32_t nblocksx;
- uint32_t nblocksy;
+ uint16_t nblocksy;
+ uint16_t nlayers;
};
static void
@@ -242,23 +243,36 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
struct nvc0_miptree_level *lvl = &mt->level[level];
struct nvc0_transfer *tx;
uint32_t size;
- uint32_t w, h, d, z, layer;
+ uint32_t w, h, d, z, layer, box_h, box_y;
int ret;
+ tx = CALLOC_STRUCT(nvc0_transfer);
+ if (!tx)
+ return NULL;
+
+ box_y = box->y;
+ box_h = box->height;
+
if (mt->layout_3d) {
z = box->z;
d = u_minify(res->depth0, level);
layer = 0;
+ tx->nlayers = box->depth;
} else {
z = 0;
d = 1;
- layer = box->z;
+ if (res->target == PIPE_TEXTURE_1D ||
+ res->target == PIPE_TEXTURE_1D_ARRAY) {
+ box_y = 0;
+ box_h = 1;
+ layer = box->y;
+ tx->nlayers = box->height;
+ } else {
+ layer = box->z;
+ tx->nlayers = box->depth;
+ }
}
- tx = CALLOC_STRUCT(nvc0_transfer);
- if (!tx)
- return NULL;
-
pipe_resource_reference(&tx->base.resource, res);
tx->base.level = level;
@@ -266,7 +280,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
tx->base.box = *box;
tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
- tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
+ tx->nblocksy = util_format_get_nblocksy(res->format, box_h);
tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
tx->base.layer_stride = tx->nblocksy * tx->base.stride;
@@ -280,7 +294,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
tx->rect[0].base = lvl->offset + layer * mt->layer_stride;
tx->rect[0].tile_mode = lvl->tile_mode;
tx->rect[0].x = util_format_get_nblocksx(res->format, box->x);
- tx->rect[0].y = util_format_get_nblocksy(res->format, box->y);
+ tx->rect[0].y = util_format_get_nblocksy(res->format, box_y);
tx->rect[0].z = z;
tx->rect[0].width = util_format_get_nblocksx(res->format, w);
tx->rect[0].height = util_format_get_nblocksy(res->format, h);
@@ -291,7 +305,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
size = tx->base.layer_stride;
ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
- size * tx->base.box.depth, &tx->rect[1].bo);
+ size * tx->nlayers, &tx->rect[1].bo);
if (ret) {
FREE(tx);
return NULL;
@@ -304,8 +318,9 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
tx->rect[1].domain = NOUVEAU_BO_GART;
if (usage & PIPE_TRANSFER_READ) {
+ unsigned base = tx->rect[0].base;
unsigned i;
- for (i = 0; i < box->depth; ++i) {
+ for (i = 0; i < tx->nlayers; ++i) {
nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0],
tx->nblocksx, tx->nblocksy);
if (mt->layout_3d)
@@ -314,9 +329,10 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
tx->rect[0].base += mt->layer_stride;
tx->rect[1].base += size;
}
+ tx->rect[0].z = z;
+ tx->rect[0].base = base;
+ tx->rect[1].base = 0;
}
- tx->rect[0].z = z;
- tx->rect[1].base = 0;
return &tx->base;
}
@@ -331,7 +347,7 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx,
unsigned i;
if (tx->base.usage & PIPE_TRANSFER_WRITE) {
- for (i = 0; i < tx->base.box.depth; ++i) {
+ for (i = 0; i < tx->nlayers; ++i) {
nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1],
tx->nblocksx, tx->nblocksy);
if (mt->layout_3d)
diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c
index a51a887ed89..2db43d8704b 100644
--- a/src/gallium/drivers/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nvc0/nvc0_vbo.c
@@ -54,12 +54,13 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
assert(num_elements);
so = MALLOC(sizeof(*so) +
- (num_elements - 1) * sizeof(struct nvc0_vertex_element));
+ num_elements * sizeof(struct nvc0_vertex_element));
if (!so)
return NULL;
so->num_elements = num_elements;
so->instance_elts = 0;
so->instance_bufs = 0;
+ so->need_conversion = FALSE;
transkey.nr_elements = 0;
transkey.output_stride = 0;
@@ -83,6 +84,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
return NULL;
}
so->element[i].state = nvc0_format_table[fmt].vtx;
+ so->need_conversion = TRUE;
}
so->element[i].state |= i;
@@ -152,7 +154,7 @@ nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi,
if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) {
/* TODO: use min and max instance divisor to get a proper range */
*base = 0;
- *size = (nvc0->vtxbuf[vbi].max_index + 1) * nvc0->vtxbuf[vbi].stride;
+ *size = nvc0->vtxbuf[vbi].buffer->width0;
} else {
assert(nvc0->vbo_max_index != ~0);
*base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride;
@@ -171,12 +173,15 @@ nvc0_prevalidate_vbufs(struct nvc0_context *nvc0)
nvc0->vbo_fifo = nvc0->vbo_user = 0;
+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX);
+
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
vb = &nvc0->vtxbuf[i];
if (!vb->stride)
continue;
buf = nvc0_resource(vb->buffer);
+ /* NOTE: user buffers with temporary storage count as mapped by GPU */
if (!nvc0_resource_mapped_by_gpu(vb->buffer)) {
if (nvc0->vbo_push_hint) {
nvc0->vbo_fifo = ~0;
@@ -227,16 +232,30 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0)
}
offset = vb->buffer_offset + ve->src_offset;
+ MARK_RING (chan, 6, 4);
BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
OUT_RING (chan, i);
- OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD);
- OUT_RESRCl(chan, buf, size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCh(chan, buf, base + size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, buf, base + size - 1, NOUVEAU_BO_RD);
OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD);
OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD);
}
nvc0->vbo_dirty = TRUE;
}
+static INLINE void
+nvc0_release_user_vbufs(struct nvc0_context *nvc0)
+{
+ uint32_t vbo_user = nvc0->vbo_user;
+
+ while (vbo_user) {
+ int i = ffs(vbo_user) - 1;
+ vbo_user &= ~(1 << i);
+
+ nvc0_buffer_release_gpu_storage(nvc0_resource(nvc0->vtxbuf[i].buffer));
+ }
+}
+
void
nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
{
@@ -246,7 +265,12 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
struct nvc0_vertex_element *ve;
unsigned i;
- nvc0_prevalidate_vbufs(nvc0);
+ if (unlikely(vertex->need_conversion)) {
+ nvc0->vbo_fifo = ~0;
+ nvc0->vbo_user = 0;
+ } else {
+ nvc0_prevalidate_vbufs(nvc0);
+ }
BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements);
for (i = 0; i < vertex->num_elements; ++i) {
@@ -292,6 +316,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
size = vb->buffer->width0;
offset = ve->pipe.src_offset + vb->buffer_offset;
+ MARK_RING (chan, 8, 4);
BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
OUT_RING (chan, (1 << 12) | vb->stride);
BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
@@ -346,57 +371,10 @@ nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan)
{
struct nvc0_context *nvc0 = chan->user_private;
- nvc0_bufctx_emit_relocs(nvc0);
-}
+ nvc0_screen_fence_update(nvc0->screen, TRUE);
-#if 0
-static struct nouveau_bo *
-nvc0_tfb_setup(struct nvc0_context *nvc0)
-{
- struct nouveau_channel *chan = nvc0->screen->base.channel;
- struct nouveau_bo *tfb = NULL;
- int ret, i;
-
- ret = nouveau_bo_new(nvc0->screen->base.device,
- NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &tfb);
- if (ret)
- return NULL;
-
- ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR);
- if (ret)
- return NULL;
- memset(tfb->map, 0xee, 8 * 4 * 3);
- nouveau_bo_unmap(tfb);
-
- BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(0)), 5);
- OUT_RING (chan, 1);
- OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
- OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
- OUT_RING (chan, tfb->size);
- OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID(0) */
- BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3);
- OUT_RING (chan, 0);
- OUT_RING (chan, 8); /* TFB_VARYING_COUNT(0) */
- OUT_RING (chan, 32); /* TFB_BUFFER_STRIDE(0) */
- BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2);
- OUT_RING (chan, 0x1f1e1d1c);
- OUT_RING (chan, 0xa3a2a1a0);
- for (i = 1; i < 4; ++i) {
- BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1);
- OUT_RING (chan, 0);
- }
- BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, RING_3D_(0x135c), 1);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, RING_3D_(0x135c), 1);
- OUT_RING (chan, 0);
-
- return tfb;
+ nvc0_bufctx_emit_relocs(nvc0);
}
-#endif
static void
nvc0_draw_arrays(struct nvc0_context *nvc0,
@@ -422,7 +400,7 @@ nvc0_draw_arrays(struct nvc0_context *nvc0,
prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
}
- chan->flush_notify = NULL;
+ chan->flush_notify = nvc0_default_flush_notify;
}
static void
@@ -592,7 +570,7 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
}
}
- chan->flush_notify = NULL;
+ chan->flush_notify = nvc0_default_flush_notify;
}
void
@@ -611,6 +589,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvc0->vbo_min_index = info->min_index;
nvc0->vbo_max_index = info->max_index;
+ if (nvc0->vbo_push_hint != !!nvc0->vbo_fifo)
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+
if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS)))
nvc0_update_user_vbufs(nvc0);
@@ -668,4 +649,6 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
info->mode, info->start, info->count,
info->instance_count, info->index_bias);
}
+
+ nvc0_release_user_vbufs(nvc0);
}
diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h
index 1544fb7a1de..45f71967eff 100644
--- a/src/gallium/drivers/nvc0/nvc0_winsys.h
+++ b/src/gallium/drivers/nvc0/nvc0_winsys.h
@@ -95,7 +95,7 @@ OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res,
unsigned delta, unsigned flags)
{
if (flags & NOUVEAU_BO_WR)
- res->status |= NVC0_BUFFER_STATUS_DIRTY;
+ res->status |= NVC0_BUFFER_STATUS_GPU_WRITING;
return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags);
}