diff options
author | Christian König <[email protected]> | 2011-02-24 22:02:42 +0100 |
---|---|---|
committer | Christian König <[email protected]> | 2011-02-24 22:02:42 +0100 |
commit | b922a0ce12916a91cfc3e56714913fcf63279ff2 (patch) | |
tree | e24fa039925220882d155ccb987f7914e83f4372 /src/gallium/drivers/nvc0 | |
parent | f013b4f8f1329982727691a55cc263e3011d02bf (diff) | |
parent | c0ad70ae31ee5501281b434d56e389fc92b13a3a (diff) |
Merge remote branch 'origin/master' into pipe-video
Conflicts:
configure.ac
src/gallium/auxiliary/Makefile
src/gallium/auxiliary/SConscript
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_state_inlines.h
src/gallium/drivers/r600/r600_texture.c
Diffstat (limited to 'src/gallium/drivers/nvc0')
31 files changed, 1292 insertions, 605 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 61932ff2b6a..73a605f94e1 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -84,6 +84,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210 +#define NVC0_3D_MEM_BARRIER 0x0000021c +#define NVC0_3D_MEM_BARRIER_UNK0 0x00000001 +#define NVC0_3D_MEM_BARRIER_UNK1 0x00000002 +#define NVC0_3D_MEM_BARRIER_UNK2 0x00000004 +#define NVC0_3D_MEM_BARRIER_UNK4 0x00000010 +#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100 +#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000 + #define NVC0_3D_TESS_MODE 0x00000320 #define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f #define NVC0_3D_TESS_MODE_PRIM__SHIFT 0 @@ -122,11 +130,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0)) -#define NVC0_3D_TFB_UNK0700(i0) (0x00000700 + 0x10*(i0)) +#define NVC0_3D_TFB_UNK07X0(i0) (0x00000700 + 0x10*(i0)) +#define NVC0_3D_TFB_UNK07X0__ESIZE 0x00000010 +#define NVC0_3D_TFB_UNK07X0__LEN 0x00000004 #define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0)) +#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010 +#define NVC0_3D_TFB_VARYING_COUNT__LEN 0x00000004 #define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0)) +#define NVC0_3D_TFB_BUFFER_STRIDE__ESIZE 0x00000010 +#define NVC0_3D_TFB_BUFFER_STRIDE__LEN 0x00000004 #define NVC0_3D_TFB_ENABLE 0x00000744 @@ -144,29 +158,29 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_RT__ESIZE 0x00000020 #define NVC0_3D_RT__LEN 0x00000008 -#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x20*(i0)) +#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x40*(i0)) -#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x20*(i0)) +#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x40*(i0)) -#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x20*(i0)) +#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x40*(i0)) -#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x20*(i0)) +#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x40*(i0)) -#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x20*(i0)) +#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x40*(i0)) -#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x20*(i0)) +#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x40*(i0)) #define NVC0_3D_RT_TILE_MODE_UNK0 0x00000001 #define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070 #define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4 #define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700 #define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8 -#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x20*(i0)) +#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x40*(i0)) #define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff #define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0 #define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000 -#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x20*(i0)) +#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x40*(i0)) #define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0)) #define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020 @@ -216,21 +230,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010 #define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0) (0x00000d00 + 0x8*(i0)) -#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK 0x0000ffff -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT 0 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK 0xffff0000 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT 16 - -#define NVC0_3D_VIEWPORT_CLIP_VERT(i0) (0x00000d04 + 0x8*(i0)) -#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK 0x0000ffff -#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT 0 -#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK 0xffff0000 -#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT 16 +#define NVC0_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ__LEN 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16 + +#define NVC0_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_VERT__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_VERT__LEN 0x00000008 +#define NVC0_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_VERT_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_VERT_MAX__SHIFT 16 #define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0)) #define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008 @@ -356,6 +370,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff #define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0 +#define NVC0_3D_CLEAR_FLAGS 0x000010f8 +#define NVC0_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001 +#define NVC0_3D_CLEAR_FLAGS_UNK4 0x00000010 +#define NVC0_3D_CLEAR_FLAGS_SCISSOR 0x00000100 +#define NVC0_3D_CLEAR_FLAGS_VIEWPORT 0x00001000 + #define NVC0_3D_VERTEX_ID 0x00001118 #define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c @@ -561,7 +581,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004 #define NVC0_3D_BLEND_ENABLE__LEN 0x00000008 -#define NVC0_3D_STENCIL_FRONT_ENABLE 0x00001380 +#define NVC0_3D_STENCIL_ENABLE 0x00001380 #define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384 #define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 @@ -605,9 +625,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394 -#define NVC0_3D_STENCIL_FRONT_MASK 0x00001398 +#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x00001398 -#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c +#define NVC0_3D_STENCIL_FRONT_MASK 0x0000139c #define NVC0_3D_DRAW_TFB_BASE 0x000013a4 @@ -642,6 +662,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_CLIPID_HEIGHT 0x00001504 #define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ 0x00001508 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16 + +#define NVC0_3D_CLIPID_FILL_RECT_VERT 0x0000150c +#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16 + #define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510 #define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001 #define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002 @@ -814,8 +846,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VERTEX_BASE_LOW 0x000015f8 #define NVC0_3D_POINT_COORD_REPLACE 0x00001604 -#define NVC0_3D_POINT_COORD_REPLACE_BITS__MASK 0x00001fff -#define NVC0_3D_POINT_COORD_REPLACE_BITS__SHIFT 0 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__MASK 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__SHIFT 2 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT 0x00000000 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__MASK 0x000007f8 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__SHIFT 3 #define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608 @@ -864,8 +900,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000 #define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001 -#define NVC0_3D_POINT_SPRITE_CTRL 0x00001660 - #define NVC0_3D_TEX_MISC 0x00001664 #define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004 @@ -928,22 +962,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3 0x00000008 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4 0x00000010 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 0x00000002 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK2 0x00000004 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000 -#define NVC0_3D_VIEWPORT_CLIP_RECTS_EN 0x0000194c +#define NVC0_3D_CLIP_RECTS_EN 0x0000194c -#define NVC0_3D_VIEWPORT_CLIP_MODE 0x00001950 -#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY 0x00000000 -#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL 0x00000001 -#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER 0x00000002 +#define NVC0_3D_CLIP_RECTS_MODE 0x00001950 +#define NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000 +#define NVC0_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001 +#define NVC0_3D_CLIP_RECTS_MODE_NEVER 0x00000002 #define NVC0_3D_FP_ZORDER_CTRL 0x0000196c #define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001 @@ -996,6 +1036,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00 #define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10 +#define NVC0_3D_CLIPID_FILL 0x000019d4 + #define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0)) #define NVC0_3D_COLOR_MASK__ESIZE 0x00000004 #define NVC0_3D_COLOR_MASK__LEN 0x00000008 @@ -1155,9 +1197,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600 -#define NVC0_3D_TFB_VARYING_LOCS(i0) (0x00002800 + 0x4*(i0)) +#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1)) #define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 -#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000080 +#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020 #define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808 diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c index ea3e642a448..aa949bdfa36 100644 --- a/src/gallium/drivers/nvc0/nvc0_buffer.c +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c @@ -59,15 +59,23 @@ release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence) (*mm) = NULL; } -static INLINE boolean -nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf, - unsigned domain) +INLINE void +nvc0_buffer_release_gpu_storage(struct nvc0_resource *buf) { nouveau_bo_ref(NULL, &buf->bo); if (buf->mm) release_allocation(&buf->mm, buf->fence); + buf->domain = 0; +} + +static INLINE boolean +nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf, + unsigned domain) +{ + nvc0_buffer_release_gpu_storage(buf); + return nvc0_buffer_allocate(screen, buf, domain); } @@ -77,10 +85,7 @@ nvc0_buffer_destroy(struct pipe_screen *pscreen, { struct nvc0_resource *res = nvc0_resource(presource); - nouveau_bo_ref(NULL, &res->bo); - - if (res->mm) - release_allocation(&res->mm, res->fence); + nvc0_buffer_release_gpu_storage(res); if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY)) FREE(res->data); @@ -112,7 +117,7 @@ nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf, memcpy(buf->data + start, bounce->map, size); nouveau_bo_unmap(bounce); - buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + buf->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING; nouveau_bo_ref(NULL, &bounce); if (mm) @@ -151,7 +156,7 @@ nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf, release_allocation(&mm, nvc0->screen->fence.current); if (start == 0 && size == buf->base.width0) - buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + buf->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING; return TRUE; } @@ -174,7 +179,7 @@ nvc0_buffer_transfer_get(struct pipe_context *pipe, if (buf->domain == NOUVEAU_BO_VRAM) { if (usage & PIPE_TRANSFER_READ) { - if (buf->status & NVC0_BUFFER_STATUS_DIRTY) + if (buf->status & NVC0_BUFFER_STATUS_GPU_WRITING) nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0); } } diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 2118abb5d5d..f02de4d044a 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -41,17 +41,18 @@ nvc0_flush(struct pipe_context *pipe, unsigned flags, OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); OUT_RING (chan, 0x00); + } else + if ((flags & PIPE_FLUSH_RENDER_CACHE) && !(flags & PIPE_FLUSH_FRAME)) { + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); } - if (fence) { - nvc0_screen_fence_new(nvc0->screen, (struct nvc0_fence **)fence, TRUE); - } + if (fence) + nvc0_fence_reference((struct nvc0_fence **)fence, + nvc0->screen->fence.current); - if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) { + if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) FIRE_RING(chan); - - nvc0_screen_fence_next(nvc0->screen); - } } static void @@ -67,6 +68,16 @@ nvc0_destroy(struct pipe_context *pipe) FREE(nvc0); } +void +nvc0_default_flush_notify(struct nouveau_channel *chan) +{ + struct nvc0_context *nvc0 = chan->user_private; + + nvc0_screen_fence_update(nvc0->screen, TRUE); + + nvc0_screen_fence_next(nvc0->screen); +} + struct pipe_context * nvc0_create(struct pipe_screen *pscreen, void *priv) { @@ -91,6 +102,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) nvc0->pipe.flush = nvc0_flush; screen->base.channel->user_private = nvc0; + screen->base.channel->flush_notify = nvc0_default_flush_notify; nvc0_init_query_functions(nvc0); nvc0_init_surface_functions(nvc0); @@ -148,12 +160,14 @@ nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) { struct resident *rsd; struct util_dynarray *array; - unsigned ctx, i; + unsigned ctx, i, n; for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) { array = &nvc0->residents[ctx]; - for (i = 0; i < array->size / sizeof(struct resident); ++i) { + n = array->size / sizeof(struct resident); + MARK_RING(nvc0->screen->base.channel, n, n); + for (i = 0; i < n; ++i) { rsd = util_dynarray_element(array, struct resident, i); nvc0_resource_validate(rsd->res, rsd->flags); diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 94117988e50..1ce5554f7b7 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -54,6 +54,8 @@ #define NVC0_NEW_CONSTBUF (1 << 18) #define NVC0_NEW_TEXTURES (1 << 19) #define NVC0_NEW_SAMPLERS (1 << 20) +#define NVC0_NEW_TFB (1 << 21) +#define NVC0_NEW_TFB_BUFFERS (1 << 22) #define NVC0_BUFCTX_CONSTANT 0 #define NVC0_BUFCTX_FRAME 1 @@ -79,6 +81,7 @@ struct nvc0_context { uint8_t num_vtxelts; uint8_t num_textures[5]; uint8_t num_samplers[5]; + uint8_t tls_required; /* bitmask of shader types using l[] */ uint16_t scissor; uint32_t uniform_buffer_bound[5]; } state; @@ -123,6 +126,11 @@ struct nvc0_context { boolean vbo_dirty; boolean vbo_push_hint; + struct nvc0_transform_feedback_state *tfb; + struct pipe_resource *tfbbuf[4]; + unsigned num_tfbbufs; + unsigned tfb_offset[4]; + struct draw_context *draw; }; @@ -149,6 +157,8 @@ nvc0_surface(struct pipe_surface *ps) /* nvc0_context.c */ struct pipe_context *nvc0_create(struct pipe_screen *, void *); +void nvc0_default_flush_notify(struct nouveau_channel *); + void nvc0_bufctx_emit_relocs(struct nvc0_context *); void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx, struct nvc0_resource *, uint32_t flags); @@ -177,6 +187,8 @@ void nvc0_tevlprog_validate(struct nvc0_context *); void nvc0_gmtyprog_validate(struct nvc0_context *); void nvc0_fragprog_validate(struct nvc0_context *); +void nvc0_tfb_validate(struct nvc0_context *); + /* nvc0_state.c */ extern void nvc0_init_state_functions(struct nvc0_context *); diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c index 9d2c48cf14d..f2d4b1451bf 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.c +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -55,6 +55,7 @@ nvc0_fence_emit(struct nvc0_fence *fence) assert(fence->state == NVC0_FENCE_STATE_AVAILABLE); + MARK_RING (chan, 5, 2); BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); @@ -83,7 +84,8 @@ nvc0_fence_del(struct nvc0_fence *fence) struct nvc0_fence *it; struct nvc0_screen *screen = fence->screen; - if (fence->state == NVC0_FENCE_STATE_EMITTED) { + if (fence->state == NVC0_FENCE_STATE_EMITTED || + fence->state == NVC0_FENCE_STATE_FLUSHED) { if (fence == screen->fence.head) { screen->fence.head = fence->next; if (!screen->fence.head) @@ -118,8 +120,8 @@ nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence) fence->buffers = NULL; } -static void -nvc0_screen_fence_update(struct nvc0_screen *screen) +void +nvc0_screen_fence_update(struct nvc0_screen *screen, boolean flushed) { struct nvc0_fence *fence; struct nvc0_fence *next = NULL; @@ -146,38 +148,43 @@ nvc0_screen_fence_update(struct nvc0_screen *screen) screen->fence.head = next; if (!next) screen->fence.tail = NULL; -} -#define NVC0_FENCE_MAX_SPINS (1 << 17) + if (flushed) { + for (fence = next; fence; fence = fence->next) + fence->state = NVC0_FENCE_STATE_FLUSHED; + } +} boolean nvc0_fence_signalled(struct nvc0_fence *fence) { struct nvc0_screen *screen = fence->screen; - if (fence->state == NVC0_FENCE_STATE_EMITTED) - nvc0_screen_fence_update(screen); + if (fence->state >= NVC0_FENCE_STATE_EMITTED) + nvc0_screen_fence_update(screen, FALSE); return fence->state == NVC0_FENCE_STATE_SIGNALLED; } +#define NVC0_FENCE_MAX_SPINS (1 << 31) + boolean nvc0_fence_wait(struct nvc0_fence *fence) { struct nvc0_screen *screen = fence->screen; - int spins = 0; + uint32_t spins = 0; - if (fence->state == NVC0_FENCE_STATE_AVAILABLE) { + if (fence->state < NVC0_FENCE_STATE_EMITTED) { nvc0_fence_emit(fence); - FIRE_RING(screen->base.channel); - if (fence == screen->fence.current) nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); } + if (fence->state < NVC0_FENCE_STATE_FLUSHED) + FIRE_RING(screen->base.channel); do { - nvc0_screen_fence_update(screen); + nvc0_screen_fence_update(screen, FALSE); if (fence->state == NVC0_FENCE_STATE_SIGNALLED) return TRUE; @@ -188,8 +195,9 @@ nvc0_fence_wait(struct nvc0_fence *fence) #endif } while (spins < NVC0_FENCE_MAX_SPINS); - if (spins > 9000) - NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence); + debug_printf("Wait on fence %u (ack = %u, next = %u) timed out !\n", + fence->sequence, + screen->fence.sequence_ack, screen->fence.sequence); return FALSE; } @@ -199,5 +207,4 @@ nvc0_screen_fence_next(struct nvc0_screen *screen) { nvc0_fence_emit(screen->fence.current); nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); - nvc0_screen_fence_update(screen); } diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h index e63c164bda4..3d8c3f8ba60 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.h +++ b/src/gallium/drivers/nvc0/nvc0_fence.h @@ -7,7 +7,8 @@ #define NVC0_FENCE_STATE_AVAILABLE 0 #define NVC0_FENCE_STATE_EMITTED 1 -#define NVC0_FENCE_STATE_SIGNALLED 2 +#define NVC0_FENCE_STATE_FLUSHED 2 +#define NVC0_FENCE_STATE_SIGNALLED 3 struct nvc0_mm_allocation; diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c index 7c7e134146e..ea3ed9e0225 100644 --- a/src/gallium/drivers/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -143,8 +143,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, switch (pt->format) { case PIPE_FORMAT_Z16_UNORM: tile_flags = 0x0700; /* COMPRESSED */ - tile_flags = 0x0200; /* NORMAL ? */ - tile_flags = 0x0100; /* NORMAL ? */ + tile_flags = 0x0100; /* NORMAL */ break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: tile_flags = 0x5300; /* MSAA 4, COMPRESSED */ @@ -170,6 +169,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, break; case PIPE_FORMAT_R16G16B16A16_UNORM: tile_flags = 0xe900; /* COMPRESSED */ + tile_flags = 0xfe00; /* NORMAL */ break; default: tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */ @@ -283,7 +283,7 @@ nvc0_miptree_surface_new(struct pipe_context *pipe, pipe_reference_init(&ps->reference, 1); pipe_resource_reference(&ps->texture, pt); ps->context = pipe; - ps->format = pt->format; + ps->format = templ->format; ps->usage = templ->usage; ps->u.tex.level = templ->u.tex.level; ps->u.tex.first_layer = templ->u.tex.first_layer; diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c index 0629dad19c9..516d2e31b55 100644 --- a/src/gallium/drivers/nvc0/nvc0_mm.c +++ b/src/gallium/drivers/nvc0/nvc0_mm.c @@ -96,13 +96,13 @@ mm_bucket_by_size(struct nvc0_mman *cache, unsigned size) static INLINE uint32_t mm_default_slab_size(unsigned chunk_order) { - assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); - static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] = { 12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22 }; + assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); + return 1 << slab_order[chunk_order - MM_MIN_ORDER]; } diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index 304a1919768..f51d289e8cd 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -44,6 +44,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s, if (ld->indirect >= 0) return FALSE; + /* a few ops can use g[] sources directly, but we don't support g[] yet */ + if (ld->src[0]->value->reg.file == NV_FILE_MEM_L || + ld->src[0]->value->reg.file == NV_FILE_MEM_G) + return FALSE; + for (i = 0; i < 3 && nvi->src[i]; ++i) if (nvi->src[i]->value->reg.file == NV_FILE_IMM) return FALSE; @@ -55,15 +60,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s, boolean nvc0_insn_is_predicateable(struct nv_instruction *nvi) { - int s; - - if (!nv_op_predicateable(nvi->opcode)) + if (nvi->predicate >= 0) /* already predicated */ return FALSE; - if (nvi->predicate >= 0) + if (!nvc0_op_info_table[nvi->opcode].predicate && + !nvc0_op_info_table[nvi->opcode].pseudo) return FALSE; - for (s = 0; s < 4 && nvi->src[s]; ++s) - if (nvi->src[s]->value->reg.file == NV_FILE_IMM) - return FALSE; return TRUE; } @@ -103,6 +104,12 @@ nvc0_pc_replace_value(struct nv_pc *pc, return n; } +static INLINE boolean +is_gpr63(struct nv_value *val) +{ + return (val->reg.file == NV_FILE_GPR && val->reg.id == 63); +} + struct nv_value * nvc0_pc_find_constant(struct nv_ref *ref) { @@ -116,7 +123,7 @@ nvc0_pc_find_constant(struct nv_ref *ref) assert(!src->insn->src[0]->mod); src = src->insn->src[0]->value; } - if ((src->reg.file == NV_FILE_IMM) || + if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) || (src->insn && src->insn->opcode == NV_OP_LD && src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && @@ -130,7 +137,7 @@ nvc0_pc_find_immediate(struct nv_ref *ref) { struct nv_value *src = nvc0_pc_find_constant(ref); - return (src && src->reg.file == NV_FILE_IMM) ? src : NULL; + return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? src : NULL; } static void @@ -187,7 +194,10 @@ nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, bb[p++] = b->out[j]; break; case CFG_EDGE_LOOP_LEAVE: - bbb[pp++] = b->out[j]; + if (!b->out[j]->priv) { + bbb[pp++] = b->out[j]; + b->out[j]->priv = 1; + } break; default: assert(0); @@ -499,6 +509,9 @@ nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i) i->bb = b; b->num_instructions++; + + if (i->prev && i->prev->terminator) + nvc0_insns_permute(i->prev, i); } void @@ -512,6 +525,8 @@ nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni) ni->prev = at; ni->next->prev = ni; ni->prev->next = ni; + ni->bb = at->bb; + ni->bb->num_instructions++; } void diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 969cc68c596..efa073a9201 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -53,7 +53,8 @@ /** * BIND forces source operand i into the same register as destination operand i, - * and the operands will be assigned consecutive registers (needed for TEX) + * and the operands will be assigned consecutive registers (needed for TEX). + * Beware conflicts ! * SELECT forces its multiple source operands and its destination operand into * one and the same register. */ @@ -204,6 +205,10 @@ #define NV_CC_C 0x11 #define NV_CC_A 0x12 #define NV_CC_S 0x13 +#define NV_CC_INVERSE(cc) ((cc) ^ 0x7) +/* for 1 bit predicates: */ +#define NV_CC_P 0 +#define NV_CC_NOT_P 1 #define NV_PC_MAX_INSTRUCTIONS 2048 #define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) @@ -259,12 +264,6 @@ nv_op_supported_src_mods(uint opcode) return nvc0_op_info_table[opcode].mods; } -static INLINE boolean -nv_op_predicateable(uint opcode) -{ - return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE; -} - static INLINE uint nv_type_order(ubyte type) { @@ -310,7 +309,7 @@ struct nv_reg { int32_t s32; int64_t s64; uint64_t u64; - uint32_t u32; + uint32_t u32; /* expected to be 0 for $r63 */ float f32; double f64; } imm; @@ -344,6 +343,8 @@ struct nv_ref { uint8_t flags; }; +#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0) + struct nv_basic_block; struct nv_instruction { @@ -485,7 +486,7 @@ nv_alloc_instruction(struct nv_pc *pc, uint opcode) assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS); insn->opcode = opcode; - insn->cc = 0; + insn->cc = NV_CC_P; insn->indirect = -1; insn->predicate = -1; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index db8055d91cd..c10f920e6f1 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -236,7 +236,7 @@ emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op) */ pc->emit[0] |= (pcrel & 0x3f) << 26; - pc->emit[1] |= (pcrel >> 6) & 0x1ffff; + pc->emit[1] |= (pcrel >> 6) & 0x3ffff; } } @@ -393,6 +393,8 @@ emit_tex(struct nv_pc *pc, struct nv_instruction *i) { int src1 = i->tex_array + i->tex_dim + i->tex_cube; + assert(src1 < 6); + pc->emit[0] = 0x00000086; pc->emit[1] = 0x80000000; @@ -446,7 +448,7 @@ emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op) pc->emit[0] |= op << 26; - if (op >= 4) { + if (op >= 3) { if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9; if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7; } else { @@ -477,6 +479,7 @@ emit_ddx(struct nv_pc *pc, struct nv_instruction *i) { i->quadop = 0x99; i->lanes = 4; + i->src[1] = i->src[0]; emit_quadop(pc, i); } @@ -485,6 +488,7 @@ emit_ddy(struct nv_pc *pc, struct nv_instruction *i) { i->quadop = 0xa5; i->lanes = 5; + i->src[1] = i->src[0]; emit_quadop(pc, i); } @@ -629,25 +633,28 @@ emit_slct(struct nv_pc *pc, struct nv_instruction *i) static void emit_cvt(struct nv_pc *pc, struct nv_instruction *i) { + uint32_t rint; + pc->emit[0] = 0x00000004; pc->emit[1] = 0x10000000; - if (i->opcode != NV_OP_CVT) + /* if no type conversion specified, get type from opcode */ + if (i->opcode != NV_OP_CVT && i->ext.cvt.d == i->ext.cvt.s) i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode); switch (i->ext.cvt.d) { case NV_TYPE_F32: switch (i->ext.cvt.s) { case NV_TYPE_F32: pc->emit[1] = 0x10000000; break; - case NV_TYPE_S32: pc->emit[0] |= 0x200; + case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */ case NV_TYPE_U32: pc->emit[1] = 0x18000000; break; } break; - case NV_TYPE_S32: pc->emit[0] |= 0x80; + case NV_TYPE_S32: pc->emit[0] |= 0x80; /* fall through */ case NV_TYPE_U32: switch (i->ext.cvt.s) { case NV_TYPE_F32: pc->emit[1] = 0x14000000; break; - case NV_TYPE_S32: pc->emit[0] |= 0x200; + case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */ case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break; } break; @@ -656,14 +663,20 @@ emit_cvt(struct nv_pc *pc, struct nv_instruction *i) break; } - if (i->opcode == NV_OP_FLOOR) - pc->emit[1] |= 0x00020000; - else - if (i->opcode == NV_OP_CEIL) - pc->emit[1] |= 0x00040000; - else - if (i->opcode == NV_OP_TRUNC) - pc->emit[1] |= 0x00060000; + rint = (i->ext.cvt.d == NV_TYPE_F32) ? 1 << 7 : 0; + + if (i->opcode == NV_OP_FLOOR) { + pc->emit[0] |= rint; + pc->emit[1] |= 2 << 16; + } else + if (i->opcode == NV_OP_CEIL) { + pc->emit[0] |= rint; + pc->emit[1] |= 4 << 16; + } else + if (i->opcode == NV_OP_TRUNC) { + pc->emit[0] |= rint; + pc->emit[1] |= 6 << 16; + } if (i->saturate || i->opcode == NV_OP_SAT) pc->emit[0] |= 0x20; @@ -793,11 +806,8 @@ emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i) } static void -emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) +emit_ld_common(struct nv_pc *pc, struct nv_instruction *i) { - pc->emit[0] = 0x00000006; - pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); - emit_ldst_size(pc, i); set_pred(pc, i); @@ -808,6 +818,15 @@ emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) } static void +emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000006; + pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); + + emit_ld_common(pc, i); +} + +static void emit_ld(struct nv_pc *pc, struct nv_instruction *i) { if (SFILE(i, 0) >= NV_FILE_MEM_C(0) && @@ -818,6 +837,12 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i) } else { emit_ld_const(pc, i); } + } else + if (SFILE(i, 0) == NV_FILE_MEM_L) { + pc->emit[0] = 0x00000005; + pc->emit[1] = 0xc0000000; + + emit_ld_common(pc, i); } else { NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0)); abort(); @@ -827,8 +852,19 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i) static void emit_st(struct nv_pc *pc, struct nv_instruction *i) { - NOUVEAU_ERR("emit_st: not handled yet\n"); - abort(); + if (SFILE(i, 0) != NV_FILE_MEM_L) + NOUVEAU_ERR("emit_st(%u): file not handled yet\n", SFILE(i, 0)); + + pc->emit[0] = 0x00000005 | (0 << 8); /* write-back caching */ + pc->emit[1] = 0xc8000000; + + emit_ldst_size(pc, i); + + set_pred(pc, i); + set_address_16(pc, i->src[0]); + + SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20); + DID(pc, i->src[1]->value, 14); } void diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index acc72bff14c..c5a7367a5fd 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -99,6 +99,7 @@ inst_removable(struct nv_instruction *nvi) nvc0_insn_refcount(nvi))); } +/* Check if we do not actually have to emit this instruction. */ static INLINE boolean inst_is_noop(struct nv_instruction *nvi) { @@ -141,9 +142,10 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) struct nv_instruction *nvi, *next; int j; + /* find first non-empty block emitted before b */ for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j); - if (j >= 0) { + for (; j >= 0; --j) { in = pc->bb_list[j]; /* check for no-op branches (BRA $PC+8) */ @@ -157,6 +159,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) nvc0_insn_delete(in->exit); } b->emit_pos = in->emit_pos + in->emit_size; + + if (in->emit_size) /* no more no-op branches to b */ + break; } pc->bb_list[pc->num_blocks++] = b; @@ -240,10 +245,15 @@ check_swap_src_0_1(struct nv_instruction *nvi) struct nv_ref *src0 = nvi->src[0]; struct nv_ref *src1 = nvi->src[1]; - if (!nv_op_commutative(nvi->opcode)) + if (!nv_op_commutative(nvi->opcode) && + NV_BASEOP(nvi->opcode) != NV_OP_SET && + NV_BASEOP(nvi->opcode) != NV_OP_SLCT) return; assert(src0 && src1 && src0->value && src1->value); + if (src1->value->reg.file != NV_FILE_GPR) + return; + if (is_cspace_load(src0->value->insn)) { if (!is_cspace_load(src1->value->insn)) { nvi->src[0] = src1; @@ -258,8 +268,13 @@ check_swap_src_0_1(struct nv_instruction *nvi) } } - if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET) - nvi->set_cond = cc_swapped[nvi->set_cond]; + if (nvi->src[0] != src0) { + if (NV_BASEOP(nvi->opcode) == NV_OP_SET) + nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; + else + if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT) + nvi->set_cond = NV_CC_INVERSE(nvi->set_cond); + } } static void @@ -543,6 +558,7 @@ constant_operand(struct nv_pc *pc, nv_reference(pc, nvi, s, nvi->src[t]->value); nvi->src[s]->mod = nvi->src[t]->mod; } + break; case NV_OP_ADD_F32: if (u.u32 == 0) { switch (nvi->src[t]->mod) { @@ -562,6 +578,7 @@ constant_operand(struct nv_pc *pc, if (nvi->opcode != NV_OP_CVT) nvi->src[0]->mod = 0; } + break; case NV_OP_ADD_B32: if (u.u32 == 0) { assert(nvi->src[t]->mod == 0); @@ -582,7 +599,7 @@ constant_operand(struct nv_pc *pc, } else if (u.s32 > 0 && u.s32 == (1 << shift)) { nvi->opcode = NV_OP_SHL; - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift; nv_reference(pc, nvi, 0, nvi->src[t]->value); nv_reference(pc, nvi, 1, val); break; @@ -590,14 +607,14 @@ constant_operand(struct nv_pc *pc, break; case NV_OP_RCP: u.f32 = 1.0f / u.f32; - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32; nvi->opcode = NV_OP_MOV; assert(s == 0); nv_reference(pc, nvi, 0, val); break; case NV_OP_RSQ: u.f32 = 1.0f / sqrtf(u.f32); - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32; nvi->opcode = NV_OP_MOV; assert(s == 0); nv_reference(pc, nvi, 0, val); @@ -607,25 +624,83 @@ constant_operand(struct nv_pc *pc, } } +static void +handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi) +{ + struct nv_value *src0 = nvi->src[0]->value; + struct nv_value *src1 = nvi->src[1]->value; + + if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod)) + return; + if (src0->reg.file != NV_FILE_GPR) + return; + nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0); + nvc0_insn_delete(nvi); +} + +/* check if we can MUL + ADD -> MAD/FMA */ +static void +handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi) +{ + struct nv_value *src0 = nvi->src[0]->value; + struct nv_value *src1 = nvi->src[1]->value; + struct nv_value *src; + int s; + uint8_t mod[4]; + + if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0; + else + if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1; + else + return; + + if ((src0->insn && src0->insn->bb != nvi->bb) || + (src1->insn && src1->insn->bb != nvi->bb)) + return; + + /* check for immediates from prior constant folding */ + if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) + return; + src = nvi->src[s]->value; + + mod[0] = nvi->src[0]->mod; + mod[1] = nvi->src[1]->mod; + mod[2] = src->insn->src[0]->mod; + mod[3] = src->insn->src[1]->mod; + + if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) + return; + + nvi->opcode = NV_OP_MAD_F32; + + nv_reference(ctx->pc, nvi, s, NULL); + nvi->src[2] = nvi->src[!s]; + nvi->src[!s] = NULL; + + nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value); + nvi->src[0]->mod = mod[2] ^ mod[s]; + nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value); + nvi->src[1]->mod = mod[3]; +} + static int -nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) +nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_instruction *nvi, *next; int j; for (nvi = b->entry; nvi; nvi = next) { - struct nv_value *src0, *src1, *src; - int s; - uint8_t mod[4]; + struct nv_value *src0, *src1; + uint baseop = NV_BASEOP(nvi->opcode); next = nvi->next; src0 = nvc0_pc_find_immediate(nvi->src[0]); src1 = nvc0_pc_find_immediate(nvi->src[1]); - if (src0 && src1) + if (src0 && src1) { constant_expression(ctx->pc, nvi, src0, src1); - else { + } else { if (src0) constant_operand(ctx->pc, nvi, src0, 0); else @@ -633,44 +708,13 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) constant_operand(ctx->pc, nvi, src1, 1); } - /* check if we can MUL + ADD -> MAD/FMA */ - if (nvi->opcode != NV_OP_ADD) - continue; - - src0 = nvi->src[0]->value; - src1 = nvi->src[1]->value; - - if (SRC_IS_MUL(src0) && src0->refc == 1) - src = src0; + if (baseop == NV_OP_MIN || baseop == NV_OP_MAX) + handle_min_max(ctx, nvi); else - if (SRC_IS_MUL(src1) && src1->refc == 1) - src = src1; - else - continue; - - /* could have an immediate from above constant_* */ - if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) - continue; - s = (src == src0) ? 0 : 1; - - mod[0] = nvi->src[0]->mod; - mod[1] = nvi->src[1]->mod; - mod[2] = src->insn->src[0]->mod; - mod[3] = src->insn->src[0]->mod; - - if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) - continue; - - nvi->opcode = NV_OP_MAD; - nv_reference(ctx->pc, nvi, s, NULL); - nvi->src[2] = nvi->src[!s]; - - nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value); - nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value); - nvi->src[0]->mod = mod[2] ^ mod[s]; - nvi->src[1]->mod = mod[3]; + if (nvi->opcode == NV_OP_ADD_F32) + handle_add_mul(ctx, nvi); } - DESCEND_ARBITRARY(j, nv_pass_lower_arith); + DESCEND_ARBITRARY(j, nv_pass_algebraic_opt); return 0; } @@ -700,8 +744,12 @@ struct pass_reld_elim { int alloc; }; +/* Extend the load operation in @rec to also cover the data loaded by @ld. + * The two loads may not overlap but reference adjacent memory locations. + */ static void -combine_load(struct mem_record *rec, struct nv_instruction *ld) +combine_load(struct nv_pc *pc, struct mem_record *rec, + struct nv_instruction *ld) { struct nv_instruction *fv = rec->insn; struct nv_value *mem = ld->src[0]->value; @@ -716,7 +764,7 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld) return; rec->ofst = mem->reg.address; for (j = 0; j < d; ++j) - fv->def[d + j] = fv->def[j]; + fv->def[mem->reg.size / 4 + j] = fv->def[j]; d = 0; } else if ((size == 8 && rec->ofst & 3) || @@ -729,6 +777,9 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld) fv->def[d++]->insn = fv; } + if (fv->src[0]->value->refc > 1) + nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value)); + fv->src[0]->value->reg.address = rec->ofst; fv->src[0]->value->reg.size = rec->size = size; nvc0_insn_delete(ld); @@ -793,6 +844,7 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) ((it->ofst >> 4) == (ofst >> 4)) && ((it->ofst + it->size == ofst) || (it->ofst - mem->reg.size == ofst))) { + /* only NV_OP_VFETCH can load exactly 12 bytes */ if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12) continue; if (it->ofst < ofst) { @@ -808,7 +860,7 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) switch (ld->opcode) { case NV_OP_EXPORT: combine_export(it, ld); break; default: - combine_load(it, ld); + combine_load(ctx->pc, it, ld); break; } } else @@ -817,6 +869,11 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) } } + ctx->alloc = 0; + ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL; + for (s = 0; s < 16; ++s) + ctx->mem_c[s] = NULL; + DESCEND_ARBITRARY(s, nv_pass_mem_opt); return 0; } @@ -1006,7 +1063,6 @@ nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b) return 0; } -#if 0 /* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE. * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with * BREAK and dummy ELSE block. @@ -1027,61 +1083,187 @@ bb_is_if_else_endif(struct nv_basic_block *bb) } } -/* predicate instructions and remove branch at the end */ +/* Predicate instructions and delete any branch at the end if it is + * not a break from a loop. + */ static void predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b, - struct nv_value *p, ubyte cc) + struct nv_value *pred, uint8_t cc) { + struct nv_instruction *nvi, *prev; + int s; + if (!b->entry) + return; + for (nvi = b->entry; nvi; nvi = nvi->next) { + prev = nvi; + if (inst_is_noop(nvi)) + continue; + for (s = 0; nvi->src[s]; ++s); + assert(s < 6); + nvi->predicate = s; + nvi->cc = cc; + nv_reference(pc, nvi, nvi->predicate, pred); + } + if (prev->opcode == NV_OP_BRA && + b->out_kind[0] != CFG_EDGE_LOOP_LEAVE && + b->out_kind[1] != CFG_EDGE_LOOP_LEAVE) + nvc0_insn_delete(prev); } -#endif -/* NOTE: Run this after register allocation, we can just cut out the cflow - * instructions and hook the predicates to the conditional OPs if they are - * not using immediates; better than inserting SELECT to join definitions. - * - * NOTE: Should adapt prior optimization to make this possible more often. +static INLINE boolean +may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred) +{ + if (nvi->def[0] && values_equal(nvi->def[0], pred)) + return FALSE; + return nvc0_insn_is_predicateable(nvi); +} + +/* Transform IF/ELSE/ENDIF constructs into predicated instructions + * where feasible. */ static int nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) { + struct nv_instruction *nvi; + struct nv_value *pred; + int k; + int n0, n1; /* instruction counts of outgoing blocks */ + + if (bb_is_if_else_endif(b)) { + assert(b->exit && b->exit->opcode == NV_OP_BRA); + + assert(b->exit->predicate >= 0); + pred = b->exit->src[b->exit->predicate]->value; + + n1 = n0 = 0; + for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) + if (!may_predicate_insn(nvi, pred)) + break; + if (!nvi) { + /* we're after register allocation, so there always is an ELSE block */ + for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) + if (!may_predicate_insn(nvi, pred)) + break; + } + + /* 12 is an arbitrary limit */ + if (!nvi && n0 < 12 && n1 < 12) { + predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc); + predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc); + + nvc0_insn_delete(b->exit); /* delete the branch */ + + /* and a potential joinat before it */ + if (b->exit && b->exit->opcode == NV_OP_JOINAT) + nvc0_insn_delete(b->exit); + + /* remove join operations at the end of the conditional */ + k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0; + if ((nvi = b->out[0]->out[k]->entry)) { + nvi->join = 0; + if (nvi->opcode == NV_OP_JOIN) + nvc0_insn_delete(nvi); + } + } + } + DESCEND_ARBITRARY(k, nv_pass_flatten); + return 0; } +/* Tests instructions for equality, but independently of sources. */ +static boolean +is_operation_equal(struct nv_instruction *a, struct nv_instruction *b) +{ + if (a->opcode != b->opcode) + return FALSE; + if (nv_is_texture_op(a->opcode)) { + if (a->ext.tex.t != b->ext.tex.t || + a->ext.tex.s != b->ext.tex.s) + return FALSE; + if (a->tex_dim != b->tex_dim || + a->tex_array != b->tex_array || + a->tex_cube != b->tex_cube || + a->tex_shadow != b->tex_shadow || + a->tex_live != b->tex_live) + return FALSE; + } else + if (a->opcode == NV_OP_CVT) { + if (a->ext.cvt.s != b->ext.cvt.s || + a->ext.cvt.d != b->ext.cvt.d) + return FALSE; + } else + if (NV_BASEOP(a->opcode) == NV_OP_SET || + NV_BASEOP(a->opcode) == NV_OP_SLCT) { + if (a->set_cond != b->set_cond) + return FALSE; + } else + if (a->opcode == NV_OP_LINTERP || + a->opcode == NV_OP_PINTERP) { + if (a->centroid != b->centroid || + a->flat != b->flat) + return FALSE; + } + if (a->cc != b->cc) + return FALSE; + if (a->lanes != b->lanes || + a->patch != b->patch || + a->saturate != b->saturate) + return FALSE; + if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */ + return FALSE; + return TRUE; +} + /* local common subexpression elimination, stupid O(n^2) implementation */ static int nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_instruction *ir, *ik, *next; struct nv_instruction *entry = b->phi ? b->phi : b->entry; - int s; + int s, d; unsigned int reps; do { reps = 0; for (ir = entry; ir; ir = next) { next = ir->next; + if (ir->fixed) + continue; for (ik = entry; ik != ir; ik = ik->next) { - if (ir->opcode != ik->opcode || ir->fixed) + if (!is_operation_equal(ir, ik)) continue; - - if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1]) + if (!ir->def[0] || !ik->def[0]) continue; if (ik->indirect != ir->indirect || ik->predicate != ir->predicate) continue; - if (!values_equal(ik->def[0], ir->def[0])) + for (d = 0; d < 4; ++d) { + if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0)) + break; + if (ir->def[d]) { + if (!values_equal(ik->def[0], ir->def[0])) + break; + } else { + d = 4; + break; + } + } + if (d != 4) continue; - for (s = 0; s < 3; ++s) { + for (s = 0; s < 5; ++s) { struct nv_value *a, *b; - if (!ik->src[s]) { - if (ir->src[s]) - break; - continue; + if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0)) + break; + if (!ir->src[s]) { + s = 5; + break; } + if (ik->src[s]->mod != ir->src[s]->mod) break; a = ik->src[s]->value; @@ -1089,14 +1271,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) if (a == b) continue; if (a->reg.file != b->reg.file || - a->reg.id < 0 || + a->reg.id < 0 || /* this excludes memory loads/stores */ a->reg.id != b->reg.id) break; } - if (s == 3) { + if (s == 5) { nvc0_insn_delete(ir); + for (d = 0; d < 4 && ir->def[d]; ++d) + nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]); ++reps; - nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]); break; } } @@ -1110,13 +1293,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) /* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy * neighbouring registers. CSE might have messed this up. + * Just generate a MOV for each source to avoid conflicts if they're used in + * multiple NV_OP_BIND at different positions. */ static int nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_value *val; struct nv_instruction *bnd, *nvi, *next; - int s, t; + int s; for (bnd = b->entry; bnd; bnd = next) { next = bnd->next; @@ -1124,20 +1309,17 @@ nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) continue; for (s = 0; s < 4 && bnd->src[s]; ++s) { val = bnd->src[s]->value; - for (t = s + 1; t < 4 && bnd->src[t]; ++t) { - if (bnd->src[t]->value != val) - continue; - nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); - nvi->def[0] = new_value_like(ctx->pc, val); - nvi->def[0]->insn = nvi; - nv_reference(ctx->pc, nvi, 0, val); - nvc0_insn_insert_before(bnd, nvi); - nv_reference(ctx->pc, bnd, t, nvi->def[0]); - } + nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); + nvi->def[0] = new_value_like(ctx->pc, val); + nvi->def[0]->insn = nvi; + nv_reference(ctx->pc, nvi, 0, val); + nv_reference(ctx->pc, bnd, s, nvi->def[0]); + + nvc0_insn_insert_before(bnd, nvi); } } - DESCEND_ARBITRARY(t, nv_pass_fix_bind); + DESCEND_ARBITRARY(s, nv_pass_fix_bind); return 0; } @@ -1153,11 +1335,17 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) pass.n = 0; pass.pc = pc; + /* Do CSE so we can just compare values by pointer in subsequent passes. */ + pc->pass_seq++; + ret = nv_pass_cse(&pass, root); + if (ret) + return ret; + /* Do this first, so we don't have to pay attention * to whether sources are supported memory loads. */ pc->pass_seq++; - ret = nv_pass_lower_arith(&pass, root); + ret = nv_pass_algebraic_opt(&pass, root); if (ret) return ret; @@ -1185,11 +1373,9 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) reldelim->pc = pc; } - pc->pass_seq++; - ret = nv_pass_cse(&pass, root); - if (ret) - return ret; - + /* May run DCE before load-combining since that pass will clean up + * after itself. + */ dce.pc = pc; do { dce.removed = 0; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index b03826484e4..90c669cc4b8 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -225,7 +225,7 @@ nvc0_print_instruction(struct nv_instruction *i) PRINT("%s", gree); if (NV_BASEOP(i->opcode) == NV_OP_SET) - PRINT("set %s", nv_cond_name(i->set_cond)); + PRINT("%s %s", nvc0_opcode_name(i->opcode), nv_cond_name(i->set_cond)); else if (i->saturate) PRINT("sat %s", nvc0_opcode_name(i->opcode)); @@ -278,10 +278,10 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, - { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 }, { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, @@ -302,7 +302,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_TRUNC, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, @@ -343,18 +343,18 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, - { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, - { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 }, + { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 }, { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, @@ -363,13 +363,13 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SLCT, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, + { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index d24f09a1507..f4afe083e2d 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -39,6 +39,30 @@ struct register_set { struct nv_pc *pc; }; +/* aliasing is allowed */ +static void +intersect_register_sets(struct register_set *dst, + struct register_set *src1, struct register_set *src2) +{ + int i; + + for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) { + dst->bits[i][0] = src1->bits[i][0] | src2->bits[i][0]; + dst->bits[i][1] = src1->bits[i][1] | src2->bits[i][1]; + } +} + +static void +mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask) +{ + int i; + + for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) { + set->bits[i][0] = (set->bits[i][0] | mask) & umask; + set->bits[i][1] = (set->bits[i][1] | mask) & umask; + } +} + struct nv_pc_pass { struct nv_pc *pc; struct nv_instruction **insns; @@ -63,6 +87,9 @@ add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range) { struct nv_range *range, **nextp = &val->livei; + if (bgn == end) /* [a, a) is invalid / empty */ + return TRUE; + for (range = val->livei; range; range = range->next) { if (end < range->bgn) break; /* insert before */ @@ -327,14 +354,14 @@ do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) assert(b->join == a->join); } -static INLINE void +static INLINE boolean try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) { if (!join_allowed(ctx, a, b)) { #ifdef NVC0_RA_DEBUG_JOIN debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n); #endif - return; + return FALSE; } if (livei_have_overlap(a->join, b->join)) { #ifdef NVC0_RA_DEBUG_JOIN @@ -342,10 +369,27 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) livei_print(a); livei_print(b); #endif - return; + return FALSE; } do_join_values(ctx, a, b); + + return TRUE; +} + +static void +join_values_nofail(struct nv_pc_pass *ctx, + struct nv_value *a, struct nv_value *b, boolean type_only) +{ + if (type_only) { + assert(join_allowed(ctx, a, b)); + do_join_values(ctx, a, b); + } else { + boolean ok = try_join_values(ctx, a, b); + if (!ok) { + NOUVEAU_ERR("failed to coalesce values\n"); + } + } } static INLINE boolean @@ -360,20 +404,32 @@ need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) return (b->num_in > 1) && (n == 2); } +/* Look for the @phi's operand whose definition reaches @b. */ static int phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b, struct nv_basic_block *tb) { + struct nv_ref *srci, *srcj; int i, j; for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) { - if (!nvc0_bblock_reachable_by(b, phi->src[i]->value->insn->bb, tb)) + srci = phi->src[i]; + /* if already replaced, check with original source first */ + if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV) + srci = srci->value->insn->src[0]; + if (!nvc0_bblock_reachable_by(b, srci->value->insn->bb, NULL)) continue; /* NOTE: back-edges are ignored by the reachable-by check */ - if (j < 0 || !nvc0_bblock_reachable_by(phi->src[j]->value->insn->bb, - phi->src[i]->value->insn->bb, tb)) + if (j < 0 || !nvc0_bblock_reachable_by(srcj->value->insn->bb, + srci->value->insn->bb, NULL)) { j = i; + srcj = srci; + } } + if (j >= 0 && nvc0_bblock_reachable_by(b, phi->def[0]->insn->bb, NULL)) + if (!nvc0_bblock_reachable_by(srcj->value->insn->bb, + phi->def[0]->insn->bb, NULL)) + j = -1; return j; } @@ -420,21 +476,23 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) ctx->pc->current_block = pn; for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { - if ((j = phi_opnd_for_bb(i, p, b)) < 0) - continue; - val = i->src[j]->value; - - if (i->src[j]->flags) { - /* value already encountered from a different in-block */ - val = val->insn->src[0]->value; - while (j < 6 && i->src[j]) - ++j; - assert(j < 6); + j = phi_opnd_for_bb(i, p, b); + + if (j < 0) { + val = i->def[0]; + } else { + val = i->src[j]->value; + if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) { + j = -1; + /* use original value, we already encountered & replaced it */ + val = val->insn->src[0]->value; + } } + if (j < 0) /* need an additional source ? */ + for (j = 0; j < 6 && i->src[j] && i->src[j]->value != val; ++j); + assert(j < 6); /* XXX: really ugly shaders */ ni = new_instruction(ctx->pc, NV_OP_MOV); - - /* TODO: insert instruction at correct position in the first place */ if (ni->prev && ni->prev->target) nvc0_insns_permute(ni->prev, ni); @@ -442,7 +500,7 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) ni->def[0]->insn = ni; nv_reference(ctx->pc, ni, 0, val); nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */ - i->src[j]->flags = 1; + i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV; } if (pn != p && pn->exit) { @@ -460,8 +518,13 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) return 0; } +#define JOIN_MASK_PHI (1 << 0) +#define JOIN_MASK_SELECT (1 << 1) +#define JOIN_MASK_MOV (1 << 2) +#define JOIN_MASK_BIND (1 << 3) + static int -pass_join_values(struct nv_pc_pass *ctx, int iter) +pass_join_values(struct nv_pc_pass *ctx, unsigned mask) { int c, n; @@ -470,36 +533,33 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) switch (i->opcode) { case NV_OP_PHI: - if (iter != 2) + if (!(mask & JOIN_MASK_PHI)) break; for (c = 0; c < 6 && i->src[c]; ++c) - try_join_values(ctx, i->def[0], i->src[c]->value); + join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE); break; case NV_OP_MOV: - if ((iter == 2) && i->src[0]->value->insn && - !nv_is_texture_op(i->src[0]->value->join->insn->opcode)) + if (!(mask & JOIN_MASK_MOV)) + break; + if (i->src[0]->value->insn && !i->src[0]->value->insn->def[1]) try_join_values(ctx, i->def[0], i->src[0]->value); break; case NV_OP_SELECT: - if (iter != 1) + if (!(mask & JOIN_MASK_SELECT)) break; - for (c = 0; c < 6 && i->src[c]; ++c) { - assert(join_allowed(ctx, i->def[0], i->src[c]->value)); - do_join_values(ctx, i->def[0], i->src[c]->value); - } - break; - case NV_OP_TEX: - case NV_OP_TXB: - case NV_OP_TXL: - case NV_OP_TXQ: - /* on nvc0, TEX src and dst can differ */ + for (c = 0; c < 6 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE); break; case NV_OP_BIND: - if (iter) + if (!(mask & JOIN_MASK_BIND)) break; - for (c = 0; c < 6 && i->src[c]; ++c) - do_join_values(ctx, i->def[c], i->src[c]->value); + for (c = 0; c < 4 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE); break; + case NV_OP_TEX: + case NV_OP_TXB: + case NV_OP_TXL: + case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */ default: break; } @@ -621,15 +681,16 @@ static void collect_live_values(struct nv_basic_block *b, const int n) { int i; - if (b->out[0]) { - if (b->out[1]) { /* what to do about back-edges ? */ + /* XXX: what to do about back/fake-edges (used to include both here) ? */ + if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { for (i = 0; i < n; ++i) b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i]; } else { memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t)); } } else - if (b->out[1]) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t)); } else { memset(b->live_set, 0, n * sizeof(uint32_t)); @@ -746,42 +807,46 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval) elem->next = nval; } -static int -pass_linear_scan(struct nv_pc_pass *ctx, int iter) +static void +collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head, + boolean assigned_only) { - struct nv_instruction *i; - struct register_set f, free; + struct nv_value *val; int k, n; - struct nv_value *cur, *val, *tmp[2]; - struct nv_value active, inactive, handled, unhandled; - make_empty_list(&active); - make_empty_list(&inactive); - make_empty_list(&handled); - make_empty_list(&unhandled); + make_empty_list(head); - nvc0_ctor_register_set(ctx->pc, &free); - - /* joined values should have range = NULL and thus not be added; - * also, fixed memory values won't be added because they're not - * def'd, just used - */ for (n = 0; n < ctx->num_insns; ++n) { - i = ctx->insns[n]; + struct nv_instruction *i = ctx->insns[n]; + /* for joined values, only the representative will have livei != NULL */ for (k = 0; k < 5; ++k) { if (i->def[k] && i->def[k]->livei) - insert_ordered_tail(&unhandled, i->def[k]); - else - if (0 && i->def[k]) - debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n); + if (!assigned_only || i->def[k]->reg.id >= 0) + insert_ordered_tail(head, i->def[k]); } } - for (val = unhandled.next; val != unhandled.prev; val = val->next) { + for (val = head->next; val != head->prev; val = val->next) { assert(val->join == val); assert(val->livei->bgn <= val->next->livei->bgn); } +} + +static int +pass_linear_scan(struct nv_pc_pass *ctx) +{ + struct register_set f, free; + struct nv_value *cur, *val, *tmp[2]; + struct nv_value active, inactive, handled, unhandled; + + make_empty_list(&active); + make_empty_list(&inactive); + make_empty_list(&handled); + + nvc0_ctor_register_set(ctx->pc, &free); + + collect_register_values(ctx, &unhandled, FALSE); foreach_s(cur, tmp[0], &unhandled) { remove_from_list(cur); @@ -818,14 +883,7 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) reg_occupy(&f, val); if (cur->reg.id < 0) { - boolean mem = FALSE; - int v = nvi_vector_size(cur->insn); - - if (v > 1) - mem = !reg_assign(&f, &cur->insn->def[0], v); - else - if (iter) - mem = !reg_assign(&f, &cur, 1); + boolean mem = !reg_assign(&f, &cur, 1); if (mem) { NOUVEAU_ERR("out of registers\n"); @@ -839,6 +897,68 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) return 0; } +/* Allocate values defined by instructions such as TEX, which have to be + * assigned to consecutive registers. + * Linear scan doesn't really work here since the values can have different + * live intervals. + */ +static int +pass_allocate_constrained_values(struct nv_pc_pass *ctx) +{ + struct nv_value regvals, *val; + struct nv_instruction *i; + struct nv_value *defs[4]; + struct register_set regs[4]; + int n, vsize, c; + uint32_t mask; + boolean mem; + + collect_register_values(ctx, ®vals, TRUE); + + for (n = 0; n < ctx->num_insns; ++n) { + i = ctx->insns[n]; + vsize = nvi_vector_size(i); + if (!(vsize > 1)) + continue; + assert(vsize <= 4); + + for (c = 0; c < vsize; ++c) + defs[c] = i->def[c]->join; + + if (defs[0]->reg.id >= 0) { + for (c = 1; c < vsize; ++c) + assert(defs[c]->reg.id >= 0); + continue; + } + + for (c = 0; c < vsize; ++c) { + nvc0_ctor_register_set(ctx->pc, ®s[c]); + + foreach(val, ®vals) { + if (val->reg.id >= 0 && livei_have_overlap(val, defs[c])) + reg_occupy(®s[c], val); + } + mask = 0x11111111; + if (vsize == 2) /* granularity is 2 and not 4 */ + mask |= 0x11111111 << 2; + mask_register_set(®s[c], 0, mask << c); + + if (defs[c]->livei) + insert_ordered_tail(®vals, defs[c]); + } + for (c = 1; c < vsize; ++c) + intersect_register_sets(®s[0], ®s[0], ®s[c]); + + mem = !reg_assign(®s[0], &defs[0], vsize); + + if (mem) { + NOUVEAU_ERR("out of registers\n"); + abort(); + } + } + return 0; +} + static int nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) { @@ -862,6 +982,10 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) ret = pass_generate_phi_movs(ctx, root); assert(!ret); +#ifdef NVC0_RA_DEBUG_LIVEI + nvc0_print_function(root); +#endif + for (i = 0; i < pc->loop_nesting_bound; ++i) { pc->pass_seq++; ret = pass_build_live_sets(ctx, root); @@ -888,19 +1012,19 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) livei_print(&pc->values[i]); #endif - ret = pass_join_values(ctx, 0); + ret = pass_join_values(ctx, JOIN_MASK_PHI); if (ret) goto out; - ret = pass_linear_scan(ctx, 0); + ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_BIND); if (ret) goto out; - ret = pass_join_values(ctx, 1); + ret = pass_join_values(ctx, JOIN_MASK_MOV); if (ret) goto out; - ret = pass_join_values(ctx, 2); + ret = pass_allocate_constrained_values(ctx); if (ret) goto out; - ret = pass_linear_scan(ctx, 1); + ret = pass_linear_scan(ctx); if (ret) goto out; diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index aefaf7b98ad..899fe147c6a 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -185,8 +185,17 @@ nvc0_varying_location(unsigned sn, unsigned si) return 0x2e0; */ case TGSI_SEMANTIC_GENERIC: + /* We'd really like to distinguish between TEXCOORD and GENERIC here, + * since only 0x300 to 0x37c can be replaced by sprite coordinates. + * Also, gl_PointCoord should be a system value and must be assigned to + * address 0x2e0. For now, let's cheat: + */ assert(si < 31); - return 0x80 + (si * 16); + if (si <= 7) + return 0x300 + si * 16; + if (si == 9) + return 0x2e0; + return 0x80 + ((si - 8) * 16); case TGSI_SEMANTIC_NORMAL: return 0x360; case TGSI_SEMANTIC_PRIMID: @@ -256,12 +265,14 @@ prog_decl(struct nvc0_translation_info *ti, case TGSI_FILE_INPUT: for (i = first; i <= last; ++i) { if (ti->prog->type == PIPE_SHADER_VERTEX) { - sn = TGSI_SEMANTIC_GENERIC; - si = i; + for (c = 0; c < 4; ++c) + ti->input_loc[i][c] = 0x80 + i * 16 + c * 4; + } else { + for (c = 0; c < 4; ++c) + ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + /* for sprite coordinates: */ + ti->prog->fp.in_pos[i] = ti->input_loc[i][0] / 4; } - for (c = 0; c < 4; ++c) - ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; - if (ti->prog->type == PIPE_SHADER_FRAGMENT) ti->interp_mode[i] = nvc0_interp_mode(decl); } @@ -281,6 +292,8 @@ prog_decl(struct nvc0_translation_info *ti, } else { for (c = 0; c < 4; ++c) ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + /* for TFB_VARYING_LOCS: */ + ti->prog->vp.out_pos[i] = ti->output_loc[i][0] / 4; } } break; @@ -288,9 +301,11 @@ prog_decl(struct nvc0_translation_info *ti, ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]); assert(first == last); break; + case TGSI_FILE_TEMPORARY: + ti->temp128_nr = MAX2(ti->temp128_nr, last + 1); + break; case TGSI_FILE_NULL: case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: case TGSI_FILE_SAMPLER: case TGSI_FILE_ADDRESS: case TGSI_FILE_IMMEDIATE: @@ -518,8 +533,13 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) if (!ti->input_access[i][c]) continue; a = ti->input_loc[i][c] / 2; - if ((a & ~7) == 0x70/2) - fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */ + if (ti->input_loc[i][c] >= 0x2c0) + a -= 32; + if (ti->input_loc[i][0] == 0x70) + fp->hdr[5] |= 1 << (28 + c); /* FRAG_COORD_UMASK */ + else + if (ti->input_loc[i][0] == 0x2e0) + fp->hdr[14] |= 1 << (24 + c); /* POINT_COORD */ else fp->hdr[4 + a / 32] |= m << (a % 32); } @@ -618,7 +638,7 @@ nvc0_prog_scan(struct nvc0_translation_info *ti) if (ti->scan.writes_z) prog->flags[0] = 0x11; /* ? */ else - if (!ti->global_stores) + if (!ti->scan.uses_kill && !ti->global_stores) prog->fp.early_z = 1; ret = nvc0_fp_gen_header(prog, ti); @@ -629,6 +649,11 @@ nvc0_prog_scan(struct nvc0_translation_info *ti) break; } + if (ti->require_stores) { + prog->hdr[0] |= 1 << 26; + prog->hdr[1] |= ti->temp128_nr * 16; /* l[] size */ + } + assert(!ret); return ret; } diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h index e6b210d1355..f6fea29780b 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -21,16 +21,18 @@ struct nvc0_program { unsigned code_size; unsigned parm_size; - uint32_t hdr[20]; + uint32_t hdr[20]; /* TODO: move this into code to save space */ uint32_t flags[2]; struct { uint8_t edgeflag; uint8_t num_ucps; + uint8_t out_pos[PIPE_MAX_SHADER_OUTPUTS]; } vp; struct { uint8_t early_z; + uint8_t in_pos[PIPE_MAX_SHADER_INPUTS]; } fp; void *relocs; @@ -74,6 +76,7 @@ struct nvc0_translation_info { uint32_t *immd32; ubyte *immd32_ty; unsigned immd32_nr; + unsigned temp128_nr; ubyte edgeflag_out; struct nvc0_subroutine *subr; unsigned num_subrs; diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 74c3451c19a..fcbb7da41a3 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -217,6 +217,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) struct push_context ctx; unsigned i, index_size; unsigned inst = info->instance_count; + boolean apply_bias = info->indexed && info->index_bias; ctx.chan = nvc0->screen->base.channel; ctx.translate = nvc0->vertex->translate; @@ -230,7 +231,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) data = nvc0_resource_map_offset(nvc0, res, vb->buffer_offset, NOUVEAU_BO_RD); - if (info->indexed) + + if (apply_bias && likely(!(nvc0->vertex->instance_bufs & (1 << i)))) data += info->index_bias * vb->stride; ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c index cc83fbe771c..e5e43c0e7a5 100644 --- a/src/gallium/drivers/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nvc0/nvc0_query.c @@ -312,6 +312,7 @@ nvc0_render_condition(struct pipe_context *pipe, if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + MARK_RING (chan, 5, 2); BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4); OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); @@ -319,6 +320,7 @@ nvc0_render_condition(struct pipe_context *pipe, OUT_RING (chan, 0x00001001); } + MARK_RING (chan, 4, 2); BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3); OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index 17e79642a6d..599823c0dc9 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -24,7 +24,8 @@ struct nvc0_context; * USER_MEMORY: resource->data is a pointer to client memory and may change * between GL calls */ -#define NVC0_BUFFER_STATUS_DIRTY (1 << 0) +#define NVC0_BUFFER_STATUS_GPU_READING (1 << 0) +#define NVC0_BUFFER_STATUS_GPU_WRITING (1 << 1) #define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7) /* Resources, if mapped into the GPU's address space, are guaranteed to @@ -51,6 +52,9 @@ struct nvc0_resource { struct nvc0_mm_allocation *mm; }; +void +nvc0_buffer_release_gpu_storage(struct nvc0_resource *); + boolean nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *, unsigned start, unsigned size); @@ -87,7 +91,7 @@ nvc0_resource_map_offset(struct nvc0_context *nvc0, nvc0_buffer_adjust_score(nvc0, res, -250); if ((res->domain == NOUVEAU_BO_VRAM) && - (res->status & NVC0_BUFFER_STATUS_DIRTY)) + (res->status & NVC0_BUFFER_STATUS_GPU_WRITING)) nvc0_buffer_download(nvc0, res, 0, res->base.width0); if ((res->domain != NOUVEAU_BO_GART) || diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index f608b32e1cb..f7f1fd09a12 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -75,6 +75,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 10; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; + case PIPE_CAP_ARRAY_TEXTURES: + return 1; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_TEXTURE_SWIZZLE: @@ -281,9 +283,6 @@ nvc0_magic_3d_init(struct nouveau_channel *chan) BEGIN_RING(chan, RING_3D_(0x074c), 1); OUT_RING (chan, 0x3f); - BEGIN_RING(chan, RING_3D_(0x10f8), 1); - OUT_RING (chan, 0x0101); - BEGIN_RING(chan, RING_3D_(0x16a8), 1); OUT_RING (chan, (3 << 16) | 3); BEGIN_RING(chan, RING_3D_(0x1794), 1); @@ -476,7 +475,7 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RING (chan, (15 << 4) | 1); } - screen->tls_size = 4 * 4 * 32 * 128 * 4; + screen->tls_size = (16 * 32) * (NVC0_CAP_MAX_PROGRAM_TEMPS * 16); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, screen->tls_size, &screen->tls); if (ret) @@ -490,6 +489,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RING (chan, screen->tls_size >> 32); OUT_RING (chan, screen->tls_size); + BEGIN_RING(chan, RING_3D_(0x07a0), 1); + OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1); OUT_RING (chan, 0); @@ -532,16 +533,27 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */ OUT_RING (chan, 0x3f); - BEGIN_RING(chan, RING_3D(VIEWPORT_CLIP_RECTS_EN), 1); + BEGIN_RING(chan, RING_3D(CLIP_RECTS_MODE), 1); + OUT_RING (chan, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 8 * 2); + for (i = 0; i < 8 * 2; ++i) + OUT_RING(chan, 0); + BEGIN_RING(chan, RING_3D(CLIP_RECTS_EN), 1); OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1); OUT_RING (chan, 0); + /* neither scissors, viewport nor stencil mask should affect clears */ + BEGIN_RING(chan, RING_3D(CLEAR_FLAGS), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); OUT_RINGf (chan, 0.0f); OUT_RINGf (chan, 1.0f); + BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); + OUT_RING (chan, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1); /* We use scissors instead of exact view volume clipping, * so they're always enabled. @@ -628,11 +640,14 @@ nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + MARK_RING(chan, 5, 5); nouveau_bo_validate(chan, screen->text, flags); nouveau_bo_validate(chan, screen->uniforms, flags); nouveau_bo_validate(chan, screen->txc, flags); - nouveau_bo_validate(chan, screen->tls, flags); nouveau_bo_validate(chan, screen->mp_stack_bo, flags); + + if (screen->cur_ctx && screen->cur_ctx->state.tls_required) + nouveau_bo_validate(chan, screen->tls, flags); } int diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 1fac142e2be..d952ff1f9b1 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -128,17 +128,25 @@ nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) { struct nvc0_screen *screen = nvc0_screen(res->base.screen); - nouveau_bo_validate(screen->base.channel, res->bo, flags); + if (likely(res->bo)) { + nouveau_bo_validate(screen->base.channel, res->bo, flags); - nvc0_resource_fence(res, flags); + if (flags & NOUVEAU_BO_WR) + res->status |= NVC0_BUFFER_STATUS_GPU_WRITING; + if (flags & NOUVEAU_BO_RD) + res->status |= NVC0_BUFFER_STATUS_GPU_READING; + + nvc0_resource_fence(res, flags); + } } boolean nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit); - void nvc0_screen_fence_next(struct nvc0_screen *); +void +nvc0_screen_fence_update(struct nvc0_screen *, boolean flushed); static INLINE boolean nvc0_screen_fence_emit(struct nvc0_screen *screen) diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index 981b5488d08..357f8b80deb 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -27,6 +27,16 @@ #include "nvc0_context.h" +static INLINE void +nvc0_program_update_context_state(struct nvc0_context *nvc0, + struct nvc0_program *prog, int stage) +{ + if (prog->hdr[1]) + nvc0->state.tls_required |= 1 << stage; + else + nvc0->state.tls_required &= ~(1 << stage); +} + static boolean nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) { @@ -55,7 +65,7 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) prog->code_base + NVC0_SHADER_HEADER_SIZE, prog->code_size, prog->code); - BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1); + BEGIN_RING(nvc0->screen->base.channel, RING_3D(MEM_BARRIER), 1); OUT_RING (nvc0->screen->base.channel, 0x1111); return TRUE; @@ -77,6 +87,7 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0) if (!nvc0_program_validate(nvc0, vp)) return; + nvc0_program_update_context_state(nvc0, vp, 0); BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2); OUT_RING (chan, 0x11); @@ -98,6 +109,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) if (!nvc0_program_validate(nvc0, fp)) return; + nvc0_program_update_context_state(nvc0, fp, 4); BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1); OUT_RING (chan, fp->fp.early_z); @@ -127,6 +139,7 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0) } if (!nvc0_program_validate(nvc0, tp)) return; + nvc0_program_update_context_state(nvc0, tp, 1); BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2); OUT_RING (chan, 0x21); @@ -148,6 +161,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0) } if (!nvc0_program_validate(nvc0, tp)) return; + nvc0_program_update_context_state(nvc0, tp, 2); BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); OUT_RING (chan, 0x31); @@ -170,6 +184,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) } if (!nvc0_program_validate(nvc0, gp)) return; + nvc0_program_update_context_state(nvc0, gp, 3); BEGIN_RING(chan, RING_3D(GP_SELECT), 1); OUT_RING (chan, 0x41); @@ -178,3 +193,59 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1); OUT_RING (chan, gp->max_gpr); } + +/* It's *is* kind of shader related. We need to inspect the program + * to get the output locations right. + */ +void +nvc0_tfb_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *vp; + struct nvc0_transform_feedback_state *tfb = nvc0->tfb; + int b; + + BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); + if (!tfb) { + OUT_RING(chan, 0); + return; + } + OUT_RING(chan, 1); + + vp = nvc0->vertprog ? nvc0->vertprog : nvc0->gmtyprog; + + for (b = 0; b < nvc0->num_tfbbufs; ++b) { + uint8_t idx, var[128]; + int i, n; + struct nvc0_resource *buf = nvc0_resource(nvc0->tfbbuf[b]); + + BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5); + OUT_RING (chan, 1); + OUT_RESRCh(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR); + OUT_RESRCl(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR); + OUT_RING (chan, buf->base.width0 - nvc0->tfb_offset[b]); + OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID <- offset ? */ + + if (!(nvc0->dirty & NVC0_NEW_TFB)) + continue; + + BEGIN_RING(chan, RING_3D(TFB_UNK07X0(b)), 3); + OUT_RING (chan, 0); + OUT_RING (chan, tfb->varying_count[b]); + OUT_RING (chan, tfb->stride[b]); + + n = b ? tfb->varying_count[b - 1] : 0; + i = 0; + for (; i < tfb->varying_count[b]; ++i) { + idx = tfb->varying_index[n + i]; + var[i] = vp->vp.out_pos[idx >> 2] + (idx & 3); + } + for (; i & 3; ++i) + var[i] = 0; + + BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4); + OUT_RINGp (chan, var, i / 4); + } + for (; b < 4; ++b) + IMMED_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 0); +} diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index c08f3693f5e..aa437195764 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -22,6 +22,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" @@ -50,40 +51,35 @@ nvc0_colormask(unsigned mask) return ret; } +#define NVC0_BLEND_FACTOR_CASE(a, b) \ + case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b + static INLINE uint32_t nvc0_blend_fac(unsigned factor) { - static const uint16_t bf[] = { - NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */ - NV50_3D_BLEND_FACTOR_ONE, - NV50_3D_BLEND_FACTOR_SRC_COLOR, - NV50_3D_BLEND_FACTOR_SRC_ALPHA, - NV50_3D_BLEND_FACTOR_DST_ALPHA, - NV50_3D_BLEND_FACTOR_DST_COLOR, - NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE, - NV50_3D_BLEND_FACTOR_CONSTANT_COLOR, - NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA, - NV50_3D_BLEND_FACTOR_SRC1_COLOR, - NV50_3D_BLEND_FACTOR_SRC1_ALPHA, - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */ - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, - NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, - NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA - }; - - assert(factor < (sizeof(bf) / sizeof(bf[0]))); - return bf[factor]; + switch (factor) { + NVC0_BLEND_FACTOR_CASE(ONE, ONE); + NVC0_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE); + NVC0_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA); + NVC0_BLEND_FACTOR_CASE(ZERO, ZERO); + NVC0_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA); + default: + return NV50_3D_BLEND_FACTOR_ZERO; + } } static void * @@ -176,9 +172,9 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, return NULL; so->pipe = *cso; -#ifndef NVC0_SCISSORS_CLIPPING - SB_IMMED_3D(so, SCISSOR_ENABLE(0), cso->scissor); -#endif + /* Scissor enables are handled in scissor state, we will not want to + * always emit 16 commands, one for each scissor rectangle, here. + */ SB_BEGIN_3D(so, SHADE_MODEL, 1); SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT : @@ -242,7 +238,7 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1); SB_DATA (so, fui(cso->offset_scale)); SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1); - SB_DATA (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? */ + SB_DATA (so, fui(cso->offset_units * 2.0f)); } assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); @@ -283,20 +279,21 @@ nvc0_zsa_state_create(struct pipe_context *pipe, } if (cso->stencil[0].enabled) { - SB_BEGIN_3D(so, STENCIL_FRONT_ENABLE, 5); + SB_BEGIN_3D(so, STENCIL_ENABLE, 5); SB_DATA (so, 1); SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op)); SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func)); - SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2); - SB_DATA (so, cso->stencil[0].writemask); + SB_BEGIN_3D(so, STENCIL_FRONT_FUNC_MASK, 2); SB_DATA (so, cso->stencil[0].valuemask); + SB_DATA (so, cso->stencil[0].writemask); } else { - SB_IMMED_3D(so, STENCIL_FRONT_ENABLE, 0); + SB_IMMED_3D(so, STENCIL_ENABLE, 0); } if (cso->stencil[1].enabled) { + assert(cso->stencil[0].enabled); SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5); SB_DATA (so, 1); SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op)); @@ -306,7 +303,8 @@ nvc0_zsa_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2); SB_DATA (so, cso->stencil[1].writemask); SB_DATA (so, cso->stencil[1].valuemask); - } else { + } else + if (cso->stencil[0].enabled) { SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0); } @@ -808,6 +806,74 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso) nvc0->dirty |= NVC0_NEW_VERTEX; } +static void * +nvc0_tfb_state_create(struct pipe_context *pipe, + const struct pipe_stream_output_state *pso) +{ + struct nvc0_transform_feedback_state *so; + int n = 0; + int i, c, b; + + so = MALLOC(sizeof(*so) + pso->num_outputs * 4 * sizeof(uint8_t)); + if (!so) + return NULL; + + for (b = 0; b < 4; ++b) { + for (i = 0; i < pso->num_outputs; ++i) { + if (pso->output_buffer[i] != b) + continue; + for (c = 0; c < 4; ++c) { + if (!(pso->register_mask[i] & (1 << c))) + continue; + so->varying_count[b]++; + so->varying_index[n++] = (pso->register_index[i] << 2) | c; + } + } + so->stride[b] = so->varying_count[b] * 4; + } + if (pso->stride) + so->stride[0] = pso->stride; + + return so; +} + +static void +nvc0_tfb_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvc0_tfb_state_bind(struct pipe_context *pipe, void *hwcso) +{ + nvc0_context(pipe)->tfb = hwcso; + nvc0_context(pipe)->dirty |= NVC0_NEW_TFB; +} + +static void +nvc0_set_transform_feedback_buffers(struct pipe_context *pipe, + struct pipe_resource **buffers, + int *offsets, + int num_buffers) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + int i; + + assert(num_buffers >= 0 && num_buffers <= 4); /* why signed ? */ + + for (i = 0; i < num_buffers; ++i) { + assert(offsets[i] >= 0); + nvc0->tfb_offset[i] = offsets[i]; + pipe_resource_reference(&nvc0->tfbbuf[i], buffers[i]); + } + for (; i < nvc0->num_tfbbufs; ++i) + pipe_resource_reference(&nvc0->tfbbuf[i], NULL); + + nvc0->num_tfbbufs = num_buffers; + + nvc0->dirty |= NVC0_NEW_TFB_BUFFERS; +} + void nvc0_init_state_functions(struct nvc0_context *nvc0) { @@ -861,5 +927,12 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) nvc0->pipe.set_vertex_buffers = nvc0_set_vertex_buffers; nvc0->pipe.set_index_buffer = nvc0_set_index_buffer; + + nvc0->pipe.create_stream_output_state = nvc0_tfb_state_create; + nvc0->pipe.delete_stream_output_state = nvc0_tfb_state_delete; + nvc0->pipe.bind_stream_output_state = nvc0_tfb_state_bind; + nvc0->pipe.set_stream_output_buffers = nvc0_set_transform_feedback_buffers; + + nvc0->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 25aec0244db..70c418fad9b 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -25,6 +25,7 @@ nvc0_validate_zcull(struct nvc0_context *nvc0) else width = fb->width; + MARK_RING (chan, 23, 4); BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */ OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */ @@ -57,6 +58,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) struct nouveau_channel *chan = nvc0->screen->base.channel; struct pipe_framebuffer_state *fb = &nvc0->framebuffer; unsigned i; + boolean serialize = FALSE; nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME); @@ -66,12 +68,14 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, fb->width << 16); OUT_RING (chan, fb->height << 16); + MARK_RING(chan, 9 * fb->nr_cbufs, 2 * fb->nr_cbufs); + for (i = 0; i < fb->nr_cbufs; ++i) { struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture); struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]); struct nouveau_bo *bo = mt->base.bo; uint32_t offset = sf->offset; - + BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8); OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); @@ -83,6 +87,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, sf->depth); OUT_RING (chan, mt->layer_stride >> 2); + if (mt->base.status & NVC0_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NVC0_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NVC0_BUFFER_STATUS_GPU_READING; + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); } @@ -93,7 +102,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0) struct nouveau_bo *bo = mt->base.bo; int unk = mt->base.base.target == PIPE_TEXTURE_2D; uint32_t offset = sf->offset; - + + MARK_RING (chan, 12, 2); BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); @@ -107,6 +117,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, sf->height); OUT_RING (chan, (unk << 16) | sf->depth); + if (mt->base.status & NVC0_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NVC0_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NVC0_BUFFER_STATUS_GPU_READING; + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); } else { @@ -114,11 +129,10 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, 0); } -#ifndef NVC0_SCISSORS_CLIPPING - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, fb->width << 16); - OUT_RING (chan, fb->height << 16); -#endif + if (serialize) { + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); + } } static void @@ -160,65 +174,54 @@ nvc0_validate_scissor(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; struct pipe_scissor_state *s = &nvc0->scissor; -#ifdef NVC0_SCISSORS_CLIPPING - struct pipe_viewport_state *vp = &nvc0->viewport; - int minx, maxx, miny, maxy; - if (!(nvc0->dirty & - (NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | NVC0_NEW_FRAMEBUFFER)) && - nvc0->state.scissor == nvc0->rast->pipe.scissor) + if (!(nvc0->dirty & NVC0_NEW_SCISSOR) && + nvc0->rast->pipe.scissor == nvc0->state.scissor) return; nvc0->state.scissor = nvc0->rast->pipe.scissor; - if (nvc0->state.scissor) { - minx = s->minx; - maxx = s->maxx; - miny = s->miny; - maxy = s->maxy; + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); + if (nvc0->rast->pipe.scissor) { + OUT_RING(chan, (s->maxx << 16) | s->minx); + OUT_RING(chan, (s->maxy << 16) | s->miny); } else { - minx = 0; - maxx = nvc0->framebuffer.width; - miny = 0; - maxy = nvc0->framebuffer.height; + OUT_RING(chan, (0xffff << 16) | 0); + OUT_RING(chan, (0xffff << 16) | 0); } - - minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0]))); - maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0]))); - miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1]))); - maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1]))); - - BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); - OUT_RING (chan, (maxx << 16) | minx); - OUT_RING (chan, (maxy << 16) | miny); - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, ((maxx - minx) << 16) | minx); - OUT_RING (chan, ((maxy - miny) << 16) | miny); -#else - BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); - OUT_RING (chan, (s->maxx << 16) | s->minx); - OUT_RING (chan, (s->maxy << 16) | s->miny); -#endif } static void nvc0_validate_viewport(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; + struct pipe_viewport_state *vp = &nvc0->viewport; + int x, y, w, h; + float zmin, zmax; BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3); - OUT_RINGf (chan, nvc0->viewport.translate[0]); - OUT_RINGf (chan, nvc0->viewport.translate[1]); - OUT_RINGf (chan, nvc0->viewport.translate[2]); + OUT_RINGf (chan, vp->translate[0]); + OUT_RINGf (chan, vp->translate[1]); + OUT_RINGf (chan, vp->translate[2]); BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3); - OUT_RINGf (chan, nvc0->viewport.scale[0]); - OUT_RINGf (chan, nvc0->viewport.scale[1]); - OUT_RINGf (chan, nvc0->viewport.scale[2]); + OUT_RINGf (chan, vp->scale[0]); + OUT_RINGf (chan, vp->scale[1]); + OUT_RINGf (chan, vp->scale[2]); -#ifdef NVC0_SCISSORS_CLIPPING + /* now set the viewport rectangle to viewport dimensions for clipping */ + + x = (int)(vp->translate[0] - fabsf(vp->scale[0])); + y = (int)(vp->translate[1] - fabsf(vp->scale[1])); + w = (int)fabsf(2.0f * vp->scale[0]); + h = (int)fabsf(2.0f * vp->scale[1]); + zmin = vp->translate[2] - fabsf(vp->scale[2]); + zmax = vp->translate[2] + fabsf(vp->scale[2]); + + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); + OUT_RING (chan, (w << 16) | x); + OUT_RING (chan, (h << 16) | y); BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); - OUT_RINGf (chan, nvc0->viewport.translate[2] - nvc0->viewport.scale[2]); - OUT_RINGf (chan, nvc0->viewport.translate[2] + nvc0->viewport.scale[2]); -#endif + OUT_RINGf (chan, zmin); + OUT_RINGf (chan, zmax); } static void @@ -227,10 +230,15 @@ nvc0_validate_clip(struct nvc0_context *nvc0) struct nouveau_channel *chan = nvc0->screen->base.channel; uint32_t clip; - clip = nvc0->clip.depth_clamp ? 0x201a : 0x0002; -#ifndef NVC0_SCISSORS_CLIPPING - clip |= 0x1080; -#endif + if (nvc0->clip.depth_clamp) { + clip = + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2; + } else { + clip = NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1; + } BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); OUT_RING (chan, clip); @@ -238,6 +246,7 @@ nvc0_validate_clip(struct nvc0_context *nvc0) if (nvc0->clip.nr) { struct nouveau_bo *bo = nvc0->screen->uniforms; + MARK_RING (chan, 6 + nvc0->clip.nr * 4, 2); BEGIN_RING(chan, RING_3D(CB_SIZE), 3); OUT_RING (chan, 256); OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); @@ -281,6 +290,32 @@ nvc0_validate_rasterizer(struct nvc0_context *nvc0) } static void +nvc0_validate_sprite_coords(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + uint32_t reg; + + if (nvc0->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) + reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT; + else + reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT; + + if (nvc0->rast->pipe.point_quad_rasterization) { + uint32_t en = nvc0->rast->pipe.sprite_coord_enable; + + while (en) { + int i = ffs(en) - 1; + en &= ~(1 << i); + if (i >= 0 && i < 8) + reg |= 8 << i; + } + } + + BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1); + OUT_RING (chan, reg); +} + +static void nvc0_constbufs_validate(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; @@ -340,6 +375,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); if (rebind) { + MARK_RING (chan, 4, 2); BEGIN_RING(chan, RING_3D(CB_SIZE), 3); OUT_RING (chan, align(res->base.width0, 0x100)); OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); @@ -357,6 +393,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) } nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1); + MARK_RING (chan, nr + 5, 2); BEGIN_RING(chan, RING_3D(CB_SIZE), 3); OUT_RING (chan, align(res->base.width0, 0x100)); OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); @@ -383,13 +420,7 @@ static struct state_validate { { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR }, { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF }, { nvc0_validate_stipple, NVC0_NEW_STIPPLE }, -#ifdef NVC0_SCISSORS_CLIPPING - { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | - NVC0_NEW_RASTERIZER | - NVC0_NEW_FRAMEBUFFER }, -#else - { nvc0_validate_scissor, NVC0_NEW_SCISSOR }, -#endif + { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER }, { nvc0_validate_viewport, NVC0_NEW_VIEWPORT }, { nvc0_validate_clip, NVC0_NEW_CLIP }, { nvc0_vertprog_validate, NVC0_NEW_VERTPROG }, @@ -397,10 +428,12 @@ static struct state_validate { { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, + { nvc0_validate_sprite_coords, NVC0_NEW_RASTERIZER | NVC0_NEW_FRAGPROG }, { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, { nvc0_validate_textures, NVC0_NEW_TEXTURES }, { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, - { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS } + { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, + { nvc0_tfb_validate, NVC0_NEW_TFB | NVC0_NEW_TFB_BUFFERS } }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index 6c8028aba13..57566128ab5 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -4,8 +4,6 @@ #include "pipe/p_state.h" -#define NVC0_SCISSORS_CLIPPING - #define SB_BEGIN_3D(so, m, s) \ (so)->state[(so)->size++] = \ (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2) @@ -67,16 +65,17 @@ struct nvc0_vertex_stateobj { unsigned num_elements; uint32_t instance_elts; uint32_t instance_bufs; + boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */ unsigned vtx_size; unsigned vtx_per_packet_max; - struct nvc0_vertex_element element[1]; + struct nvc0_vertex_element element[0]; }; /* will have to lookup index -> location qualifier from nvc0_program */ -struct nvc0_tfb_state { - uint8_t varying_count[4]; +struct nvc0_transform_feedback_state { uint32_t stride[4]; - uint8_t varying_indices[1]; + uint8_t varying_count[4]; + uint8_t varying_index[0]; }; #endif diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index cc0a65687dc..8898bc733a3 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -33,25 +33,15 @@ #include "nv50_defs.xml.h" +#define NVC0_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL + /* return TRUE for formats that can be converted among each other by NVC0_2D */ static INLINE boolean nvc0_2d_format_faithful(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8X8_SRGB: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - return TRUE; - default: - return FALSE; - } + uint8_t id = nvc0_format_table[format].rt; + + return (id >= 0xc0) && (NVC0_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))); } static INLINE uint8_t @@ -62,7 +52,7 @@ nvc0_2d_format(enum pipe_format format) /* Hardware values for color formats range from 0xc0 to 0xff, * but the 2D engine doesn't support all of them. */ - if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0)))) + if (nvc0_2d_format_faithful(format)) return id; switch (util_format_get_blocksize(format)) { @@ -72,6 +62,10 @@ nvc0_2d_format(enum pipe_format format) return NV50_SURFACE_FORMAT_R16_UNORM; case 4: return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM; + case 8: + return NV50_SURFACE_FORMAT_R16G16B16A16_UNORM; + case 16: + return NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT; default: return 0; } @@ -249,15 +243,16 @@ nvc0_clear_render_target(struct pipe_context *pipe, OUT_RING (chan, 1); OUT_RING (chan, 0); - /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ - - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, (width << 16) | dstx); - OUT_RING (chan, (height << 16) | dsty); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2); + OUT_RING (chan, ((dstx + width) << 16) | dstx); + OUT_RING (chan, ((dsty + height) << 16) | dsty); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1); BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); OUT_RING (chan, 0x3c); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0); + nv50->dirty |= NVC0_NEW_FRAMEBUFFER; } @@ -306,13 +301,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, OUT_RING (chan, sf->height); OUT_RING (chan, (1 << 16) | 1); - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, (width << 16) | dstx); - OUT_RING (chan, (height << 16) | dsty); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2); + OUT_RING (chan, ((dstx + width) << 16) | dstx); + OUT_RING (chan, ((dsty + height) << 16) | dsty); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1); BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); OUT_RING (chan, mode); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0); + nv50->dirty |= NVC0_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c index b219f82c903..968558a5869 100644 --- a/src/gallium/drivers/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nvc0/nvc0_tex.c @@ -196,9 +196,16 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) OUT_RINGp (chan, &tic->tic[3], 5); need_flush = TRUE; + } else + if (res->status & NVC0_BUFFER_STATUS_GPU_WRITING) { + BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); + OUT_RING (chan, (tic->id << 4) | 1); } nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + res->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING; + res->status |= NVC0_BUFFER_STATUS_GPU_READING; + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 950bee2eda4..a44d330c731 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -63,7 +63,13 @@ bld_register_access(struct bld_register *reg, unsigned i) static INLINE void bld_register_add_val(struct bld_register *reg, struct nv_value *val) { - util_dynarray_append(®->vals, struct nv_value *, val); + struct nv_basic_block *bb = val->insn->bb; + + if (reg->vals.size && + (util_dynarray_top(®->vals, struct nv_value *))->insn->bb == bb) + *(util_dynarray_top_ptr(®->vals, struct nv_value *)) = val; + else + util_dynarray_append(®->vals, struct nv_value *, val); } static INLINE boolean @@ -127,13 +133,10 @@ struct bld_context { static INLINE ubyte bld_register_file(struct bld_context *bld, struct bld_register *reg) { - if (reg < &bld->avs[0][0]) return NV_FILE_GPR; - else - if (reg < &bld->pvs[0][0]) return NV_FILE_GPR; - else - if (reg < &bld->ovs[0][0]) return NV_FILE_PRED; - else - return NV_FILE_MEM_V; + if (reg >= &bld->pvs[0][0] && + reg < &bld->ovs[0][0]) + return NV_FILE_PRED; + return NV_FILE_GPR; } static INLINE struct nv_value * @@ -361,6 +364,9 @@ bld_loop_phi(struct bld_context *bld, struct bld_register *reg, struct nv_basic_block *bb = bld->pc->current_block; struct nv_value *val = NULL; + if (bld->ti->require_stores) /* XXX: actually only for INDEXABLE_TEMP */ + return NULL; + if (bld->loop_lvl > 1) { --bld->loop_lvl; if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl))) @@ -459,6 +465,7 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) reg = (struct bld_register *)phi->target; phi->target = NULL; + /* start with s == 1, src[0] is from outside the loop */ for (s = 1, n = 0; n < bb->num_in; ++n) { if (bb->in_kind[n] != CFG_EDGE_BACK) continue; @@ -470,8 +477,11 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) for (i = 0; i < 4; ++i) if (phi->src[i] && phi->src[i]->value == val) break; - if (i == 4) + if (i == 4) { + /* skip values we do not want to replace */ + for (; phi->src[s] && phi->src[s]->value != phi->def[0]; ++s); nv_reference(bld->pc, phi, s++, val); + } } bld->pc->current_block = save; @@ -563,11 +573,12 @@ bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst, loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); - loc->reg.id = ofst * 4; + loc->reg.address = ofst * 4; nv_reference(bld->pc, insn, 0, loc); - nv_reference(bld->pc, insn, 1, ptr); - nv_reference(bld->pc, insn, 2, val); + nv_reference(bld->pc, insn, 1, val); + if (ptr) + bld_src_pointer(bld, insn, 2, ptr); } static struct nv_value * @@ -579,7 +590,9 @@ bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst) loc->reg.address = ofst * 4; - val = bld_insn_2(bld, NV_OP_LD, loc, ptr); + val = bld_insn_1(bld, NV_OP_LD, loc); + if (ptr) + bld_src_pointer(bld, val->insn, 1, ptr); return val; } @@ -650,7 +663,7 @@ bld_kil(struct bld_context *bld, struct nv_value *src) static void bld_flow(struct bld_context *bld, uint opcode, - struct nv_value *src, struct nv_basic_block *target, + struct nv_value *pred, uint8_t cc, struct nv_basic_block *target, boolean reconverge) { struct nv_instruction *nvi; @@ -661,8 +674,10 @@ bld_flow(struct bld_context *bld, uint opcode, nvi = new_instruction(bld->pc, opcode); nvi->target = target; nvi->terminator = 1; - if (src) - bld_src_predicate(bld, nvi, 0, src); + if (pred) { + nvi->cc = cc; + bld_src_predicate(bld, nvi, 0, pred); + } } static ubyte @@ -878,11 +893,10 @@ emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst, break; case TGSI_FILE_TEMPORARY: assert(idx < BLD_MAX_TEMPS); - if (!res->insn) + if (!res->insn || res->insn->bb != bld->pc->current_block) res = bld_insn_1(bld, NV_OP_MOV, res); assert(res->reg.file == NV_FILE_GPR); - assert(res->insn->bb = bld->pc->current_block); if (bld->ti->require_stores) bld_lmem_store(bld, ptr, idx * 4 + chan, res); @@ -979,14 +993,6 @@ bld_new_block(struct bld_context *bld, struct nv_basic_block *b) } static struct nv_value * -bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c) -{ - if (bld->saved_inputs[i][c]) - return bld->saved_inputs[i][c]; - return NULL; -} - -static struct nv_value * bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val) { unsigned cent = mode & NVC0_INTERP_CENTROID; @@ -1053,9 +1059,9 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, case TGSI_FILE_INPUT: assert(!src->Register.Dimension); if (!ptr) { - res = bld_get_saved_input(bld, idx, swz); + res = bld->saved_inputs[idx][swz]; if (res) - return res; + break; } res = new_value(bld->pc, bld->ti->input_file, 4); if (ptr) @@ -1173,7 +1179,7 @@ static INLINE void describe_texture_target(unsigned target, int *dim, int *array, int *cube, int *shadow) { - *array = *cube = *shadow = 0; + *dim = *array = *cube = *shadow = 0; switch (target) { case TGSI_TEXTURE_1D: @@ -1199,11 +1205,6 @@ describe_texture_target(unsigned target, int *dim, *dim = 2; *cube = 1; break; - /* - case TGSI_TEXTURE_CUBE_ARRAY: - *dim = 2; - *cube = *array = 1; - break; case TGSI_TEXTURE_1D_ARRAY: *dim = *array = 1; break; @@ -1211,6 +1212,7 @@ describe_texture_target(unsigned target, int *dim, *dim = 2; *array = 1; break; + /* case TGSI_TEXTURE_SHADOW1D_ARRAY: *dim = *array = *shadow = 1; break; @@ -1220,7 +1222,7 @@ describe_texture_target(unsigned target, int *dim, break; case TGSI_TEXTURE_CUBE_ARRAY: *dim = 2; - *array = *cube = 1; + *cube = *array = 1; break; */ default: @@ -1338,10 +1340,6 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, if (array) arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]); - /* ensure that all inputs reside in a GPR */ - for (c = 0; c < dim + array + cube + shadow; ++c) - (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1; - /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */ bnd = new_instruction(bld->pc, NV_OP_BIND); @@ -1383,21 +1381,12 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, nvi->tex_dim = dim; nvi->tex_cube = cube; nvi->tex_shadow = shadow; + nvi->tex_array = array; nvi->tex_live = 0; return nvi; } -/* -static boolean -bld_is_constant(struct nv_value *val) -{ - if (val->reg.file == NV_FILE_IMM) - return TRUE; - return val->insn && nvCG_find_constant(val->insn->src[0]); -} -*/ - static void bld_tex(struct bld_context *bld, struct nv_value *dst0[4], const struct tgsi_full_instruction *insn) @@ -1413,7 +1402,7 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4], assert(dim + array + shadow + lodbias <= 5); - if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) + if (!cube && !array && insn->Instruction.Opcode == TGSI_OPCODE_TXP) load_proj_tex_coords(bld, t, dim, shadow, insn); else { for (c = 0; c < dim + cube + array; ++c) @@ -1504,10 +1493,10 @@ bld_instruction(struct bld_context *bld, case TGSI_OPCODE_CMP: FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { src0 = emit_fetch(bld, insn, 0, c); - src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero); src1 = emit_fetch(bld, insn, 1, c); src2 = emit_fetch(bld, insn, 2, c); - dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0); + dst0[c] = bld_insn_3(bld, NV_OP_SLCT_F32, src1, src2, src0); + dst0[c]->insn->set_cond = NV_CC_LT; } break; case TGSI_OPCODE_COS: @@ -1601,6 +1590,7 @@ bld_instruction(struct bld_context *bld, case TGSI_OPCODE_IF: { struct nv_basic_block *b = new_basic_block(bld->pc); + struct nv_value *pred = emit_fetch(bld, insn, 0, 0); assert(bld->cond_lvl < BLD_MAX_COND_NESTING); @@ -1609,10 +1599,19 @@ bld_instruction(struct bld_context *bld, bld->join_bb[bld->cond_lvl] = bld->pc->current_block; bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; - src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ, - emit_fetch(bld, insn, 0, 0), bld->zero); + if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) { + pred = bld_clone(bld, pred->insn); + pred->reg.size = 1; + pred->reg.file = NV_FILE_PRED; + if (pred->insn->opcode == NV_OP_FSET_F32) + pred->insn->opcode = NV_OP_SET_F32; + } else { + pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U, + pred, bld->zero); + } + assert(!mask); - bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0)); + bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0)); ++bld->cond_lvl; bld_new_block(bld, b); @@ -1638,6 +1637,10 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *b = new_basic_block(bld->pc); + if (bld->pc->current_block->exit && + !bld->pc->current_block->exit->terminator) + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, b, FALSE); + --bld->cond_lvl; nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind); nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); @@ -1678,7 +1681,7 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); @@ -1690,7 +1693,7 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); @@ -1705,9 +1708,11 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */ + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); - nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + } bld_loop_end(bld, bb); /* replace loop-side operand of the phis */ @@ -1824,11 +1829,11 @@ bld_instruction(struct bld_context *bld, case TGSI_OPCODE_SSG: FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */ src0 = emit_fetch(bld, insn, 0, c); - src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero); - temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000)); - temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f)); - dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp); - bld_src_predicate(bld, dst0[c]->insn, 1, src1); + src1 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero); + src2 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero); + src1->insn->set_cond = NV_CC_GT; + src2->insn->set_cond = NV_CC_LT; + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2); } break; case TGSI_OPCODE_SUB: @@ -1892,10 +1897,10 @@ bld_instruction(struct bld_context *bld, } for (c = 0; c < 4; ++c) - if ((mask & (1 << c)) && - ((dst0[c]->reg.file == NV_FILE_IMM) || - (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR))) - dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); + if (mask & (1 << c)) + if ((dst0[c]->reg.file == NV_FILE_IMM) || + (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63)) + dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); c = 0; if ((mask & 0x3) == 0x3) { diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index 286b382f58e..b279bdc6e7d 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -10,7 +10,8 @@ struct nvc0_transfer { struct pipe_transfer base; struct nvc0_m2mf_rect rect[2]; uint32_t nblocksx; - uint32_t nblocksy; + uint16_t nblocksy; + uint16_t nlayers; }; static void @@ -242,23 +243,36 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, struct nvc0_miptree_level *lvl = &mt->level[level]; struct nvc0_transfer *tx; uint32_t size; - uint32_t w, h, d, z, layer; + uint32_t w, h, d, z, layer, box_h, box_y; int ret; + tx = CALLOC_STRUCT(nvc0_transfer); + if (!tx) + return NULL; + + box_y = box->y; + box_h = box->height; + if (mt->layout_3d) { z = box->z; d = u_minify(res->depth0, level); layer = 0; + tx->nlayers = box->depth; } else { z = 0; d = 1; - layer = box->z; + if (res->target == PIPE_TEXTURE_1D || + res->target == PIPE_TEXTURE_1D_ARRAY) { + box_y = 0; + box_h = 1; + layer = box->y; + tx->nlayers = box->height; + } else { + layer = box->z; + tx->nlayers = box->depth; + } } - tx = CALLOC_STRUCT(nvc0_transfer); - if (!tx) - return NULL; - pipe_resource_reference(&tx->base.resource, res); tx->base.level = level; @@ -266,7 +280,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->base.box = *box; tx->nblocksx = util_format_get_nblocksx(res->format, box->width); - tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + tx->nblocksy = util_format_get_nblocksy(res->format, box_h); tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); tx->base.layer_stride = tx->nblocksy * tx->base.stride; @@ -280,7 +294,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[0].base = lvl->offset + layer * mt->layer_stride; tx->rect[0].tile_mode = lvl->tile_mode; tx->rect[0].x = util_format_get_nblocksx(res->format, box->x); - tx->rect[0].y = util_format_get_nblocksy(res->format, box->y); + tx->rect[0].y = util_format_get_nblocksy(res->format, box_y); tx->rect[0].z = z; tx->rect[0].width = util_format_get_nblocksx(res->format, w); tx->rect[0].height = util_format_get_nblocksy(res->format, h); @@ -291,7 +305,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, size = tx->base.layer_stride; ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - size * tx->base.box.depth, &tx->rect[1].bo); + size * tx->nlayers, &tx->rect[1].bo); if (ret) { FREE(tx); return NULL; @@ -304,8 +318,9 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[1].domain = NOUVEAU_BO_GART; if (usage & PIPE_TRANSFER_READ) { + unsigned base = tx->rect[0].base; unsigned i; - for (i = 0; i < box->depth; ++i) { + for (i = 0; i < tx->nlayers; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0], tx->nblocksx, tx->nblocksy); if (mt->layout_3d) @@ -314,9 +329,10 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[0].base += mt->layer_stride; tx->rect[1].base += size; } + tx->rect[0].z = z; + tx->rect[0].base = base; + tx->rect[1].base = 0; } - tx->rect[0].z = z; - tx->rect[1].base = 0; return &tx->base; } @@ -331,7 +347,7 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx, unsigned i; if (tx->base.usage & PIPE_TRANSFER_WRITE) { - for (i = 0; i < tx->base.box.depth; ++i) { + for (i = 0; i < tx->nlayers; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1], tx->nblocksx, tx->nblocksy); if (mt->layout_3d) diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index a51a887ed89..2db43d8704b 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -54,12 +54,13 @@ nvc0_vertex_state_create(struct pipe_context *pipe, assert(num_elements); so = MALLOC(sizeof(*so) + - (num_elements - 1) * sizeof(struct nvc0_vertex_element)); + num_elements * sizeof(struct nvc0_vertex_element)); if (!so) return NULL; so->num_elements = num_elements; so->instance_elts = 0; so->instance_bufs = 0; + so->need_conversion = FALSE; transkey.nr_elements = 0; transkey.output_stride = 0; @@ -83,6 +84,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, return NULL; } so->element[i].state = nvc0_format_table[fmt].vtx; + so->need_conversion = TRUE; } so->element[i].state |= i; @@ -152,7 +154,7 @@ nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi, if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { /* TODO: use min and max instance divisor to get a proper range */ *base = 0; - *size = (nvc0->vtxbuf[vbi].max_index + 1) * nvc0->vtxbuf[vbi].stride; + *size = nvc0->vtxbuf[vbi].buffer->width0; } else { assert(nvc0->vbo_max_index != ~0); *base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride; @@ -171,12 +173,15 @@ nvc0_prevalidate_vbufs(struct nvc0_context *nvc0) nvc0->vbo_fifo = nvc0->vbo_user = 0; + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX); + for (i = 0; i < nvc0->num_vtxbufs; ++i) { vb = &nvc0->vtxbuf[i]; if (!vb->stride) continue; buf = nvc0_resource(vb->buffer); + /* NOTE: user buffers with temporary storage count as mapped by GPU */ if (!nvc0_resource_mapped_by_gpu(vb->buffer)) { if (nvc0->vbo_push_hint) { nvc0->vbo_fifo = ~0; @@ -227,16 +232,30 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0) } offset = vb->buffer_offset + ve->src_offset; + MARK_RING (chan, 6, 4); BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); OUT_RING (chan, i); - OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD); - OUT_RESRCl(chan, buf, size - 1, NOUVEAU_BO_RD); + OUT_RESRCh(chan, buf, base + size - 1, NOUVEAU_BO_RD); + OUT_RESRCl(chan, buf, base + size - 1, NOUVEAU_BO_RD); OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD); OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD); } nvc0->vbo_dirty = TRUE; } +static INLINE void +nvc0_release_user_vbufs(struct nvc0_context *nvc0) +{ + uint32_t vbo_user = nvc0->vbo_user; + + while (vbo_user) { + int i = ffs(vbo_user) - 1; + vbo_user &= ~(1 << i); + + nvc0_buffer_release_gpu_storage(nvc0_resource(nvc0->vtxbuf[i].buffer)); + } +} + void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) { @@ -246,7 +265,12 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) struct nvc0_vertex_element *ve; unsigned i; - nvc0_prevalidate_vbufs(nvc0); + if (unlikely(vertex->need_conversion)) { + nvc0->vbo_fifo = ~0; + nvc0->vbo_user = 0; + } else { + nvc0_prevalidate_vbufs(nvc0); + } BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements); for (i = 0; i < vertex->num_elements; ++i) { @@ -292,6 +316,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) size = vb->buffer->width0; offset = ve->pipe.src_offset + vb->buffer_offset; + MARK_RING (chan, 8, 4); BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); OUT_RING (chan, (1 << 12) | vb->stride); BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); @@ -346,57 +371,10 @@ nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) { struct nvc0_context *nvc0 = chan->user_private; - nvc0_bufctx_emit_relocs(nvc0); -} + nvc0_screen_fence_update(nvc0->screen, TRUE); -#if 0 -static struct nouveau_bo * -nvc0_tfb_setup(struct nvc0_context *nvc0) -{ - struct nouveau_channel *chan = nvc0->screen->base.channel; - struct nouveau_bo *tfb = NULL; - int ret, i; - - ret = nouveau_bo_new(nvc0->screen->base.device, - NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &tfb); - if (ret) - return NULL; - - ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR); - if (ret) - return NULL; - memset(tfb->map, 0xee, 8 * 4 * 3); - nouveau_bo_unmap(tfb); - - BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(0)), 5); - OUT_RING (chan, 1); - OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (chan, tfb->size); - OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID(0) */ - BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3); - OUT_RING (chan, 0); - OUT_RING (chan, 8); /* TFB_VARYING_COUNT(0) */ - OUT_RING (chan, 32); /* TFB_BUFFER_STRIDE(0) */ - BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2); - OUT_RING (chan, 0x1f1e1d1c); - OUT_RING (chan, 0xa3a2a1a0); - for (i = 1; i < 4; ++i) { - BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1); - OUT_RING (chan, 0); - } - BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, RING_3D_(0x135c), 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, RING_3D_(0x135c), 1); - OUT_RING (chan, 0); - - return tfb; + nvc0_bufctx_emit_relocs(nvc0); } -#endif static void nvc0_draw_arrays(struct nvc0_context *nvc0, @@ -422,7 +400,7 @@ nvc0_draw_arrays(struct nvc0_context *nvc0, prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } - chan->flush_notify = NULL; + chan->flush_notify = nvc0_default_flush_notify; } static void @@ -592,7 +570,7 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, } } - chan->flush_notify = NULL; + chan->flush_notify = nvc0_default_flush_notify; } void @@ -611,6 +589,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvc0->vbo_min_index = info->min_index; nvc0->vbo_max_index = info->max_index; + if (nvc0->vbo_push_hint != !!nvc0->vbo_fifo) + nvc0->dirty |= NVC0_NEW_ARRAYS; + if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS))) nvc0_update_user_vbufs(nvc0); @@ -668,4 +649,6 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, info->instance_count, info->index_bias); } + + nvc0_release_user_vbufs(nvc0); } diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h index 1544fb7a1de..45f71967eff 100644 --- a/src/gallium/drivers/nvc0/nvc0_winsys.h +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h @@ -95,7 +95,7 @@ OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res, unsigned delta, unsigned flags) { if (flags & NOUVEAU_BO_WR) - res->status |= NVC0_BUFFER_STATUS_DIRTY; + res->status |= NVC0_BUFFER_STATUS_GPU_WRITING; return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); } |