diff options
Diffstat (limited to 'src/gallium')
21 files changed, 1365 insertions, 921 deletions
diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index 685fa00b455..14c11b278ad 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -6149,6 +6149,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_FP_REG_CONTROL_UNK1_MASK 0xffff0000 #define NV34TCL_FP_REG_CONTROL_UNK0_SHIFT 0 #define NV34TCL_FP_REG_CONTROL_UNK0_MASK 0x0000ffff +#define NV34TCL_EDGEFLAG_ENABLE 0x0000145c #define NV34TCL_VP_CLIP_PLANES_ENABLE 0x00001478 #define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 (1 << 1) #define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 (1 << 5) @@ -6182,10 +6183,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_VTXFMT__SIZE 0x00000010 #define NV34TCL_VTXFMT_TYPE_SHIFT 0 #define NV34TCL_VTXFMT_TYPE_MASK 0x0000000f -#define NV34TCL_VTXFMT_TYPE_FLOAT 0x00000002 -#define NV34TCL_VTXFMT_TYPE_HALF 0x00000003 -#define NV34TCL_VTXFMT_TYPE_UBYTE 0x00000004 -#define NV34TCL_VTXFMT_TYPE_USHORT 0x00000005 +#define NV34TCL_VTXFMT_TYPE_16_SNORM 0x00000001 +#define NV34TCL_VTXFMT_TYPE_32_FLOAT 0x00000002 +#define NV34TCL_VTXFMT_TYPE_16_FLOAT 0x00000003 +#define NV34TCL_VTXFMT_TYPE_8_UNORM 0x00000004 +#define NV34TCL_VTXFMT_TYPE_16_SSCALED 0x00000005 +#define NV34TCL_VTXFMT_TYPE_11_11_10_SNORM 0x00000006 +#define NV34TCL_VTXFMT_TYPE_8_USCALED 0x00000007 #define NV34TCL_VTXFMT_SIZE_SHIFT 4 #define NV34TCL_VTXFMT_SIZE_MASK 0x000000f0 #define NV34TCL_VTXFMT_STRIDE_SHIFT 8 diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h deleted file mode 100644 index b165f7a611a..00000000000 --- a/src/gallium/drivers/nouveau/nouveau_util.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef __NOUVEAU_UTIL_H__ -#define __NOUVEAU_UTIL_H__ - -/* Determine how many vertices can be pushed into the command stream. - * Where the remaining space isn't large enough to represent all verices, - * split the buffer at primitive boundaries. - * - * Returns a count of vertices that can be rendered, and an index to - * restart drawing at after a flush. - */ -static INLINE unsigned -nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp, - unsigned mode, unsigned start, unsigned count, - unsigned *restart) -{ - int max, adj = 0; - - max = remaining - overhead; - if (max < 0) - return 0; - - max *= vpp; - if (max >= count) - return count; - - switch (mode) { - case PIPE_PRIM_POINTS: - break; - case PIPE_PRIM_LINES: - max = max & 1; - break; - case PIPE_PRIM_TRIANGLES: - max = max - (max % 3); - break; - case PIPE_PRIM_QUADS: - max = max & ~3; - break; - case PIPE_PRIM_LINE_LOOP: - case PIPE_PRIM_LINE_STRIP: - if (max < 2) - max = 0; - adj = 1; - break; - case PIPE_PRIM_POLYGON: - case PIPE_PRIM_TRIANGLE_STRIP: - case PIPE_PRIM_TRIANGLE_FAN: - if (max < 3) - max = 0; - adj = 2; - break; - case PIPE_PRIM_QUAD_STRIP: - if (max < 4) - max = 0; - adj = 3; - break; - default: - assert(0); - } - - *restart = start + max - adj; - return max; -} - -/* Integer base-2 logarithm, rounded towards zero. */ -static INLINE unsigned log2i(unsigned i) -{ - unsigned r = 0; - - if (i & 0xffff0000) { - i >>= 16; - r += 16; - } - if (i & 0x0000ff00) { - i >>= 8; - r += 8; - } - if (i & 0x000000f0) { - i >>= 4; - r += 4; - } - if (i & 0x0000000c) { - i >>= 2; - r += 2; - } - if (i & 0x00000002) { - r += 1; - } - return r; -} - -#endif diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 2834f8984c7..6cbbad699eb 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -14,6 +14,7 @@ C_SOURCES = \ nv30_fragtex.c \ nv40_fragtex.c \ nvfx_miptree.c \ + nvfx_push.c \ nvfx_query.c \ nvfx_resource.c \ nvfx_screen.c \ diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c index 63c578a0ce1..db8a8fc4b08 100644 --- a/src/gallium/drivers/nvfx/nv30_fragtex.c +++ b/src/gallium/drivers/nvfx/nv30_fragtex.c @@ -1,7 +1,6 @@ #include "util/u_format.h" #include "nvfx_context.h" -#include "nouveau/nouveau_util.h" #include "nvfx_tex.h" #include "nvfx_resource.h" @@ -44,9 +43,9 @@ nv30_sampler_view_init(struct pipe_context *pipe, txf = sv->u.init_fmt; txf |= (level != sv->base.last_level ? NV34TCL_TX_FORMAT_MIPMAP : 0); - txf |= log2i(u_minify(pt->width0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; - txf |= log2i(u_minify(pt->height0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; - txf |= log2i(u_minify(pt->depth0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; + txf |= util_logbase2(u_minify(pt->width0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; + txf |= util_logbase2(u_minify(pt->height0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; + txf |= util_logbase2(u_minify(pt->depth0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; txf |= 0x10000; sv->u.nv30.fmt[0] = tf->fmt[0] | txf; diff --git a/src/gallium/drivers/nvfx/nvfx_buffer.c b/src/gallium/drivers/nvfx/nvfx_buffer.c index 44680e51959..89bb8570efd 100644 --- a/src/gallium/drivers/nvfx/nvfx_buffer.c +++ b/src/gallium/drivers/nvfx/nvfx_buffer.c @@ -6,13 +6,16 @@ #include "nouveau/nouveau_screen.h" #include "nouveau/nouveau_winsys.h" #include "nvfx_resource.h" +#include "nvfx_screen.h" void nvfx_buffer_destroy(struct pipe_screen *pscreen, struct pipe_resource *presource) { - struct nvfx_resource *buffer = nvfx_resource(presource); + struct nvfx_buffer *buffer = nvfx_buffer(presource); - nouveau_screen_bo_release(pscreen, buffer->bo); + if(!(buffer->base.base.flags & NVFX_RESOURCE_FLAG_USER)) + align_free(buffer->data); + nouveau_screen_bo_release(pscreen, buffer->base.bo); FREE(buffer); } @@ -20,31 +23,22 @@ struct pipe_resource * nvfx_buffer_create(struct pipe_screen *pscreen, const struct pipe_resource *template) { - struct nvfx_resource *buffer; + struct nvfx_screen* screen = nvfx_screen(pscreen); + struct nvfx_buffer* buffer; - buffer = CALLOC_STRUCT(nvfx_resource); + buffer = CALLOC_STRUCT(nvfx_buffer); if (!buffer) return NULL; - buffer->base = *template; - buffer->base.flags |= NVFX_RESOURCE_FLAG_LINEAR; - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.screen = pscreen; + buffer->base.base = *template; + buffer->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; + pipe_reference_init(&buffer->base.base.reference, 1); + buffer->base.base.screen = pscreen; + buffer->size = util_format_get_stride(template->format, template->width0); + buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold; + buffer->data = align_malloc(buffer->size, 16); - buffer->bo = nouveau_screen_bo_new(pscreen, - 16, - buffer->base.usage, - buffer->base.bind, - buffer->base.width0); - - if (buffer->bo == NULL) - goto fail; - - return &buffer->base; - -fail: - FREE(buffer); - return NULL; + return &buffer->base.base; } @@ -54,29 +48,49 @@ nvfx_user_buffer_create(struct pipe_screen *pscreen, unsigned bytes, unsigned usage) { - struct nvfx_resource *buffer; + struct nvfx_screen* screen = nvfx_screen(pscreen); + struct nvfx_buffer* buffer; - buffer = CALLOC_STRUCT(nvfx_resource); + buffer = CALLOC_STRUCT(nvfx_buffer); if (!buffer) return NULL; - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.flags = NVFX_RESOURCE_FLAG_LINEAR; - buffer->base.screen = pscreen; - buffer->base.format = PIPE_FORMAT_R8_UNORM; - buffer->base.usage = PIPE_USAGE_IMMUTABLE; - buffer->base.bind = usage; - buffer->base.width0 = bytes; - buffer->base.height0 = 1; - buffer->base.depth0 = 1; - - buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes); - if (!buffer->bo) - goto fail; - - return &buffer->base; + pipe_reference_init(&buffer->base.base.reference, 1); + buffer->base.base.flags = NVFX_RESOURCE_FLAG_LINEAR | NVFX_RESOURCE_FLAG_USER; + buffer->base.base.screen = pscreen; + buffer->base.base.format = PIPE_FORMAT_R8_UNORM; + buffer->base.base.usage = PIPE_USAGE_IMMUTABLE; + buffer->base.base.bind = usage; + buffer->base.base.width0 = bytes; + buffer->base.base.height0 = 1; + buffer->base.base.depth0 = 1; + buffer->data = ptr; + buffer->size = bytes; + buffer->bytes_to_draw_until_static = bytes * screen->static_reuse_threshold; + buffer->dirty_end = bytes; + + return &buffer->base.base; +} -fail: - FREE(buffer); - return NULL; +void nvfx_buffer_upload(struct nvfx_buffer* buffer) +{ + unsigned dirty = buffer->dirty_end - buffer->dirty_begin; + if(!buffer->base.bo) + { + buffer->base.bo = nouveau_screen_bo_new(buffer->base.base.screen, + 16, + buffer->base.base.usage, + buffer->base.base.bind, + buffer->base.base.width0); + } + + if(dirty) + { + // TODO: may want to use a temporary in some cases + nouveau_bo_map(buffer->base.bo, NOUVEAU_BO_WR + | (buffer->dirty_unsynchronized ? NOUVEAU_BO_NOSYNC : 0)); + memcpy(buffer->base.bo->map + buffer->dirty_begin, buffer->data + buffer->dirty_begin, dirty); + nouveau_bo_unmap(buffer->base.bo); + buffer->dirty_begin = buffer->dirty_end = 0; + } } diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 1980176b23e..94c854b22b8 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -76,7 +76,9 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) nvfx_init_surface_functions(nvfx); nvfx_init_state_functions(nvfx); nvfx_init_sampling_functions(nvfx); + nvfx_init_vbo_functions(nvfx); nvfx_init_resource_functions(&nvfx->pipe); + nvfx_init_transfer_functions(&nvfx->pipe); /* Create, configure, and install fallback swtnl path */ nvfx->draw = draw_create(&nvfx->pipe); @@ -89,6 +91,7 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) /* set these to that we init them on first validation */ nvfx->state.scissor_enabled = ~0; nvfx->state.stipple_enabled = ~0; + nvfx->use_vertex_buffers = -1; LIST_INITHEAD(&nvfx->render_cache); diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index bce19df044d..8899bf991e1 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -44,6 +44,7 @@ #define NVFX_NEW_SR (1 << 13) #define NVFX_NEW_VERTCONST (1 << 14) #define NVFX_NEW_FRAGCONST (1 << 15) +#define NVFX_NEW_INDEX (1 << 16) struct nvfx_rasterizer_state { struct pipe_rasterizer_state pipe; @@ -71,9 +72,53 @@ struct nvfx_state { unsigned render_temps; }; +struct nvfx_per_vertex_element { + unsigned idx; + unsigned vertex_buffer_index; + unsigned src_offset; +}; + +struct nvfx_low_frequency_element { + unsigned idx; + unsigned vertex_buffer_index; + unsigned src_offset; + void (*fetch_rgba_float)(float *dst, const uint8_t *src, unsigned i, unsigned j); + unsigned ncomp; +}; + +struct nvfx_per_instance_element { + struct nvfx_low_frequency_element base; + unsigned instance_divisor; +}; + +struct nvfx_per_vertex_buffer_info +{ + unsigned vertex_buffer_index; + unsigned per_vertex_size; +}; + struct nvfx_vtxelt_state { struct pipe_vertex_element pipe[16]; unsigned num_elements; + unsigned vtxfmt[16]; + + unsigned num_per_vertex_buffer_infos; + struct nvfx_per_vertex_buffer_info per_vertex_buffer_info[16]; + + unsigned num_per_vertex; + struct nvfx_per_vertex_element per_vertex[16]; + + unsigned num_per_instance; + struct nvfx_per_instance_element per_instance[16]; + + unsigned num_constant; + struct nvfx_low_frequency_element constant[16]; + + boolean needs_translate; + struct translate* translate; + + unsigned vertex_length; + unsigned max_vertices_per_packet; }; struct nvfx_render_target { @@ -127,8 +172,6 @@ struct nvfx_context { struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; struct pipe_index_buffer idxbuf; - struct pipe_resource *idxbuf_buffer; - unsigned idxbuf_format; struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; @@ -137,8 +180,14 @@ struct nvfx_context { struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; struct nvfx_vtxelt_state *vtxelt; + int base_vertex; + boolean use_index_buffer; + /* -1 = hardware input setup is outdated + * 0 = hardware input setup is for inline vertices + * 1 = hardware input setup is for hardware vertices + */ + int use_vertex_buffers; - unsigned vbo_bo; unsigned hw_vtxelt_nr; uint8_t hw_samplers; uint32_t hw_txf[8]; @@ -180,11 +229,7 @@ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, /* nvfx_draw.c */ extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx); -extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe, - struct pipe_resource *idxbuf, - unsigned ib_size, int ib_bias, - unsigned mode, - unsigned start, unsigned count); +extern void nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info); extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx); /* nvfx_fb.c */ @@ -245,17 +290,53 @@ extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx); extern void nvfx_state_emit(struct nvfx_context *nvfx); /* nvfx_transfer.c */ -extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx); +extern void nvfx_init_transfer_functions(struct pipe_context *pipe); /* nvfx_vbo.c */ extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx); extern void nvfx_vbo_relocate(struct nvfx_context *nvfx); +extern void nvfx_idxbuf_validate(struct nvfx_context* nvfx); +extern void nvfx_idxbuf_relocate(struct nvfx_context* nvfx); extern void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); +extern void nvfx_init_vbo_functions(struct nvfx_context *nvfx); +extern unsigned nvfx_vertex_formats[]; /* nvfx_vertprog.c */ extern boolean nvfx_vertprog_validate(struct nvfx_context *nvfx); extern void nvfx_vertprog_destroy(struct nvfx_context *, struct nvfx_vertex_program *); +/* nvfx_push.c */ +extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info); + +/* must WAIT_RING(chan, ncomp + 1) or equivalent beforehand! */ +static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, float* v, unsigned ncomp) +{ + switch (ncomp) { + case 4: + OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4)); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + OUT_RING(chan, fui(v[2])); + OUT_RING(chan, fui(v[3])); + break; + case 3: + OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3)); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + OUT_RING(chan, fui(v[2])); + break; + case 2: + OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2)); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + break; + case 1: + OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1)); + OUT_RING(chan, fui(v[0])); + break; + } +} + #endif diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index 22cff370b77..331e28418ad 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -9,6 +9,7 @@ #include "draw/draw_pipe.h" #include "nvfx_context.h" +#include "nvfx_resource.h" /* Simple, but crappy, swtnl path, hopefully we wont need to hit this very * often at all. Uses "quadro style" vertex submission + a fixed vertex @@ -39,30 +40,21 @@ nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v) unsigned idx = nvfx->swtnl.draw[i]; unsigned hw = nvfx->swtnl.hw[i]; + WAIT_RING(chan, 5); switch (nvfx->swtnl.emit[i]) { case EMIT_OMIT: break; case EMIT_1F: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(hw), 1); - OUT_RING (chan, fui(v->data[idx][0])); + nvfx_emit_vtx_attr(chan, hw, v->data[idx], 1); break; case EMIT_2F: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(hw), 2); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); + nvfx_emit_vtx_attr(chan, hw, v->data[idx], 2); break; case EMIT_3F: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(hw), 3); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - OUT_RING (chan, fui(v->data[idx][2])); + nvfx_emit_vtx_attr(chan, hw, v->data[idx], 3); break; case EMIT_4F: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - OUT_RING (chan, fui(v->data[idx][2])); - OUT_RING (chan, fui(v->data[idx][3])); + nvfx_emit_vtx_attr(chan, hw, v->data[idx], 4); break; case 0xff: BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); @@ -231,15 +223,9 @@ nvfx_draw_render_stage(struct nvfx_context *nvfx) } void -nvfx_draw_elements_swtnl(struct pipe_context *pipe, - struct pipe_resource *idxbuf, - unsigned idxbuf_size, int idxbuf_bias, - unsigned mode, unsigned start, unsigned count) +nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info) { struct nvfx_context *nvfx = nvfx_context(pipe); - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; - struct pipe_transfer *ib_transfer = NULL; - struct pipe_transfer *cb_transfer = NULL; unsigned i; void *map; @@ -247,18 +233,15 @@ nvfx_draw_elements_swtnl(struct pipe_context *pipe, return; nvfx_state_emit(nvfx); + /* these must be passed without adding the offsets */ for (i = 0; i < nvfx->vtxbuf_nr; i++) { - map = pipe_buffer_map(pipe, nvfx->vtxbuf[i].buffer, - PIPE_TRANSFER_READ, - &vb_transfer[i]); + map = nvfx_buffer(nvfx->vtxbuf[i].buffer)->data; draw_set_mapped_vertex_buffer(nvfx->draw, i, map); } - if (idxbuf) { - map = pipe_buffer_map(pipe, idxbuf, - PIPE_TRANSFER_READ, - &ib_transfer); - draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, idxbuf_bias, map); + if (info->indexed) { + map = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset; + draw_set_mapped_element_buffer_range(nvfx->draw, nvfx->idxbuf.index_size, info->index_bias, info->min_index, info->max_index, map); } else { draw_set_mapped_element_buffer(nvfx->draw, 0, 0, NULL); } @@ -266,28 +249,14 @@ nvfx_draw_elements_swtnl(struct pipe_context *pipe, if (nvfx->constbuf[PIPE_SHADER_VERTEX]) { const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX]; - map = pipe_buffer_map(pipe, - nvfx->constbuf[PIPE_SHADER_VERTEX], - PIPE_TRANSFER_READ, - &cb_transfer); + map = nvfx_buffer(nvfx->constbuf[PIPE_SHADER_VERTEX])->data; draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0, map, nr); } - draw_arrays(nvfx->draw, mode, start, count); - - for (i = 0; i < nvfx->vtxbuf_nr; i++) - pipe_buffer_unmap(pipe, nvfx->vtxbuf[i].buffer, vb_transfer[i]); - - if (idxbuf) - pipe_buffer_unmap(pipe, idxbuf, ib_transfer); - - if (nvfx->constbuf[PIPE_SHADER_VERTEX]) - pipe_buffer_unmap(pipe, nvfx->constbuf[PIPE_SHADER_VERTEX], - cb_transfer); + draw_arrays_instanced(nvfx->draw, info->mode, info->start, info->count, info->start_instance, info->instance_count); draw_flush(nvfx->draw); - pipe->flush(pipe, 0, NULL); } static INLINE void diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index ee41f03b9b8..ae4fe3aa262 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -9,6 +9,7 @@ #include "nvfx_context.h" #include "nvfx_shader.h" +#include "nvfx_resource.h" #define MAX_CONSTS 128 #define MAX_IMM 32 @@ -925,10 +926,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) { struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT]; - // TODO: avoid using transfers, just directly the buffer - struct pipe_transfer* transfer; - // TODO: does this check make any sense, or should we do this unconditionally? - uint32_t* map = pipe_buffer_map(&nvfx->pipe, constbuf, PIPE_TRANSFER_READ, &transfer); + uint32_t* map = (uint32_t*)nvfx_buffer(constbuf)->data; uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset); uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset); int i; @@ -942,7 +940,6 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) nvfx_fp_memcpy(&fpmap[off], &map[idx], 4 * sizeof(uint32_t)); } } - pipe_buffer_unmap(&nvfx->pipe, constbuf, transfer); } } diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c new file mode 100644 index 00000000000..52e891c6678 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_push.c @@ -0,0 +1,402 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_split_prim.h" +#include "translate/translate.h" + +#include "nvfx_context.h" +#include "nvfx_resource.h" + +struct push_context { + struct nouveau_channel* chan; + + void *idxbuf; + int32_t idxbias; + + float edgeflag; + int edgeflag_attr; + + unsigned vertex_length; + unsigned max_vertices_per_packet; + + struct translate* translate; +}; + +static void +emit_edgeflag(void *priv, boolean enabled) +{ + struct push_context* ctx = priv; + struct nouveau_channel *chan = ctx->chan; + + OUT_RING(chan, RING_3D(NV34TCL_EDGEFLAG_ENABLE, 1)); + OUT_RING(chan, enabled ? 1 : 0); +} + +static void +emit_vertices_lookup8(void *priv, unsigned start, unsigned count) +{ + struct push_context *ctx = priv; + uint8_t* elts = (uint8_t*)ctx->idxbuf + start; + + while(count) + { + unsigned push = MIN2(count, ctx->max_vertices_per_packet); + unsigned length = push * ctx->vertex_length; + + OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length)); + ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur); + ctx->chan->cur += length; + + count -= push; + elts += push; + } +} + +static void +emit_vertices_lookup16(void *priv, unsigned start, unsigned count) +{ + struct push_context *ctx = priv; + uint16_t* elts = (uint16_t*)ctx->idxbuf + start; + + while(count) + { + unsigned push = MIN2(count, ctx->max_vertices_per_packet); + unsigned length = push * ctx->vertex_length; + + OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length)); + ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur); + ctx->chan->cur += length; + + count -= push; + elts += push; + } +} + +static void +emit_vertices_lookup32(void *priv, unsigned start, unsigned count) +{ + struct push_context *ctx = priv; + uint32_t* elts = (uint32_t*)ctx->idxbuf + start; + + while(count) + { + unsigned push = MIN2(count, ctx->max_vertices_per_packet); + unsigned length = push * ctx->vertex_length; + + OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length)); + ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur); + ctx->chan->cur += length; + + count -= push; + elts += push; + } +} + +static void +emit_vertices(void *priv, unsigned start, unsigned count) +{ + struct push_context *ctx = priv; + + while(count) + { + unsigned push = MIN2(count, ctx->max_vertices_per_packet); + unsigned length = push * ctx->vertex_length; + + OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length)); + ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur); + ctx->chan->cur += length; + + count -= push; + start += push; + } +} + +static void +emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg) +{ + struct push_context* ctx = priv; + struct nouveau_channel *chan = ctx->chan; + unsigned nr = (vc & 0xff); + if (nr) { + OUT_RING(chan, RING_3D(reg, 1)); + OUT_RING (chan, ((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + OUT_RING(chan, RING_3D_NI(reg, push)); + while (push--) { + OUT_RING(chan, ((0x100 - 1) << 24) | start); + start += 0x100; + } + } +} + +static void +emit_ib_ranges(void* priv, unsigned start, unsigned vc) +{ + emit_ranges(priv, start, vc, NV34TCL_VB_INDEX_BATCH); +} + +static void +emit_vb_ranges(void* priv, unsigned start, unsigned vc) +{ + emit_ranges(priv, start, vc, NV34TCL_VB_VERTEX_BATCH); +} + +static INLINE void +emit_elt8(void* priv, unsigned start, unsigned vc) +{ + struct push_context* ctx = priv; + struct nouveau_channel *chan = ctx->chan; + uint8_t *elts = (uint8_t *)ctx->idxbuf + start; + int idxbias = ctx->idxbias; + + if (vc & 1) { + OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1)); + OUT_RING (chan, elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + unsigned push = MIN2(vc, 2047 * 2); + + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1)); + for (i = 0; i < push; i+=2) + OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias)); + + vc -= push; + elts += push; + } +} + +static INLINE void +emit_elt16(void* priv, unsigned start, unsigned vc) +{ + struct push_context* ctx = priv; + struct nouveau_channel *chan = ctx->chan; + uint16_t *elts = (uint16_t *)ctx->idxbuf + start; + int idxbias = ctx->idxbias; + + if (vc & 1) { + OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1)); + OUT_RING (chan, elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + unsigned push = MIN2(vc, 2047 * 2); + + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1)); + for (i = 0; i < push; i+=2) + OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias)); + + vc -= push; + elts += push; + } +} + +static INLINE void +emit_elt32(void* priv, unsigned start, unsigned vc) +{ + struct push_context* ctx = priv; + struct nouveau_channel *chan = ctx->chan; + uint32_t *elts = (uint32_t *)ctx->idxbuf + start; + int idxbias = ctx->idxbias; + + while (vc) { + unsigned push = MIN2(vc, 2047); + + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push)); + assert(AVAIL_RING(chan) >= push); + if(idxbias) + { + for(unsigned i = 0; i < push; ++i) + OUT_RING(chan, elts[i] + idxbias); + } + else + OUT_RINGp(chan, elts, push); + + vc -= push; + elts += push; + } +} + +void +nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nouveau_channel *chan = nvfx->screen->base.channel; + struct push_context ctx; + struct util_split_prim s; + unsigned instances_left = info->instance_count; + int vtx_value; + unsigned hw_mode = nvgl_primitive(info->mode); + int i; + struct + { + uint8_t* map; + unsigned step; + } per_instance[16]; + unsigned p_overhead = 0 + + 4 /* begin/end */ + + 4; /* potential edgeflag enable/disable */ + + ctx.chan = nvfx->screen->base.channel; + ctx.translate = nvfx->vtxelt->translate; + ctx.idxbuf = NULL; + ctx.vertex_length = nvfx->vtxelt->vertex_length; + ctx.max_vertices_per_packet = nvfx->vtxelt->max_vertices_per_packet; + ctx.edgeflag = 0.5f; + // TODO: figure out if we really want to handle this, and do so in that case + ctx.edgeflag_attr = 0xff; // nvfx->vertprog->cfg.edgeflag_in; + + if(!nvfx->use_vertex_buffers) + { + for(i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i) + { + struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index]; + uint8_t* data = nvfx_buffer(vb->buffer)->data + vb->buffer_offset; + if(info->indexed) + data += info->index_bias * vb->stride; + ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); + } + + if(ctx.edgeflag_attr < 16) + vtx_value = -(ctx.vertex_length + 3); /* vertex data and edgeflag header and value */ + else + { + p_overhead += 1; /* initial vertex_data header */ + vtx_value = -ctx.vertex_length; /* vertex data and edgeflag header and value */ + } + + if (info->indexed) { + // XXX: this case and is broken and probably need a new VTX_ATTR push path + if (nvfx->idxbuf.index_size == 1) + s.emit = emit_vertices_lookup8; + else if (nvfx->idxbuf.index_size == 2) + s.emit = emit_vertices_lookup16; + else + s.emit = emit_vertices_lookup32; + } else + s.emit = emit_vertices; + } + else + { + if(!info->indexed || nvfx->use_index_buffer) + { + s.emit = info->indexed ? emit_ib_ranges : emit_vb_ranges; + p_overhead += 3; + vtx_value = 0; + } + else if (nvfx->idxbuf.index_size == 4) + { + s.emit = emit_elt32; + p_overhead += 1; + vtx_value = 8; + } + else + { + s.emit = (nvfx->idxbuf.index_size == 2) ? emit_elt16 : emit_elt8; + p_overhead += 3; + vtx_value = 7; + } + } + + ctx.idxbias = info->index_bias; + if(nvfx->use_vertex_buffers) + ctx.idxbias -= nvfx->base_vertex; + + /* map index buffer, if present */ + if (info->indexed && !nvfx->use_index_buffer) + ctx.idxbuf = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset; + + s.priv = &ctx; + s.edge = emit_edgeflag; + + for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i) + { + struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index]; + float v[4]; + per_instance[i].step = info->start_instance % ve->instance_divisor; + per_instance[i].map = nvfx_buffer(vb->buffer)->data + vb->buffer_offset + ve->base.src_offset; + + nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0); + + WAIT_RING(chan, 5); + nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp); + } + + /* per-instance loop */ + while (instances_left--) { + int max_verts; + boolean done; + + util_split_prim_init(&s, info->mode, info->start, info->count); + nvfx_state_emit(nvfx); + for(;;) { + max_verts = AVAIL_RING(chan); + max_verts -= p_overhead; + + /* if vtx_value < 0, each vertex is -vtx_value words long + * otherwise, each vertex is 2^(vtx_value) / 255 words long (this is an approximation) + */ + if(vtx_value < 0) + { + max_verts /= -vtx_value; + max_verts -= (max_verts >> 10); /* vertex data headers */ + } + else + { + if(max_verts >= (1 << 23)) /* avoid overflow here */ + max_verts = (1 << 23); + max_verts = (max_verts * 255) >> vtx_value; + } + + //printf("avail %u max_verts %u\n", AVAIL_RING(chan), max_verts); + + if(max_verts >= 16) + { + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING(chan, hw_mode); + done = util_split_prim_next(&s, max_verts); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING(chan, 0); + + if(done) + break; + } + + FIRE_RING(chan); + nvfx_state_emit(nvfx); + } + + /* set data for the next instance, if any changed */ + for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i) + { + struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index]; + + if(++per_instance[i].step == ve->instance_divisor) + { + float v[4]; + per_instance[i].map += vb->stride; + per_instance[i].step = 0; + + nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0); + WAIT_RING(chan, 5); + nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp); + } + } + } +} diff --git a/src/gallium/drivers/nvfx/nvfx_resource.c b/src/gallium/drivers/nvfx/nvfx_resource.c index 1c921b47100..3a46e0a7a57 100644 --- a/src/gallium/drivers/nvfx/nvfx_resource.c +++ b/src/gallium/drivers/nvfx/nvfx_resource.c @@ -59,12 +59,6 @@ nvfx_resource_get_handle(struct pipe_screen *pscreen, void nvfx_init_resource_functions(struct pipe_context *pipe) { - pipe->get_transfer = nvfx_transfer_new; - pipe->transfer_map = nvfx_transfer_map; - pipe->transfer_flush_region = u_default_transfer_flush_region; - pipe->transfer_unmap = nvfx_transfer_unmap; - pipe->transfer_destroy = util_staging_transfer_destroy; - pipe->transfer_inline_write = u_default_transfer_inline_write; pipe->is_resource_referenced = nvfx_resource_is_referenced; } diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h index ff86f6d9cb6..583be4de2ae 100644 --- a/src/gallium/drivers/nvfx/nvfx_resource.h +++ b/src/gallium/drivers/nvfx/nvfx_resource.h @@ -17,8 +17,23 @@ struct nvfx_resource { struct nouveau_bo *bo; }; +static INLINE +struct nvfx_resource *nvfx_resource(struct pipe_resource *resource) +{ + return (struct nvfx_resource *)resource; +} + #define NVFX_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) +#define NVFX_RESOURCE_FLAG_USER (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) + +/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */ +static INLINE boolean +nvfx_resource_mapped_by_gpu(struct pipe_resource *resource) +{ + return nvfx_resource(resource)->bo->handle; +} +/* is resource in VRAM? */ static inline int nvfx_resource_on_gpu(struct pipe_resource* pr) { @@ -63,12 +78,6 @@ struct nvfx_surface { struct nvfx_miptree* temp; }; -static INLINE -struct nvfx_resource *nvfx_resource(struct pipe_resource *resource) -{ - return (struct nvfx_resource *)resource; -} - static INLINE struct nouveau_bo * nvfx_surface_buffer(struct pipe_surface *surf) { @@ -106,22 +115,6 @@ nvfx_miptree_from_handle(struct pipe_screen *pscreen, const struct pipe_resource *template, struct winsys_handle *whandle); -struct pipe_resource * -nvfx_buffer_create(struct pipe_screen *pscreen, - const struct pipe_resource *template); - -void -nvfx_buffer_destroy(struct pipe_screen *pscreen, - struct pipe_resource *presource); - -struct pipe_resource * -nvfx_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes, - unsigned usage); - - - void nvfx_miptree_surface_del(struct pipe_surface *ps); @@ -173,4 +166,58 @@ nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf); void nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf); +struct nvfx_buffer +{ + struct nvfx_resource base; + uint8_t* data; + unsigned size; + + /* the range of data not yet uploaded to the GPU bo */ + unsigned dirty_begin; + unsigned dirty_end; + + /* whether all transfers were unsynchronized */ + boolean dirty_unsynchronized; + + /* whether it would have been profitable to upload + * the latest updated data to the GPU immediately */ + boolean last_update_static; + + /* how many bytes we need to draw before we deem + * the buffer to be static + */ + long long bytes_to_draw_until_static; +}; + +static inline struct nvfx_buffer* nvfx_buffer(struct pipe_resource* pr) +{ + return (struct nvfx_buffer*)pr; +} + +/* this is an heuristic to determine whether we are better off uploading the + * buffer to the GPU, or just continuing pushing it on the FIFO + */ +static inline boolean nvfx_buffer_seems_static(struct nvfx_buffer* buffer) +{ + return buffer->last_update_static + || buffer->bytes_to_draw_until_static < 0; +} + +struct pipe_resource * +nvfx_buffer_create(struct pipe_screen *pscreen, + const struct pipe_resource *template); + +void +nvfx_buffer_destroy(struct pipe_screen *pscreen, + struct pipe_resource *presource); + +struct pipe_resource * +nvfx_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes, + unsigned usage); + +void +nvfx_buffer_upload(struct nvfx_buffer* buffer); + #endif diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index a1b8361a9a4..7e3caf8d2e3 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -163,11 +163,11 @@ nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param) } static boolean -nvfx_screen_surface_format_supported(struct pipe_screen *pscreen, +nvfx_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, unsigned geom_flags) + unsigned bind, unsigned geom_flags) { struct nvfx_screen *screen = nvfx_screen(pscreen); struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front; @@ -175,7 +175,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen, if (sample_count > 1) return FALSE; - if (tex_usage & PIPE_BIND_RENDER_TARGET) { + if (bind & PIPE_BIND_RENDER_TARGET) { switch (format) { case PIPE_FORMAT_B8G8R8A8_UNORM: case PIPE_FORMAT_B8G8R8X8_UNORM: @@ -186,7 +186,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen, } } - if (tex_usage & PIPE_BIND_DEPTH_STENCIL) { + if (bind & PIPE_BIND_DEPTH_STENCIL) { switch (format) { case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: @@ -201,7 +201,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen, } } - if (tex_usage & PIPE_BIND_SAMPLER_VIEW) { + if (bind & PIPE_BIND_SAMPLER_VIEW) { struct nvfx_texture_format* tf = &nvfx_texture_formats[format]; if(util_format_is_s3tc(format) && !util_format_s3tc_enabled) return FALSE; @@ -218,6 +218,22 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen, } } + // note that we do actually support everything through translate + if (bind & PIPE_BIND_VERTEX_BUFFER) { + unsigned type = nvfx_vertex_formats[format]; + if(!type) + return FALSE; + } + + if (bind & PIPE_BIND_INDEX_BUFFER) { + // 8-bit indices supported, but not in hardware index buffer + if(format != PIPE_FORMAT_R16_USCALED && format != PIPE_FORMAT_R32_USCALED) + return FALSE; + } + + if(bind & PIPE_BIND_STREAM_OUTPUT) + return FALSE; + return TRUE; } @@ -387,7 +403,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->destroy = nvfx_screen_destroy; pscreen->get_param = nvfx_screen_get_param; pscreen->get_paramf = nvfx_screen_get_paramf; - pscreen->is_format_supported = nvfx_screen_surface_format_supported; + pscreen->is_format_supported = nvfx_screen_is_format_supported; pscreen->context_create = nvfx_create; switch (dev->chipset & 0xf0) { @@ -419,6 +435,11 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE); + screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE); + + screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384); + screen->inline_cost_per_hardware_cost = atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0")); + screen->static_reuse_threshold = atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0")); screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen); diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h index 4dedbe9cb40..473a1127752 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.h +++ b/src/gallium/drivers/nvfx/nvfx_screen.h @@ -16,6 +16,7 @@ struct nvfx_screen { unsigned is_nv4x; /* either 0 or ~0 */ boolean force_swtnl; + boolean trace_draw; unsigned vertex_buffer_reloc_flags; unsigned index_buffer_reloc_flags; @@ -33,6 +34,18 @@ struct nvfx_screen { struct nouveau_resource *vp_data_heap; struct nv04_2d_context* eng2d; + + /* Once the amount of bytes drawn from the buffer reaches the updated size times this value, + * we will assume that the buffer will be drawn an huge number of times before the + * next modification + */ + float static_reuse_threshold; + + /* Cost of allocating a buffer in terms of the cost of copying a byte to an hardware buffer */ + unsigned buffer_allocation_cost; + + /* inline_cost/hardware_cost conversion ration */ + float inline_cost_per_hardware_cost; }; static INLINE struct nvfx_screen * diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index d459f9a8801..25d29720a85 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -441,83 +441,6 @@ nvfx_set_viewport_state(struct pipe_context *pipe, nvfx->draw_dirty |= NVFX_NEW_VIEWPORT; } -static void -nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_buffer *vb) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - for(unsigned i = 0; i < count; ++i) - { - pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer); - nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset; - nvfx->vtxbuf[i].max_index = vb[i].max_index; - nvfx->vtxbuf[i].stride = vb[i].stride; - } - - for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i) - pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0); - - nvfx->vtxbuf_nr = count; - - nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->draw_dirty |= NVFX_NEW_ARRAYS; -} - -static void -nvfx_set_index_buffer(struct pipe_context *pipe, - const struct pipe_index_buffer *ib) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - /* TODO make this more like a state */ - - if(ib) - { - pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer); - nvfx->idxbuf.index_size = ib->index_size; - nvfx->idxbuf.offset = ib->offset; - } - else - { - pipe_resource_reference(&nvfx->idxbuf.buffer, 0); - nvfx->idxbuf.index_size = 0; - nvfx->idxbuf.offset = 0; - } -} - -static void * -nvfx_vtxelts_state_create(struct pipe_context *pipe, - unsigned num_elements, - const struct pipe_vertex_element *elements) -{ - struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state); - - assert(num_elements < 16); /* not doing fallbacks yet */ - cso->num_elements = num_elements; - memcpy(cso->pipe, elements, num_elements * sizeof(*elements)); - -/* nvfx_vtxelt_construct(cso);*/ - - return (void *)cso; -} - -static void -nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->vtxelt = hwcso; - nvfx->dirty |= NVFX_NEW_ARRAYS; - /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/ -} - void nvfx_init_state_functions(struct nvfx_context *nvfx) { @@ -553,11 +476,4 @@ nvfx_init_state_functions(struct nvfx_context *nvfx) nvfx->pipe.set_polygon_stipple = nvfx_set_polygon_stipple; nvfx->pipe.set_scissor_state = nvfx_set_scissor_state; nvfx->pipe.set_viewport_state = nvfx_set_viewport_state; - - nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; - nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; - nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; - - nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; - nvfx->pipe.set_index_buffer = nvfx_set_index_buffer; } diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index dc70f3de870..b9d18977919 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -8,6 +8,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; unsigned dirty; + unsigned still_dirty = 0; int all_swizzled = -1; boolean flush_tex_cache = FALSE; @@ -52,11 +53,19 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) return FALSE; } - if(dirty & (NVFX_NEW_ARRAYS)) + if(dirty & NVFX_NEW_ARRAYS) { if(!nvfx_vbo_validate(nvfx)) return FALSE; } + + if(dirty & NVFX_NEW_INDEX) + { + if(nvfx->use_index_buffer) + nvfx_idxbuf_validate(nvfx); + else + still_dirty = NVFX_NEW_INDEX; + } } else { @@ -64,7 +73,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP)) nvfx_vertprog_validate(nvfx); - if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG)) + if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_INDEX | NVFX_NEW_FRAGPROG)) nvfx_vtxfmt_validate(nvfx); } @@ -118,7 +127,24 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) OUT_RING(chan, 1); } } - nvfx->dirty = 0; + + nvfx->dirty = dirty & still_dirty; + + unsigned render_temps = nvfx->state.render_temps; + if(render_temps) + { + for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i) + { + if(render_temps & (1 << i)) + util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]), + (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]); + } + + if(render_temps & 0x80) + util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf), + (struct util_dirty_surface*)nvfx->framebuffer.zsbuf); + } + return TRUE; } @@ -137,21 +163,6 @@ nvfx_state_emit(struct nvfx_context *nvfx) ; MARK_RING(chan, max_relocs * 2, max_relocs * 2); nvfx_state_relocate(nvfx); - - unsigned render_temps = nvfx->state.render_temps; - if(render_temps) - { - for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i) - { - if(render_temps & (1 << i)) - util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]), - (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]); - } - - if(render_temps & 0x80) - util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf), - (struct util_dirty_surface*)nvfx->framebuffer.zsbuf); - } } void @@ -161,7 +172,11 @@ nvfx_state_relocate(struct nvfx_context *nvfx) nvfx_fragtex_relocate(nvfx); nvfx_fragprog_relocate(nvfx); if (nvfx->render_mode == HW) + { nvfx_vbo_relocate(nvfx); + if(nvfx->use_index_buffer) + nvfx_idxbuf_relocate(nvfx); + } } boolean diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c index 80b0f21575f..28bbd36c2e8 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_fb.c +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -1,6 +1,5 @@ #include "nvfx_context.h" #include "nvfx_resource.h" -#include "nouveau/nouveau_util.h" #include "util/u_format.h" static inline boolean @@ -125,8 +124,8 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + (util_logbase2(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (util_logbase2(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); } else rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index 7efdd954b4b..135978ad274 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -36,7 +36,6 @@ #include "util/u_blitter.h" #include "nouveau/nouveau_winsys.h" -#include "nouveau/nouveau_util.h" #include "nouveau/nouveau_screen.h" #include "nvfx_context.h" #include "nvfx_screen.h" @@ -62,7 +61,7 @@ nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format) break; default: assert(util_is_pot(bits)); - int shift = log2i(bits) - 3; + int shift = util_logbase2(bits) - 3; assert(shift >= 2); rgn->bpps = 2; shift -= 2; @@ -365,25 +364,29 @@ nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int { struct nvfx_surface* ns = (struct nvfx_surface*)surf; struct pipe_subresource tempsr, surfsr; - struct pipe_resource *idxbuf_buffer; - unsigned idxbuf_format; + struct nvfx_context* nvfx = nvfx_context(pipe); + + // TODO: we really should do this validation before setting these variable in draw calls + unsigned use_vertex_buffers = nvfx->use_vertex_buffers; + boolean use_index_buffer = nvfx->use_index_buffer; + unsigned base_vertex = nvfx->base_vertex; tempsr.face = 0; tempsr.level = 0; surfsr.face = surf->face; surfsr.level = surf->level; - // TODO: do this properly, in blitter save - idxbuf_buffer = ((struct nvfx_context*)pipe)->idxbuf_buffer; - idxbuf_format = ((struct nvfx_context*)pipe)->idxbuf_format; - if(to_temp) nvfx_resource_copy_region(pipe, &ns->temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height); else nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &ns->temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height); - ((struct nvfx_context*)pipe)->idxbuf_buffer = idxbuf_buffer; - ((struct nvfx_context*)pipe)->idxbuf_format = idxbuf_format; + nvfx->use_vertex_buffers = use_vertex_buffers; + nvfx->use_index_buffer = use_index_buffer; + nvfx->base_vertex = base_vertex; + + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; } void diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c index e9c3dd7e551..ca4462ef9dc 100644 --- a/src/gallium/drivers/nvfx/nvfx_transfer.c +++ b/src/gallium/drivers/nvfx/nvfx_transfer.c @@ -26,25 +26,44 @@ nvfx_transfer_new(struct pipe_context *pipe, unsigned usage, const struct pipe_box *box) { - struct nvfx_staging_transfer* tx; - bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR; - - tx = CALLOC_STRUCT(nvfx_staging_transfer); - if(!tx) - return NULL; - - util_staging_transfer_init(pipe, pt, sr, usage, box, direct, tx); + if((usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_DONTBLOCK)) == PIPE_TRANSFER_DONTBLOCK) + { + struct nouveau_bo* bo = ((struct nvfx_resource*)pt)->bo; + if(bo && nouveau_bo_busy(bo, NOUVEAU_BO_WR)) + return NULL; + } if(pt->target == PIPE_BUFFER) { - tx->base.base.slice_stride = tx->base.base.stride = ((struct nvfx_resource*)tx->base.staging_resource)->bo->size; - if(direct) - tx->offset = util_format_get_stride(pt->format, box->x); - else - tx->offset = 0; + // it would be nice if we could avoid all this ridiculous overhead... + struct pipe_transfer* tx; + struct nvfx_buffer* buffer = nvfx_buffer(pt); + + tx = CALLOC_STRUCT(pipe_transfer); + if (!tx) + return NULL; + + pipe_resource_reference(&tx->resource, pt); + tx->sr = sr; + tx->usage = usage; + tx->box = *box; + + tx->slice_stride = tx->stride = util_format_get_stride(pt->format, box->width); + tx->data = buffer->data + util_format_get_stride(pt->format, box->x); + + return tx; } else { + struct nvfx_staging_transfer* tx; + bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR; + + tx = CALLOC_STRUCT(nvfx_staging_transfer); + if(!tx) + return NULL; + + util_staging_transfer_init(pipe, pt, sr, usage, box, direct, &tx->base); + if(direct) { tx->base.base.stride = nvfx_subresource_pitch(pt, sr.level); @@ -66,26 +85,132 @@ nvfx_transfer_new(struct pipe_context *pipe, } } +static void nvfx_buffer_dirty_interval(struct nvfx_buffer* buffer, unsigned begin, unsigned size, boolean unsynchronized) +{ + struct nvfx_screen* screen = nvfx_screen(buffer->base.base.screen); + buffer->last_update_static = buffer->bytes_to_draw_until_static < 0; + if(buffer->dirty_begin == buffer->dirty_end) + { + buffer->dirty_begin = begin; + buffer->dirty_end = begin + size; + buffer->dirty_unsynchronized = unsynchronized; + } + else + { + buffer->dirty_begin = MIN2(buffer->dirty_begin, begin); + buffer->dirty_end = MAX2(buffer->dirty_end, begin + size); + buffer->dirty_unsynchronized &= unsynchronized; + } + + if(unsynchronized) + { + // TODO: revisit this, it doesn't seem quite right + //printf("UNSYNC UPDATE %p %u %u\n", buffer, begin, size); + buffer->bytes_to_draw_until_static += size * screen->static_reuse_threshold; + } + else + buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold; +} + +static void nvfx_transfer_flush_region( struct pipe_context *pipe, + struct pipe_transfer *ptx, + const struct pipe_box *box) +{ + if(ptx->resource->target == PIPE_BUFFER && (ptx->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) + { + struct nvfx_buffer* buffer = nvfx_buffer(ptx->resource); + nvfx_buffer_dirty_interval(buffer, + (uint8_t*)ptx->data - buffer->data + util_format_get_stride(buffer->base.base.format, box->x), + util_format_get_stride(buffer->base.base.format, box->width), + !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED)); + } +} + +static void +nvfx_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx) +{ + if(ptx->resource->target == PIPE_BUFFER) + { + struct nvfx_buffer* buffer = nvfx_buffer(ptx->resource); + if((ptx->usage & (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT)) == PIPE_TRANSFER_WRITE) + nvfx_buffer_dirty_interval(buffer, + (uint8_t*)ptx->data - buffer->data, + ptx->stride, + !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED)); + pipe_resource_reference(&ptx->resource, 0); + FREE(ptx); + } + else + util_staging_transfer_destroy(pipe, ptx); +} + void * nvfx_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx) { - struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx; - if(!ptx->data) + if(ptx->resource->target == PIPE_BUFFER) + return ptx->data; + else { - struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource; - uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo, nouveau_screen_transfer_flags(ptx->usage)); - ptx->data = map + tx->offset; + struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx; + if(!ptx->data) + { + struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource; + uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo, nouveau_screen_transfer_flags(ptx->usage)); + ptx->data = map + tx->offset; + } + + ++tx->map_count; + return ptx->data; } - ++tx->map_count; - return ptx->data; } void nvfx_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx) { - struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx; - struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource; + if(ptx->resource->target != PIPE_BUFFER) + { + struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx; + struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource; + + if(!--tx->map_count) + { + nouveau_screen_bo_unmap(pipe->screen, mt->base.bo); + ptx->data = 0; + } + } +} + +static void nvfx_transfer_inline_write( struct pipe_context *pipe, + struct pipe_resource *pr, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned slice_stride) +{ + if(pr->target != PIPE_BUFFER) + { + u_default_transfer_inline_write(pipe, pr, sr, usage, box, data, stride, slice_stride); + } + else + { + struct nvfx_buffer* buffer = nvfx_buffer(pr); + unsigned begin = util_format_get_stride(pr->format, box->x); + unsigned size = util_format_get_stride(pr->format, box->width); + memcpy(buffer->data + begin, data, size); + nvfx_buffer_dirty_interval(buffer, begin, size, + !!(pr->flags & PIPE_TRANSFER_UNSYNCHRONIZED)); + } +} - if(!--tx->map_count) - nouveau_screen_bo_unmap(pipe->screen, mt->base.bo); +void +nvfx_init_transfer_functions(struct pipe_context *pipe) +{ + pipe->get_transfer = nvfx_transfer_new; + pipe->transfer_map = nvfx_transfer_map; + pipe->transfer_flush_region = nvfx_transfer_flush_region; + pipe->transfer_unmap = nvfx_transfer_unmap; + pipe->transfer_destroy = nvfx_transfer_destroy; + pipe->transfer_inline_write = nvfx_transfer_inline_write; } diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 4aa37938425..a6cd1256350 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -2,6 +2,7 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "translate/translate.h" #include "nvfx_context.h" #include "nvfx_state.h" @@ -10,646 +11,583 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_class.h" #include "nouveau/nouveau_pushbuf.h" -#include "nouveau/nouveau_util.h" -static INLINE int -nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +static inline unsigned +util_guess_unique_indices_count(unsigned mode, unsigned indices) { - switch (pipe) { - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - *fmt = NV34TCL_VTXFMT_TYPE_FLOAT; - break; - case PIPE_FORMAT_R16_FLOAT: - case PIPE_FORMAT_R16G16_FLOAT: - case PIPE_FORMAT_R16G16B16_FLOAT: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - *fmt = NV34TCL_VTXFMT_TYPE_HALF; - break; - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - *fmt = NV34TCL_VTXFMT_TYPE_UBYTE; - break; - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *fmt = NV34TCL_VTXFMT_TYPE_USHORT; - break; - default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; + /* Euler's formula gives V = + * = E - F + 2 = + * = F * (polygon_edges / 2 - 1) + 2 = + * = F * (polygon_edges - 2) / 2 + 2 = + * = indices * (polygon_edges - 2) / (2 * indices_per_face) + 2 + * = indices * (1 / 2 - 1 / polygon_edges) + 2 + */ + switch(mode) + { + case PIPE_PRIM_LINES: + return indices >> 1; + case PIPE_PRIM_TRIANGLES: + { + // avoid an expensive division by 3 using the multiplicative inverse mod 2^32 + unsigned q; + unsigned inv3 = 2863311531; + indices >>= 1; + q = indices * inv3; + if(unlikely(q >= indices)) + { + q += inv3; + if(q >= indices) + q += inv3; + } + return indices + 2; + //return indices / 6 + 2; } - - switch (pipe) { - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R16_FLOAT: - case PIPE_FORMAT_R16_SSCALED: - *ncomp = 1; - break; - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R16G16_FLOAT: - case PIPE_FORMAT_R16G16_SSCALED: - *ncomp = 2; - break; - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R16G16B16_FLOAT: - case PIPE_FORMAT_R16G16B16_SSCALED: - *ncomp = 3; - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *ncomp = 4; - break; + // guess that indexed quads are created by successive connections, since a closed mesh seems unlikely + case PIPE_PRIM_QUADS: + return (indices >> 1) + 2; + // return (indices >> 2) + 2; // if it is a closed mesh default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; + return indices; } - - return 0; } -static boolean -nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib, - unsigned ib_size) +static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info) { - unsigned type; - - if (!ib) { - nvfx->idxbuf_buffer = NULL; - nvfx->idxbuf_format = 0xdeadbeef; - return FALSE; + struct nvfx_context* nvfx = nvfx_context(pipe); + unsigned hardware_cost = 0; + unsigned inline_cost = 0; + unsigned unique_vertices; + unsigned upload_mode; + if (info->indexed) + unique_vertices = util_guess_unique_indices_count(info->mode, info->count); + else + unique_vertices = info->count; + + /* Here we try to figure out if we are better off writing vertex data directly on the FIFO, + * or create hardware buffer objects and pointing the hardware to them. + * + * This is done by computing the total memcpy cost of each option, ignoring uploads + * if we think that the buffer is static and thus the upload cost will be amortized over + * future draw calls. + * + * For instance, if everything looks static, we will always create buffer objects, while if + * everything is a user buffer and we are not doing indexed drawing, we never do. + * + * Other interesting cases are where a small user vertex buffer, but a huge user index buffer, + * where we will upload the vertex buffer, so that we can use hardware index lookup, and + * the opposite case, where we instead do index lookup in software to avoid uploading + * a huge amount of vertex data that is not going to be used. + * + * Otherwise, we generally move to the GPU the after it has been pushed + * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times to the GPU without having + * been updated with a transfer (or just the buffer having been destroyed). + * + * There is no special handling for user buffers, since applications can use + * OpenGL VBOs in a one-shot fashion. OpenGL 3/4 core profile forces this + * by the way. + * + * Note that currently we don't support only putting some data on the FIFO, and + * some on vertex buffers (constant and instanced data is independent from this). + * + * nVidia doesn't seem to do this either, even though it should be at least + * doable with VTX_ATTR and possibly with VERTEX_DATA too if not indexed. + */ + + for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++) + { + struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index]; + struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer); + buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices; + if (!nvfx_buffer_seems_static(buffer)) + { + hardware_cost += buffer->dirty_end - buffer->dirty_begin; + if (!buffer->base.bo) + hardware_cost += nvfx->screen->buffer_allocation_cost; + } + inline_cost += vbi->per_vertex_size * info->count; } - if (!nvfx->screen->index_buffer_reloc_flags || ib_size == 1) - return FALSE; + float best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f; + boolean prefer_hardware_indices = FALSE; + unsigned index_inline_cost = 0; + unsigned index_hardware_cost = 0; - switch (ib_size) { - case 2: - type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; - break; - case 4: - type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; - break; - default: - return FALSE; - } + if (info->indexed) + { + index_inline_cost = nvfx->idxbuf.index_size * info->count; + if (nvfx->screen->index_buffer_reloc_flags + && (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4) + && !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1))) + { + struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer); + buffer->bytes_to_draw_until_static -= index_inline_cost; - if (ib != nvfx->idxbuf_buffer || - type != nvfx->idxbuf_format) { - nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->idxbuf_buffer = ib; - nvfx->idxbuf_format = type; - } + prefer_hardware_indices = TRUE; - return TRUE; -} + if (!nvfx_buffer_seems_static(buffer)) + { + index_hardware_cost = buffer->dirty_end - buffer->dirty_begin; + if (!buffer->base.bo) + index_hardware_cost += nvfx->screen->buffer_allocation_cost; + } -// type must be floating point -static inline void -nvfx_vbo_static_attrib(struct nvfx_context *nvfx, - int attrib, struct pipe_vertex_element *ve, - struct pipe_vertex_buffer *vb, unsigned ncomp) -{ - struct pipe_transfer *transfer; - struct nouveau_channel* chan = nvfx->screen->base.channel; - void *map; - float *v; - - map = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer); - map = (uint8_t *) map + vb->buffer_offset + ve->src_offset; - - v = map; - - switch (ncomp) { - case 4: - OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4)); - OUT_RING(chan, fui(v[0])); - OUT_RING(chan, fui(v[1])); - OUT_RING(chan, fui(v[2])); - OUT_RING(chan, fui(v[3])); - break; - case 3: - OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3)); - OUT_RING(chan, fui(v[0])); - OUT_RING(chan, fui(v[1])); - OUT_RING(chan, fui(v[2])); - break; - case 2: - OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2)); - OUT_RING(chan, fui(v[0])); - OUT_RING(chan, fui(v[1])); - break; - case 1: - OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1)); - OUT_RING(chan, fui(v[0])); - break; + if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost) + { + best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost; + } + else + { + best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost; + prefer_hardware_indices = TRUE; + } + } } - pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer); + /* let's finally figure out which of the 3 paths we want to take */ + if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost)) + upload_mode = 1 + prefer_hardware_indices; + else + upload_mode = 0; + +#ifdef DEBUG + if (unlikely(nvfx->screen->trace_draw)) + { + fprintf(stderr, "DRAW"); + if (info->indexed) + { + fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size); + if (info->index_bias) + fprintf(stderr, " biased %u", info->index_bias); + fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index); + } + if (info->instance_count > 1) + fprintf(stderr, " %u instances from %u", info->instance_count, info->indexed); + fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode); + if (!upload_mode) + fprintf(stderr, " -> inline vertex data"); + else if (upload_mode == 2 || !info->indexed) + fprintf(stderr, " -> buffer range"); + else + fprintf(stderr, " -> inline indices"); + fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost); + for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i) + { + struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index]; + struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer); + if (i) + fprintf(stderr, ", "); + fprintf(stderr, "%p%s left %Li", buffer, buffer->last_update_static ? " static" : "", buffer->bytes_to_draw_until_static); + } + fprintf(stderr, ">\n"); + } +#endif + + return upload_mode; } -static void -nvfx_draw_arrays(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) +void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - unsigned restart = 0; - - nvfx_vbo_set_idxbuf(nvfx, NULL, 0); - if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) { - nvfx_draw_elements_swtnl(pipe, NULL, 0, 0, - mode, start, count); - return; - } + unsigned upload_mode = 0; - while (count) { - unsigned vc, nr, avail; + if (!nvfx->vtxelt->needs_translate) + upload_mode = nvfx_decide_upload_mode(pipe, info); - nvfx_state_emit(nvfx); + nvfx->use_index_buffer = upload_mode > 1; - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + if ((upload_mode > 0) != nvfx->use_vertex_buffers) + { + nvfx->use_vertex_buffers = (upload_mode > 0); + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; + } - vc = nouveau_vbuf_split(avail, 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; + if (upload_mode > 0) + { + for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++) + { + struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index]; + nvfx_buffer_upload(nvfx_buffer(vb->buffer)); } - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + if (upload_mode > 1) + { + nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer)); - nr = (vc & 0xff); - if (nr) { - OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1)); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; + if (unlikely(info->index_bias != nvfx->base_vertex)) + { + nvfx->base_vertex = info->index_bias; + nvfx->dirty |= NVFX_NEW_ARRAYS; + } } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push)); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; + else + { + if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex)) + { + nvfx->base_vertex = 0; + nvfx->dirty |= NVFX_NEW_ARRAYS; } } - - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); - - count -= vc; - start = restart; } - pipe->flush(pipe, 0, NULL); + if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) + nvfx_draw_vbo_swtnl(pipe, info); + else + nvfx_push_vbo(pipe, info); } -static INLINE void -nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) +boolean +nvfx_vbo_validate(struct nvfx_context *nvfx) { - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; + struct nouveau_channel* chan = nvfx->screen->base.channel; + int i; + int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); + unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD; - while (count) { - uint8_t *elts = (uint8_t *)ib + start; - unsigned vc, push, restart = 0, avail; + if (!elements) + return TRUE; - nvfx_state_emit(nvfx); + MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2); + for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i) + { + struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer); + float v[4]; + ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0); + nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp); + } - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ - vc = nouveau_vbuf_split(avail, 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; + OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements)); + if(nvfx->use_vertex_buffers) + { + unsigned idx = 0; + for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) { + struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + if(idx != ve->idx) + { + assert(idx < ve->idx); + OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx); + idx = ve->idx; + } - if (vc & 1) { - OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1)); - OUT_RING (chan, elts[0]); - elts++; vc--; + OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT)); + ++idx; } + if(idx != nvfx->vtxelt->num_elements) + OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx); + } + else + OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements); - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1)); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); + for(i = nvfx->vtxelt->num_elements; i < elements; ++i) + OUT_RING(chan, NV34TCL_VTXFMT_TYPE_32_FLOAT); - vc -= push; - elts += push; + if(nvfx->is_nv4x) { + unsigned i; + /* seems to be some kind of cache flushing */ + for(i = 0; i < 3; ++i) { + OUT_RING(chan, RING_3D(0x1718, 1)); + OUT_RING(chan, 0); } - - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); - - start = restart; } -} - -static INLINE void -nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - - while (count) { - uint16_t *elts = (uint16_t *)ib + start; - unsigned vc, push, restart = 0, avail; - nvfx_state_emit(nvfx); - - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ - - vc = nouveau_vbuf_split(avail, 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; + OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements)); + if(nvfx->use_vertex_buffers) + { + unsigned idx = 0; + for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) { + struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + for(; idx < ve->idx; ++idx) + OUT_RING(chan, 0); - if (vc & 1) { - OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1)); - OUT_RING (chan, elts[0]); - elts++; vc--; + OUT_RELOC(chan, bo, + vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); + ++idx; } - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1)); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); - - vc -= push; - elts += push; - } + for(; idx < elements; ++idx) + OUT_RING(chan, 0); + } + else + { + for (i = 0; i < elements; i++) + OUT_RING(chan, 0); + } - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); + OUT_RING(chan, RING_3D(0x1710, 1)); + OUT_RING(chan, 0); - start = restart; - } + nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements; + return TRUE; } -static INLINE void -nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) +void +nvfx_vbo_relocate(struct nvfx_context *nvfx) { - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - - while (count) { - uint32_t *elts = (uint32_t *)ib + start; - unsigned vc, push, restart = 0, avail; - - nvfx_state_emit(nvfx); - - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ - - vc = nouveau_vbuf_split(avail, 5, 1, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); - - while (vc) { - push = MIN2(vc, 2047); - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push)); - OUT_RINGp (chan, elts, push); - - vc -= push; - elts += push; - } + if(!nvfx->use_vertex_buffers) + return; - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; + int i; - start = restart; + MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3); + for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) { + struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; + + OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(ve->idx), 1), + vb_flags, 0, 0); + OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); } } static void -nvfx_draw_elements_inline(struct pipe_context *pipe, - struct pipe_resource *ib, - unsigned ib_size, int ib_bias, - unsigned mode, unsigned start, unsigned count) +nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags) { - struct nvfx_context *nvfx = nvfx_context(pipe); - struct pipe_transfer *transfer; - void *map; - - map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer); - if (!ib) { - NOUVEAU_ERR("failed mapping ib\n"); - return; - } + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV34TCL_IDXBUF_FORMAT_TYPE_U16 : NV34TCL_IDXBUF_FORMAT_TYPE_U32; + struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo; + ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD; - assert(ib_bias == 0); - - switch (ib_size) { - case 1: - nvfx_draw_elements_u08(nvfx, map, mode, start, count); - break; - case 2: - nvfx_draw_elements_u16(nvfx, map, mode, start, count); - break; - case 4: - nvfx_draw_elements_u32(nvfx, map, mode, start, count); - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - break; - } + assert(nvfx->screen->index_buffer_reloc_flags); - pipe_buffer_unmap(pipe, ib, transfer); + MARK_RING(chan, 3, 3); + if(ib_flags & NOUVEAU_BO_DUMMY) + OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), ib_flags, 0, 0); + else + OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2)); + OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR, + 0, NV34TCL_IDXBUF_FORMAT_DMA1); } -static void -nvfx_draw_elements_vbo(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) +void +nvfx_idxbuf_validate(struct nvfx_context* nvfx) { - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - unsigned restart = 0; - - while (count) { - unsigned nr, vc, avail; - - nvfx_state_emit(nvfx); + nvfx_idxbuf_emit(nvfx, 0); +} - avail = AVAIL_RING(chan); - avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ +void +nvfx_idxbuf_relocate(struct nvfx_context* nvfx) +{ + nvfx_idxbuf_emit(nvfx, NOUVEAU_BO_DUMMY); +} - vc = nouveau_vbuf_split(avail, 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; - } +unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] = +{ + [PIPE_FORMAT_R32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT, + [PIPE_FORMAT_R32G32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT, + [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT, + [PIPE_FORMAT_R32G32B32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT, + [PIPE_FORMAT_R16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT, + [PIPE_FORMAT_R16G16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT, + [PIPE_FORMAT_R16G16B16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT, + [PIPE_FORMAT_R8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM, + [PIPE_FORMAT_R8G8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM, + [PIPE_FORMAT_R8G8B8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM, + [PIPE_FORMAT_R8G8B8A8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM, + [PIPE_FORMAT_R8G8B8A8_USCALED] = NV34TCL_VTXFMT_TYPE_8_USCALED, + [PIPE_FORMAT_R16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM, + [PIPE_FORMAT_R16G16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM, + [PIPE_FORMAT_R16G16B16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM, + [PIPE_FORMAT_R16G16B16A16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM, + [PIPE_FORMAT_R16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED, + [PIPE_FORMAT_R16G16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED, + [PIPE_FORMAT_R16G16B16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED, + [PIPE_FORMAT_R16G16B16A16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED, +}; + +static void * +nvfx_vtxelts_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nvfx_context* nvfx = nvfx_context(pipe); + struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state); + struct translate_key transkey; + unsigned per_vertex_size[16]; + memset(per_vertex_size, 0, sizeof(per_vertex_size)); + + unsigned vb_compacted_index[16]; + + assert(num_elements < 16); /* not doing fallbacks yet */ + + memcpy(cso->pipe, elements, num_elements * sizeof(elements[0])); + cso->num_elements = num_elements; + cso->needs_translate = FALSE; + + transkey.nr_elements = 0; + transkey.output_stride = 0; + + for(unsigned i = 0; i < num_elements; ++i) + { + const struct pipe_vertex_element* ve = &elements[i]; + if(!ve->instance_divisor) + per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1); + } + + for(unsigned i = 0; i < 16; ++i) + { + if(per_vertex_size[i]) + { + unsigned idx = cso->num_per_vertex_buffer_infos++; + cso->per_vertex_buffer_info[idx].vertex_buffer_index = i; + cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i]; + vb_compacted_index[i] = idx; + } + } + + for(unsigned i = 0; i < num_elements; ++i) + { + const struct pipe_vertex_element* ve = &elements[i]; + unsigned type = nvfx_vertex_formats[ve->src_format]; + unsigned ncomp = util_format_get_nr_components(ve->src_format); - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, nvgl_primitive(mode)); + //if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX) + if(ve->instance_divisor) + { + struct nvfx_low_frequency_element* lfve; + cso->vtxfmt[i] = NV34TCL_VTXFMT_TYPE_32_FLOAT; + + //if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT) + if(0) + lfve = &cso->constant[cso->num_constant++]; + else + { + lfve = &cso->per_instance[cso->num_per_instance++].base; + ((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor; + } - nr = (vc & 0xff); - if (nr) { - OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1)); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; + lfve->idx = i; + lfve->vertex_buffer_index = ve->vertex_buffer_index; + lfve->src_offset = ve->src_offset; + lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float; + lfve->ncomp = ncomp; } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push)); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; + else + { + unsigned idx; + + idx = cso->num_per_vertex++; + cso->per_vertex[idx].idx = i; + cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index; + cso->per_vertex[idx].src_offset = ve->src_offset; + + idx = transkey.nr_elements++; + transkey.element[idx].input_format = ve->src_format; + transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index]; + transkey.element[idx].input_offset = ve->src_offset; + transkey.element[idx].instance_divisor = 0; + transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL; + if(type) + { + transkey.element[idx].output_format = ve->src_format; + cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type; + } + else + { + unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT}; + transkey.element[idx].output_format = float32[ncomp - 1]; + cso->needs_translate = TRUE; + cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | NV34TCL_VTXFMT_TYPE_32_FLOAT; } + transkey.element[idx].output_offset = transkey.output_stride; + transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3; } + } - OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); - OUT_RING (chan, 0); + cso->translate = translate_generic_create(&transkey); + cso->vertex_length = transkey.output_stride >> 2; + cso->max_vertices_per_packet = 2047 / cso->vertex_length; - count -= vc; - start = restart; - } + return (void *)cso; } static void -nvfx_draw_elements(struct pipe_context *pipe, - struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, - unsigned mode, unsigned start, unsigned count) +nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nvfx_context *nvfx = nvfx_context(pipe); - boolean idxbuf; - - idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); - if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) { - nvfx_draw_elements_swtnl(pipe, - indexBuffer, indexSize, indexBias, - mode, start, count); - return; - } - - if (idxbuf) { - nvfx_draw_elements_vbo(pipe, mode, start, count); - } else { - nvfx_draw_elements_inline(pipe, - indexBuffer, indexSize, indexBias, - mode, start, count); - } - - pipe->flush(pipe, 0, NULL); + FREE(hwcso); } -void -nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +static void +nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) { struct nvfx_context *nvfx = nvfx_context(pipe); - if (info->indexed && nvfx->idxbuf.buffer) { - unsigned offset; - - assert(nvfx->idxbuf.offset % nvfx->idxbuf.index_size == 0); - offset = nvfx->idxbuf.offset / nvfx->idxbuf.index_size; - - nvfx_draw_elements(pipe, - nvfx->idxbuf.buffer, - nvfx->idxbuf.index_size, - info->index_bias, - info->mode, - info->start + offset, - info->count); - } - else { - nvfx_draw_arrays(pipe, - info->mode, - info->start, - info->count); - } + nvfx->vtxelt = hwcso; + nvfx->use_vertex_buffers = -1; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; } -boolean -nvfx_vbo_validate(struct nvfx_context *nvfx) +static void +nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) { - struct nouveau_channel* chan = nvfx->screen->base.channel; - struct pipe_resource *ib = nvfx->idxbuf_buffer; - unsigned ib_format = nvfx->idxbuf_format; - int i; - int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr); - uint32_t vtxfmt[16]; - unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD; - - if (!elements) - return TRUE; - - nvfx->vbo_bo = 0; - - MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2); - for (i = 0; i < nvfx->vtxelt->num_elements; i++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; - unsigned type, ncomp; - - ve = &nvfx->vtxelt->pipe[i]; - vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; - - if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { - MARK_UNDO(chan); - nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS; - return FALSE; - } + struct nvfx_context *nvfx = nvfx_context(pipe); - if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) { - nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp); - vtxfmt[i] = type; - } else { - vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | - (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type); - nvfx->vbo_bo |= (1 << i); - } + for(unsigned i = 0; i < count; ++i) + { + pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer); + nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset; + nvfx->vtxbuf[i].max_index = vb[i].max_index; + nvfx->vtxbuf[i].stride = vb[i].stride; } - for(; i < elements; ++i) - vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT; - - OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements)); - OUT_RINGp(chan, vtxfmt, elements); - - if(nvfx->is_nv4x) { - unsigned i; - /* seems to be some kind of cache flushing */ - for(i = 0; i < 3; ++i) { - OUT_RING(chan, RING_3D(0x1718, 1)); - OUT_RING(chan, 0); - } - } + for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i) + pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0); - OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements)); - for (i = 0; i < nvfx->vtxelt->num_elements; i++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; + nvfx->vtxbuf_nr = count; + nvfx->use_vertex_buffers = -1; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; +} - ve = &nvfx->vtxelt->pipe[i]; - vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; +static void +nvfx_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); - if (!(nvfx->vbo_bo & (1 << i))) - OUT_RING(chan, 0); - else - { - struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; - OUT_RELOC(chan, bo, - vb->buffer_offset + ve->src_offset, - vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - 0, NV34TCL_VTXBUF_ADDRESS_DMA1); - } + if(ib) + { + pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer); + nvfx->idxbuf.index_size = ib->index_size; + nvfx->idxbuf.offset = ib->offset; } - - for (; i < elements; i++) - OUT_RING(chan, 0); - - OUT_RING(chan, RING_3D(0x1710, 1)); - OUT_RING(chan, 0); - - if (ib) { - unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD; - struct nouveau_bo* bo = nvfx_resource(ib)->bo; - - assert(nvfx->screen->index_buffer_reloc_flags); - - OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2)); - OUT_RELOC(chan, bo, 0, ib_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR, - 0, NV34TCL_IDXBUF_FORMAT_DMA1); + else + { + pipe_resource_reference(&nvfx->idxbuf.buffer, 0); + nvfx->idxbuf.index_size = 0; + nvfx->idxbuf.offset = 0; } - nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements; - return TRUE; + nvfx->dirty |= NVFX_NEW_INDEX; + nvfx->draw_dirty |= NVFX_NEW_INDEX; } void -nvfx_vbo_relocate(struct nvfx_context *nvfx) +nvfx_init_vbo_functions(struct nvfx_context *nvfx) { - struct nouveau_channel* chan = nvfx->screen->base.channel; - unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; - int i; + nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; + nvfx->pipe.set_index_buffer = nvfx_set_index_buffer; - MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3); - for(i = 0; i < nvfx->vtxelt->num_elements; ++i) { - if(nvfx->vbo_bo & (1 << i)) { - struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i]; - struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; - struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; - OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1), - vb_flags, 0, 0); - OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset, - vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - 0, NV34TCL_VTXBUF_ADDRESS_DMA1); - } - } - - if(nvfx->idxbuf_buffer) - { - unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; - struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf_buffer)->bo; - - assert(nvfx->screen->index_buffer_reloc_flags); - - OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), - ib_flags, 0, 0); - OUT_RELOC(chan, bo, 0, - ib_flags | NOUVEAU_BO_LOW, 0, 0); - OUT_RELOC(chan, bo, nvfx->idxbuf_format, - ib_flags | NOUVEAU_BO_OR, - 0, NV34TCL_IDXBUF_FORMAT_DMA1); - } + nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; + nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; + nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; } diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 24d9846310e..939d2b83aee 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -10,6 +10,7 @@ #include "nvfx_context.h" #include "nvfx_state.h" +#include "nvfx_resource.h" /* TODO (at least...): * 1. Indexed consts + ARL @@ -874,7 +875,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) struct nouveau_grobj *eng3d = screen->eng3d; struct nvfx_vertex_program *vp; struct pipe_resource *constbuf; - struct pipe_transfer *transfer = NULL; boolean upload_code = FALSE, upload_data = FALSE; int i; @@ -983,11 +983,8 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) if (vp->nr_consts) { float *map = NULL; - if (constbuf) { - map = pipe_buffer_map(pipe, constbuf, - PIPE_TRANSFER_READ, - &transfer); - } + if (constbuf) + map = nvfx_buffer(constbuf)->data; for (i = 0; i < vp->nr_consts; i++) { struct nvfx_vertex_program_data *vpd = &vp->consts[i]; @@ -1005,9 +1002,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) OUT_RING (chan, i + vp->data->start); OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } - - if (constbuf) - pipe_buffer_unmap(pipe, constbuf, transfer); } /* Upload vtxprog */ |