Diffstat (limited to 'src/gallium')
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_class.h  |   12
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_util.h   |   91
-rw-r--r--  src/gallium/drivers/nvfx/Makefile            |    1
-rw-r--r--  src/gallium/drivers/nvfx/nv30_fragtex.c      |    7
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_buffer.c       |   98
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_context.c      |    3
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_context.h      |   99
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_draw.c         |   59
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_fragprog.c     |    7
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_push.c         |  402
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_resource.c     |    6
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_resource.h     |   91
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_screen.c       |   33
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_screen.h       |   13
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_state.c        |   84
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_state_emit.c   |   51
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_state_fb.c     |    5
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_surface.c      |   23
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_transfer.c     |  173
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_vbo.c          | 1016
-rw-r--r--  src/gallium/drivers/nvfx/nvfx_vertprog.c     |   12
21 files changed, 1365 insertions(+), 921 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h
index 685fa00b455..14c11b278ad 100644
--- a/src/gallium/drivers/nouveau/nouveau_class.h
+++ b/src/gallium/drivers/nouveau/nouveau_class.h
@@ -6149,6 +6149,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV34TCL_FP_REG_CONTROL_UNK1_MASK 0xffff0000
#define NV34TCL_FP_REG_CONTROL_UNK0_SHIFT 0
#define NV34TCL_FP_REG_CONTROL_UNK0_MASK 0x0000ffff
+#define NV34TCL_EDGEFLAG_ENABLE 0x0000145c
#define NV34TCL_VP_CLIP_PLANES_ENABLE 0x00001478
#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 (1 << 1)
#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 (1 << 5)
@@ -6182,10 +6183,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV34TCL_VTXFMT__SIZE 0x00000010
#define NV34TCL_VTXFMT_TYPE_SHIFT 0
#define NV34TCL_VTXFMT_TYPE_MASK 0x0000000f
-#define NV34TCL_VTXFMT_TYPE_FLOAT 0x00000002
-#define NV34TCL_VTXFMT_TYPE_HALF 0x00000003
-#define NV34TCL_VTXFMT_TYPE_UBYTE 0x00000004
-#define NV34TCL_VTXFMT_TYPE_USHORT 0x00000005
+#define NV34TCL_VTXFMT_TYPE_16_SNORM 0x00000001
+#define NV34TCL_VTXFMT_TYPE_32_FLOAT 0x00000002
+#define NV34TCL_VTXFMT_TYPE_16_FLOAT 0x00000003
+#define NV34TCL_VTXFMT_TYPE_8_UNORM 0x00000004
+#define NV34TCL_VTXFMT_TYPE_16_SSCALED 0x00000005
+#define NV34TCL_VTXFMT_TYPE_11_11_10_SNORM 0x00000006
+#define NV34TCL_VTXFMT_TYPE_8_USCALED 0x00000007
#define NV34TCL_VTXFMT_SIZE_SHIFT 4
#define NV34TCL_VTXFMT_SIZE_MASK 0x000000f0
#define NV34TCL_VTXFMT_STRIDE_SHIFT 8
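As a quick orientation for the renamed vertex-format types above, here is a minimal sketch (not part of the patch) of how one NV34TCL_VTXFMT[] word is assembled from these defines; the component count and stride are made-up illustration values.

    /* Hypothetical: a 3-component 32-bit float attribute with a 24-byte stride. */
    unsigned vtxfmt = NV34TCL_VTXFMT_TYPE_32_FLOAT          /* type, bits 3:0   */
                    | (3  << NV34TCL_VTXFMT_SIZE_SHIFT)     /* component count  */
                    | (24 << NV34TCL_VTXFMT_STRIDE_SHIFT);  /* stride in bytes  */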
diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h
deleted file mode 100644
index b165f7a611a..00000000000
--- a/src/gallium/drivers/nouveau/nouveau_util.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef __NOUVEAU_UTIL_H__
-#define __NOUVEAU_UTIL_H__
-
-/* Determine how many vertices can be pushed into the command stream.
- * Where the remaining space isn't large enough to represent all verices,
- * split the buffer at primitive boundaries.
- *
- * Returns a count of vertices that can be rendered, and an index to
- * restart drawing at after a flush.
- */
-static INLINE unsigned
-nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp,
- unsigned mode, unsigned start, unsigned count,
- unsigned *restart)
-{
- int max, adj = 0;
-
- max = remaining - overhead;
- if (max < 0)
- return 0;
-
- max *= vpp;
- if (max >= count)
- return count;
-
- switch (mode) {
- case PIPE_PRIM_POINTS:
- break;
- case PIPE_PRIM_LINES:
- max = max & 1;
- break;
- case PIPE_PRIM_TRIANGLES:
- max = max - (max % 3);
- break;
- case PIPE_PRIM_QUADS:
- max = max & ~3;
- break;
- case PIPE_PRIM_LINE_LOOP:
- case PIPE_PRIM_LINE_STRIP:
- if (max < 2)
- max = 0;
- adj = 1;
- break;
- case PIPE_PRIM_POLYGON:
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_TRIANGLE_FAN:
- if (max < 3)
- max = 0;
- adj = 2;
- break;
- case PIPE_PRIM_QUAD_STRIP:
- if (max < 4)
- max = 0;
- adj = 3;
- break;
- default:
- assert(0);
- }
-
- *restart = start + max - adj;
- return max;
-}
-
-/* Integer base-2 logarithm, rounded towards zero. */
-static INLINE unsigned log2i(unsigned i)
-{
- unsigned r = 0;
-
- if (i & 0xffff0000) {
- i >>= 16;
- r += 16;
- }
- if (i & 0x0000ff00) {
- i >>= 8;
- r += 8;
- }
- if (i & 0x000000f0) {
- i >>= 4;
- r += 4;
- }
- if (i & 0x0000000c) {
- i >>= 2;
- r += 2;
- }
- if (i & 0x00000002) {
- r += 1;
- }
- return r;
-}
-
-#endif
diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile
index 2834f8984c7..6cbbad699eb 100644
--- a/src/gallium/drivers/nvfx/Makefile
+++ b/src/gallium/drivers/nvfx/Makefile
@@ -14,6 +14,7 @@ C_SOURCES = \
nv30_fragtex.c \
nv40_fragtex.c \
nvfx_miptree.c \
+ nvfx_push.c \
nvfx_query.c \
nvfx_resource.c \
nvfx_screen.c \
diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c
index 63c578a0ce1..db8a8fc4b08 100644
--- a/src/gallium/drivers/nvfx/nv30_fragtex.c
+++ b/src/gallium/drivers/nvfx/nv30_fragtex.c
@@ -1,7 +1,6 @@
#include "util/u_format.h"
#include "nvfx_context.h"
-#include "nouveau/nouveau_util.h"
#include "nvfx_tex.h"
#include "nvfx_resource.h"
@@ -44,9 +43,9 @@ nv30_sampler_view_init(struct pipe_context *pipe,
txf = sv->u.init_fmt;
txf |= (level != sv->base.last_level ? NV34TCL_TX_FORMAT_MIPMAP : 0);
- txf |= log2i(u_minify(pt->width0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
- txf |= log2i(u_minify(pt->height0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
- txf |= log2i(u_minify(pt->depth0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
+ txf |= util_logbase2(u_minify(pt->width0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
+ txf |= util_logbase2(u_minify(pt->height0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
+ txf |= util_logbase2(u_minify(pt->depth0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
txf |= 0x10000;
sv->u.nv30.fmt[0] = tf->fmt[0] | txf;
diff --git a/src/gallium/drivers/nvfx/nvfx_buffer.c b/src/gallium/drivers/nvfx/nvfx_buffer.c
index 44680e51959..89bb8570efd 100644
--- a/src/gallium/drivers/nvfx/nvfx_buffer.c
+++ b/src/gallium/drivers/nvfx/nvfx_buffer.c
@@ -6,13 +6,16 @@
#include "nouveau/nouveau_screen.h"
#include "nouveau/nouveau_winsys.h"
#include "nvfx_resource.h"
+#include "nvfx_screen.h"
void nvfx_buffer_destroy(struct pipe_screen *pscreen,
struct pipe_resource *presource)
{
- struct nvfx_resource *buffer = nvfx_resource(presource);
+ struct nvfx_buffer *buffer = nvfx_buffer(presource);
- nouveau_screen_bo_release(pscreen, buffer->bo);
+ if(!(buffer->base.base.flags & NVFX_RESOURCE_FLAG_USER))
+ align_free(buffer->data);
+ nouveau_screen_bo_release(pscreen, buffer->base.bo);
FREE(buffer);
}
@@ -20,31 +23,22 @@ struct pipe_resource *
nvfx_buffer_create(struct pipe_screen *pscreen,
const struct pipe_resource *template)
{
- struct nvfx_resource *buffer;
+ struct nvfx_screen* screen = nvfx_screen(pscreen);
+ struct nvfx_buffer* buffer;
- buffer = CALLOC_STRUCT(nvfx_resource);
+ buffer = CALLOC_STRUCT(nvfx_buffer);
if (!buffer)
return NULL;
- buffer->base = *template;
- buffer->base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
- pipe_reference_init(&buffer->base.reference, 1);
- buffer->base.screen = pscreen;
+ buffer->base.base = *template;
+ buffer->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+ pipe_reference_init(&buffer->base.base.reference, 1);
+ buffer->base.base.screen = pscreen;
+ buffer->size = util_format_get_stride(template->format, template->width0);
+ buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold;
+ buffer->data = align_malloc(buffer->size, 16);
- buffer->bo = nouveau_screen_bo_new(pscreen,
- 16,
- buffer->base.usage,
- buffer->base.bind,
- buffer->base.width0);
-
- if (buffer->bo == NULL)
- goto fail;
-
- return &buffer->base;
-
-fail:
- FREE(buffer);
- return NULL;
+ return &buffer->base.base;
}
@@ -54,29 +48,49 @@ nvfx_user_buffer_create(struct pipe_screen *pscreen,
unsigned bytes,
unsigned usage)
{
- struct nvfx_resource *buffer;
+ struct nvfx_screen* screen = nvfx_screen(pscreen);
+ struct nvfx_buffer* buffer;
- buffer = CALLOC_STRUCT(nvfx_resource);
+ buffer = CALLOC_STRUCT(nvfx_buffer);
if (!buffer)
return NULL;
- pipe_reference_init(&buffer->base.reference, 1);
- buffer->base.flags = NVFX_RESOURCE_FLAG_LINEAR;
- buffer->base.screen = pscreen;
- buffer->base.format = PIPE_FORMAT_R8_UNORM;
- buffer->base.usage = PIPE_USAGE_IMMUTABLE;
- buffer->base.bind = usage;
- buffer->base.width0 = bytes;
- buffer->base.height0 = 1;
- buffer->base.depth0 = 1;
-
- buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes);
- if (!buffer->bo)
- goto fail;
-
- return &buffer->base;
+ pipe_reference_init(&buffer->base.base.reference, 1);
+ buffer->base.base.flags = NVFX_RESOURCE_FLAG_LINEAR | NVFX_RESOURCE_FLAG_USER;
+ buffer->base.base.screen = pscreen;
+ buffer->base.base.format = PIPE_FORMAT_R8_UNORM;
+ buffer->base.base.usage = PIPE_USAGE_IMMUTABLE;
+ buffer->base.base.bind = usage;
+ buffer->base.base.width0 = bytes;
+ buffer->base.base.height0 = 1;
+ buffer->base.base.depth0 = 1;
+ buffer->data = ptr;
+ buffer->size = bytes;
+ buffer->bytes_to_draw_until_static = bytes * screen->static_reuse_threshold;
+ buffer->dirty_end = bytes;
+
+ return &buffer->base.base;
+}
-fail:
- FREE(buffer);
- return NULL;
+void nvfx_buffer_upload(struct nvfx_buffer* buffer)
+{
+ unsigned dirty = buffer->dirty_end - buffer->dirty_begin;
+ if(!buffer->base.bo)
+ {
+ buffer->base.bo = nouveau_screen_bo_new(buffer->base.base.screen,
+ 16,
+ buffer->base.base.usage,
+ buffer->base.base.bind,
+ buffer->base.base.width0);
+ }
+
+ if(dirty)
+ {
+ // TODO: may want to use a temporary in some cases
+ nouveau_bo_map(buffer->base.bo, NOUVEAU_BO_WR
+ | (buffer->dirty_unsynchronized ? NOUVEAU_BO_NOSYNC : 0));
+ memcpy(buffer->base.bo->map + buffer->dirty_begin, buffer->data + buffer->dirty_begin, dirty);
+ nouveau_bo_unmap(buffer->base.bo);
+ buffer->dirty_begin = buffer->dirty_end = 0;
+ }
}
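A conceptual usage sketch (not part of the patch) of the new buffer model: transfers write the malloc'd shadow copy and record a dirty interval, and the hardware draw path later calls nvfx_buffer_upload(), which creates the BO on first use and copies only that interval. The interval bookkeeping shown inline here is done by nvfx_buffer_dirty_interval() in nvfx_transfer.c in the actual patch.

    struct nvfx_buffer* buf = nvfx_buffer(pr);  /* pr: some PIPE_BUFFER resource      */
    memcpy(buf->data + off, src, size);         /* CPU-side write to the shadow copy  */
    buf->dirty_begin = off;                     /* a real caller merges this with any */
    buf->dirty_end   = off + size;              /* previously recorded interval       */
    /* ... later, when the draw path decides to use a hardware vertex buffer ... */
    nvfx_buffer_upload(buf);                    /* allocates base.bo if missing and
                                                 * uploads only the dirty range       */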
diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
index 1980176b23e..94c854b22b8 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_context.c
@@ -76,7 +76,9 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
nvfx_init_surface_functions(nvfx);
nvfx_init_state_functions(nvfx);
nvfx_init_sampling_functions(nvfx);
+ nvfx_init_vbo_functions(nvfx);
nvfx_init_resource_functions(&nvfx->pipe);
+ nvfx_init_transfer_functions(&nvfx->pipe);
/* Create, configure, and install fallback swtnl path */
nvfx->draw = draw_create(&nvfx->pipe);
@@ -89,6 +91,7 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
/* set these to that we init them on first validation */
nvfx->state.scissor_enabled = ~0;
nvfx->state.stipple_enabled = ~0;
+ nvfx->use_vertex_buffers = -1;
LIST_INITHEAD(&nvfx->render_cache);
diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h
index bce19df044d..8899bf991e1 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.h
+++ b/src/gallium/drivers/nvfx/nvfx_context.h
@@ -44,6 +44,7 @@
#define NVFX_NEW_SR (1 << 13)
#define NVFX_NEW_VERTCONST (1 << 14)
#define NVFX_NEW_FRAGCONST (1 << 15)
+#define NVFX_NEW_INDEX (1 << 16)
struct nvfx_rasterizer_state {
struct pipe_rasterizer_state pipe;
@@ -71,9 +72,53 @@ struct nvfx_state {
unsigned render_temps;
};
+struct nvfx_per_vertex_element {
+ unsigned idx;
+ unsigned vertex_buffer_index;
+ unsigned src_offset;
+};
+
+struct nvfx_low_frequency_element {
+ unsigned idx;
+ unsigned vertex_buffer_index;
+ unsigned src_offset;
+ void (*fetch_rgba_float)(float *dst, const uint8_t *src, unsigned i, unsigned j);
+ unsigned ncomp;
+};
+
+struct nvfx_per_instance_element {
+ struct nvfx_low_frequency_element base;
+ unsigned instance_divisor;
+};
+
+struct nvfx_per_vertex_buffer_info
+{
+ unsigned vertex_buffer_index;
+ unsigned per_vertex_size;
+};
+
struct nvfx_vtxelt_state {
struct pipe_vertex_element pipe[16];
unsigned num_elements;
+ unsigned vtxfmt[16];
+
+ unsigned num_per_vertex_buffer_infos;
+ struct nvfx_per_vertex_buffer_info per_vertex_buffer_info[16];
+
+ unsigned num_per_vertex;
+ struct nvfx_per_vertex_element per_vertex[16];
+
+ unsigned num_per_instance;
+ struct nvfx_per_instance_element per_instance[16];
+
+ unsigned num_constant;
+ struct nvfx_low_frequency_element constant[16];
+
+ boolean needs_translate;
+ struct translate* translate;
+
+ unsigned vertex_length;
+ unsigned max_vertices_per_packet;
};
struct nvfx_render_target {
@@ -127,8 +172,6 @@ struct nvfx_context {
struct pipe_viewport_state viewport;
struct pipe_framebuffer_state framebuffer;
struct pipe_index_buffer idxbuf;
- struct pipe_resource *idxbuf_buffer;
- unsigned idxbuf_format;
struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
@@ -137,8 +180,14 @@ struct nvfx_context {
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned vtxbuf_nr;
struct nvfx_vtxelt_state *vtxelt;
+ int base_vertex;
+ boolean use_index_buffer;
+ /* -1 = hardware input setup is outdated
+ * 0 = hardware input setup is for inline vertices
+ * 1 = hardware input setup is for hardware vertices
+ */
+ int use_vertex_buffers;
- unsigned vbo_bo;
unsigned hw_vtxelt_nr;
uint8_t hw_samplers;
uint32_t hw_txf[8];
@@ -180,11 +229,7 @@ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers,
/* nvfx_draw.c */
extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx);
-extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe,
- struct pipe_resource *idxbuf,
- unsigned ib_size, int ib_bias,
- unsigned mode,
- unsigned start, unsigned count);
+extern void nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info);
extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx);
/* nvfx_fb.c */
@@ -245,17 +290,53 @@ extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx);
extern void nvfx_state_emit(struct nvfx_context *nvfx);
/* nvfx_transfer.c */
-extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx);
+extern void nvfx_init_transfer_functions(struct pipe_context *pipe);
/* nvfx_vbo.c */
extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx);
extern void nvfx_vbo_relocate(struct nvfx_context *nvfx);
+extern void nvfx_idxbuf_validate(struct nvfx_context* nvfx);
+extern void nvfx_idxbuf_relocate(struct nvfx_context* nvfx);
extern void nvfx_draw_vbo(struct pipe_context *pipe,
const struct pipe_draw_info *info);
+extern void nvfx_init_vbo_functions(struct nvfx_context *nvfx);
+extern unsigned nvfx_vertex_formats[];
/* nvfx_vertprog.c */
extern boolean nvfx_vertprog_validate(struct nvfx_context *nvfx);
extern void nvfx_vertprog_destroy(struct nvfx_context *,
struct nvfx_vertex_program *);
+/* nvfx_push.c */
+extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
+
+/* must WAIT_RING(chan, ncomp + 1) or equivalent beforehand! */
+static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, float* v, unsigned ncomp)
+{
+ switch (ncomp) {
+ case 4:
+ OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4));
+ OUT_RING(chan, fui(v[0]));
+ OUT_RING(chan, fui(v[1]));
+ OUT_RING(chan, fui(v[2]));
+ OUT_RING(chan, fui(v[3]));
+ break;
+ case 3:
+ OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3));
+ OUT_RING(chan, fui(v[0]));
+ OUT_RING(chan, fui(v[1]));
+ OUT_RING(chan, fui(v[2]));
+ break;
+ case 2:
+ OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2));
+ OUT_RING(chan, fui(v[0]));
+ OUT_RING(chan, fui(v[1]));
+ break;
+ case 1:
+ OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1));
+ OUT_RING(chan, fui(v[0]));
+ break;
+ }
+}
+
#endif
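A minimal caller-side sketch of the nvfx_emit_vtx_attr() helper added above (it mirrors what nvfx_push.c does later in this patch): the ring space must be reserved beforehand, since the helper itself does not call WAIT_RING.

    float v[4] = { 0.0f, 0.0f, 0.0f, 1.0f };  /* illustrative attribute value      */
    WAIT_RING(chan, 5);                       /* worst case: header + 4 components */
    nvfx_emit_vtx_attr(chan, attrib, v, 4);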
diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c
index 22cff370b77..331e28418ad 100644
--- a/src/gallium/drivers/nvfx/nvfx_draw.c
+++ b/src/gallium/drivers/nvfx/nvfx_draw.c
@@ -9,6 +9,7 @@
#include "draw/draw_pipe.h"
#include "nvfx_context.h"
+#include "nvfx_resource.h"
/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
* often at all. Uses "quadro style" vertex submission + a fixed vertex
@@ -39,30 +40,21 @@ nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v)
unsigned idx = nvfx->swtnl.draw[i];
unsigned hw = nvfx->swtnl.hw[i];
+ WAIT_RING(chan, 5);
switch (nvfx->swtnl.emit[i]) {
case EMIT_OMIT:
break;
case EMIT_1F:
- BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(hw), 1);
- OUT_RING (chan, fui(v->data[idx][0]));
+ nvfx_emit_vtx_attr(chan, hw, v->data[idx], 1);
break;
case EMIT_2F:
- BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(hw), 2);
- OUT_RING (chan, fui(v->data[idx][0]));
- OUT_RING (chan, fui(v->data[idx][1]));
+ nvfx_emit_vtx_attr(chan, hw, v->data[idx], 2);
break;
case EMIT_3F:
- BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(hw), 3);
- OUT_RING (chan, fui(v->data[idx][0]));
- OUT_RING (chan, fui(v->data[idx][1]));
- OUT_RING (chan, fui(v->data[idx][2]));
+ nvfx_emit_vtx_attr(chan, hw, v->data[idx], 3);
break;
case EMIT_4F:
- BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4);
- OUT_RING (chan, fui(v->data[idx][0]));
- OUT_RING (chan, fui(v->data[idx][1]));
- OUT_RING (chan, fui(v->data[idx][2]));
- OUT_RING (chan, fui(v->data[idx][3]));
+ nvfx_emit_vtx_attr(chan, hw, v->data[idx], 4);
break;
case 0xff:
BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4);
@@ -231,15 +223,9 @@ nvfx_draw_render_stage(struct nvfx_context *nvfx)
}
void
-nvfx_draw_elements_swtnl(struct pipe_context *pipe,
- struct pipe_resource *idxbuf,
- unsigned idxbuf_size, int idxbuf_bias,
- unsigned mode, unsigned start, unsigned count)
+nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
- struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
- struct pipe_transfer *ib_transfer = NULL;
- struct pipe_transfer *cb_transfer = NULL;
unsigned i;
void *map;
@@ -247,18 +233,15 @@ nvfx_draw_elements_swtnl(struct pipe_context *pipe,
return;
nvfx_state_emit(nvfx);
+ /* these must be passed without adding the offsets */
for (i = 0; i < nvfx->vtxbuf_nr; i++) {
- map = pipe_buffer_map(pipe, nvfx->vtxbuf[i].buffer,
- PIPE_TRANSFER_READ,
- &vb_transfer[i]);
+ map = nvfx_buffer(nvfx->vtxbuf[i].buffer)->data;
draw_set_mapped_vertex_buffer(nvfx->draw, i, map);
}
- if (idxbuf) {
- map = pipe_buffer_map(pipe, idxbuf,
- PIPE_TRANSFER_READ,
- &ib_transfer);
- draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, idxbuf_bias, map);
+ if (info->indexed) {
+ map = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset;
+ draw_set_mapped_element_buffer_range(nvfx->draw, nvfx->idxbuf.index_size, info->index_bias, info->min_index, info->max_index, map);
} else {
draw_set_mapped_element_buffer(nvfx->draw, 0, 0, NULL);
}
@@ -266,28 +249,14 @@ nvfx_draw_elements_swtnl(struct pipe_context *pipe,
if (nvfx->constbuf[PIPE_SHADER_VERTEX]) {
const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX];
- map = pipe_buffer_map(pipe,
- nvfx->constbuf[PIPE_SHADER_VERTEX],
- PIPE_TRANSFER_READ,
- &cb_transfer);
+ map = nvfx_buffer(nvfx->constbuf[PIPE_SHADER_VERTEX])->data;
draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0,
map, nr);
}
- draw_arrays(nvfx->draw, mode, start, count);
-
- for (i = 0; i < nvfx->vtxbuf_nr; i++)
- pipe_buffer_unmap(pipe, nvfx->vtxbuf[i].buffer, vb_transfer[i]);
-
- if (idxbuf)
- pipe_buffer_unmap(pipe, idxbuf, ib_transfer);
-
- if (nvfx->constbuf[PIPE_SHADER_VERTEX])
- pipe_buffer_unmap(pipe, nvfx->constbuf[PIPE_SHADER_VERTEX],
- cb_transfer);
+ draw_arrays_instanced(nvfx->draw, info->mode, info->start, info->count, info->start_instance, info->instance_count);
draw_flush(nvfx->draw);
- pipe->flush(pipe, 0, NULL);
}
static INLINE void
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
index ee41f03b9b8..ae4fe3aa262 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -9,6 +9,7 @@
#include "nvfx_context.h"
#include "nvfx_shader.h"
+#include "nvfx_resource.h"
#define MAX_CONSTS 128
#define MAX_IMM 32
@@ -925,10 +926,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) {
struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT];
- // TODO: avoid using transfers, just directly the buffer
- struct pipe_transfer* transfer;
- // TODO: does this check make any sense, or should we do this unconditionally?
- uint32_t* map = pipe_buffer_map(&nvfx->pipe, constbuf, PIPE_TRANSFER_READ, &transfer);
+ uint32_t* map = (uint32_t*)nvfx_buffer(constbuf)->data;
uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset);
int i;
@@ -942,7 +940,6 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
nvfx_fp_memcpy(&fpmap[off], &map[idx], 4 * sizeof(uint32_t));
}
}
- pipe_buffer_unmap(&nvfx->pipe, constbuf, transfer);
}
}
diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c
new file mode 100644
index 00000000000..52e891c6678
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_push.c
@@ -0,0 +1,402 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_split_prim.h"
+#include "translate/translate.h"
+
+#include "nvfx_context.h"
+#include "nvfx_resource.h"
+
+struct push_context {
+ struct nouveau_channel* chan;
+
+ void *idxbuf;
+ int32_t idxbias;
+
+ float edgeflag;
+ int edgeflag_attr;
+
+ unsigned vertex_length;
+ unsigned max_vertices_per_packet;
+
+ struct translate* translate;
+};
+
+static void
+emit_edgeflag(void *priv, boolean enabled)
+{
+ struct push_context* ctx = priv;
+ struct nouveau_channel *chan = ctx->chan;
+
+ OUT_RING(chan, RING_3D(NV34TCL_EDGEFLAG_ENABLE, 1));
+ OUT_RING(chan, enabled ? 1 : 0);
+}
+
+static void
+emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
+{
+ struct push_context *ctx = priv;
+ uint8_t* elts = (uint8_t*)ctx->idxbuf + start;
+
+ while(count)
+ {
+ unsigned push = MIN2(count, ctx->max_vertices_per_packet);
+ unsigned length = push * ctx->vertex_length;
+
+ OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length));
+ ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur);
+ ctx->chan->cur += length;
+
+ count -= push;
+ elts += push;
+ }
+}
+
+static void
+emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
+{
+ struct push_context *ctx = priv;
+ uint16_t* elts = (uint16_t*)ctx->idxbuf + start;
+
+ while(count)
+ {
+ unsigned push = MIN2(count, ctx->max_vertices_per_packet);
+ unsigned length = push * ctx->vertex_length;
+
+ OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length));
+ ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur);
+ ctx->chan->cur += length;
+
+ count -= push;
+ elts += push;
+ }
+}
+
+static void
+emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
+{
+ struct push_context *ctx = priv;
+ uint32_t* elts = (uint32_t*)ctx->idxbuf + start;
+
+ while(count)
+ {
+ unsigned push = MIN2(count, ctx->max_vertices_per_packet);
+ unsigned length = push * ctx->vertex_length;
+
+ OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length));
+ ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur);
+ ctx->chan->cur += length;
+
+ count -= push;
+ elts += push;
+ }
+}
+
+static void
+emit_vertices(void *priv, unsigned start, unsigned count)
+{
+ struct push_context *ctx = priv;
+
+ while(count)
+ {
+ unsigned push = MIN2(count, ctx->max_vertices_per_packet);
+ unsigned length = push * ctx->vertex_length;
+
+ OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length));
+ ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur);
+ ctx->chan->cur += length;
+
+ count -= push;
+ start += push;
+ }
+}
+
+static void
+emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
+{
+ struct push_context* ctx = priv;
+ struct nouveau_channel *chan = ctx->chan;
+ unsigned nr = (vc & 0xff);
+ if (nr) {
+ OUT_RING(chan, RING_3D(reg, 1));
+ OUT_RING (chan, ((nr - 1) << 24) | start);
+ start += nr;
+ }
+
+ nr = vc >> 8;
+ while (nr) {
+ unsigned push = nr > 2047 ? 2047 : nr;
+
+ nr -= push;
+
+ OUT_RING(chan, RING_3D_NI(reg, push));
+ while (push--) {
+ OUT_RING(chan, ((0x100 - 1) << 24) | start);
+ start += 0x100;
+ }
+ }
+}
+
+static void
+emit_ib_ranges(void* priv, unsigned start, unsigned vc)
+{
+ emit_ranges(priv, start, vc, NV34TCL_VB_INDEX_BATCH);
+}
+
+static void
+emit_vb_ranges(void* priv, unsigned start, unsigned vc)
+{
+ emit_ranges(priv, start, vc, NV34TCL_VB_VERTEX_BATCH);
+}
+
+static INLINE void
+emit_elt8(void* priv, unsigned start, unsigned vc)
+{
+ struct push_context* ctx = priv;
+ struct nouveau_channel *chan = ctx->chan;
+ uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
+ int idxbias = ctx->idxbias;
+
+ if (vc & 1) {
+ OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
+ OUT_RING (chan, elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+ unsigned push = MIN2(vc, 2047 * 2);
+
+ OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
+ for (i = 0; i < push; i+=2)
+ OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
+
+ vc -= push;
+ elts += push;
+ }
+}
+
+static INLINE void
+emit_elt16(void* priv, unsigned start, unsigned vc)
+{
+ struct push_context* ctx = priv;
+ struct nouveau_channel *chan = ctx->chan;
+ uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
+ int idxbias = ctx->idxbias;
+
+ if (vc & 1) {
+ OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
+ OUT_RING (chan, elts[0]);
+ elts++; vc--;
+ }
+
+ while (vc) {
+ unsigned i;
+ unsigned push = MIN2(vc, 2047 * 2);
+
+ OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
+ for (i = 0; i < push; i+=2)
+ OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
+
+ vc -= push;
+ elts += push;
+ }
+}
+
+static INLINE void
+emit_elt32(void* priv, unsigned start, unsigned vc)
+{
+ struct push_context* ctx = priv;
+ struct nouveau_channel *chan = ctx->chan;
+ uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
+ int idxbias = ctx->idxbias;
+
+ while (vc) {
+ unsigned push = MIN2(vc, 2047);
+
+ OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push));
+ assert(AVAIL_RING(chan) >= push);
+ if(idxbias)
+ {
+ for(unsigned i = 0; i < push; ++i)
+ OUT_RING(chan, elts[i] + idxbias);
+ }
+ else
+ OUT_RINGp(chan, elts, push);
+
+ vc -= push;
+ elts += push;
+ }
+}
+
+void
+nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
+ struct nouveau_channel *chan = nvfx->screen->base.channel;
+ struct push_context ctx;
+ struct util_split_prim s;
+ unsigned instances_left = info->instance_count;
+ int vtx_value;
+ unsigned hw_mode = nvgl_primitive(info->mode);
+ int i;
+ struct
+ {
+ uint8_t* map;
+ unsigned step;
+ } per_instance[16];
+ unsigned p_overhead = 0
+ + 4 /* begin/end */
+ + 4; /* potential edgeflag enable/disable */
+
+ ctx.chan = nvfx->screen->base.channel;
+ ctx.translate = nvfx->vtxelt->translate;
+ ctx.idxbuf = NULL;
+ ctx.vertex_length = nvfx->vtxelt->vertex_length;
+ ctx.max_vertices_per_packet = nvfx->vtxelt->max_vertices_per_packet;
+ ctx.edgeflag = 0.5f;
+ // TODO: figure out if we really want to handle this, and do so in that case
+ ctx.edgeflag_attr = 0xff; // nvfx->vertprog->cfg.edgeflag_in;
+
+ if(!nvfx->use_vertex_buffers)
+ {
+ for(i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
+ {
+ struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+ uint8_t* data = nvfx_buffer(vb->buffer)->data + vb->buffer_offset;
+ if(info->indexed)
+ data += info->index_bias * vb->stride;
+ ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
+ }
+
+ if(ctx.edgeflag_attr < 16)
+ vtx_value = -(ctx.vertex_length + 3); /* vertex data and edgeflag header and value */
+ else
+ {
+ p_overhead += 1; /* initial vertex_data header */
+ vtx_value = -ctx.vertex_length; /* vertex data and edgeflag header and value */
+ }
+
+ if (info->indexed) {
+			// XXX: this case is broken and probably needs a new VTX_ATTR push path
+ if (nvfx->idxbuf.index_size == 1)
+ s.emit = emit_vertices_lookup8;
+ else if (nvfx->idxbuf.index_size == 2)
+ s.emit = emit_vertices_lookup16;
+ else
+ s.emit = emit_vertices_lookup32;
+ } else
+ s.emit = emit_vertices;
+ }
+ else
+ {
+ if(!info->indexed || nvfx->use_index_buffer)
+ {
+ s.emit = info->indexed ? emit_ib_ranges : emit_vb_ranges;
+ p_overhead += 3;
+ vtx_value = 0;
+ }
+ else if (nvfx->idxbuf.index_size == 4)
+ {
+ s.emit = emit_elt32;
+ p_overhead += 1;
+ vtx_value = 8;
+ }
+ else
+ {
+ s.emit = (nvfx->idxbuf.index_size == 2) ? emit_elt16 : emit_elt8;
+ p_overhead += 3;
+ vtx_value = 7;
+ }
+ }
+
+ ctx.idxbias = info->index_bias;
+ if(nvfx->use_vertex_buffers)
+ ctx.idxbias -= nvfx->base_vertex;
+
+ /* map index buffer, if present */
+ if (info->indexed && !nvfx->use_index_buffer)
+ ctx.idxbuf = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset;
+
+ s.priv = &ctx;
+ s.edge = emit_edgeflag;
+
+ for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
+ {
+ struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
+ float v[4];
+ per_instance[i].step = info->start_instance % ve->instance_divisor;
+ per_instance[i].map = nvfx_buffer(vb->buffer)->data + vb->buffer_offset + ve->base.src_offset;
+
+ nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
+
+ WAIT_RING(chan, 5);
+ nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
+ }
+
+ /* per-instance loop */
+ while (instances_left--) {
+ int max_verts;
+ boolean done;
+
+ util_split_prim_init(&s, info->mode, info->start, info->count);
+ nvfx_state_emit(nvfx);
+ for(;;) {
+ max_verts = AVAIL_RING(chan);
+ max_verts -= p_overhead;
+
+ /* if vtx_value < 0, each vertex is -vtx_value words long
+ * otherwise, each vertex is 2^(vtx_value) / 255 words long (this is an approximation)
+ */
+ if(vtx_value < 0)
+ {
+ max_verts /= -vtx_value;
+ max_verts -= (max_verts >> 10); /* vertex data headers */
+ }
+ else
+ {
+ if(max_verts >= (1 << 23)) /* avoid overflow here */
+ max_verts = (1 << 23);
+ max_verts = (max_verts * 255) >> vtx_value;
+ }
+
+ //printf("avail %u max_verts %u\n", AVAIL_RING(chan), max_verts);
+
+ if(max_verts >= 16)
+ {
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING(chan, hw_mode);
+ done = util_split_prim_next(&s, max_verts);
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+ OUT_RING(chan, 0);
+
+ if(done)
+ break;
+ }
+
+ FIRE_RING(chan);
+ nvfx_state_emit(nvfx);
+ }
+
+ /* set data for the next instance, if any changed */
+ for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
+ {
+ struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
+
+ if(++per_instance[i].step == ve->instance_divisor)
+ {
+ float v[4];
+ per_instance[i].map += vb->stride;
+ per_instance[i].step = 0;
+
+ nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
+ WAIT_RING(chan, 5);
+ nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
+ }
+ }
+ }
+}
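A worked instance of the max_verts estimate in the inner loop above (the numbers are illustrative, not taken from the patch):

    /* Hardware 16-bit indices (vtx_value == 7): two indices share one ring word, so */
    max_verts = (avail * 255) >> 7;   /* roughly avail * 2 vertices                  */
    /* Inline vertices of 4 words each (vtx_value == -4):                            */
    max_verts = avail / 4;            /* one vertex per 4 words...                   */
    max_verts -= max_verts >> 10;     /* ...minus the NV34TCL_VERTEX_DATA headers    */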
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.c b/src/gallium/drivers/nvfx/nvfx_resource.c
index 1c921b47100..3a46e0a7a57 100644
--- a/src/gallium/drivers/nvfx/nvfx_resource.c
+++ b/src/gallium/drivers/nvfx/nvfx_resource.c
@@ -59,12 +59,6 @@ nvfx_resource_get_handle(struct pipe_screen *pscreen,
void
nvfx_init_resource_functions(struct pipe_context *pipe)
{
- pipe->get_transfer = nvfx_transfer_new;
- pipe->transfer_map = nvfx_transfer_map;
- pipe->transfer_flush_region = u_default_transfer_flush_region;
- pipe->transfer_unmap = nvfx_transfer_unmap;
- pipe->transfer_destroy = util_staging_transfer_destroy;
- pipe->transfer_inline_write = u_default_transfer_inline_write;
pipe->is_resource_referenced = nvfx_resource_is_referenced;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h
index ff86f6d9cb6..583be4de2ae 100644
--- a/src/gallium/drivers/nvfx/nvfx_resource.h
+++ b/src/gallium/drivers/nvfx/nvfx_resource.h
@@ -17,8 +17,23 @@ struct nvfx_resource {
struct nouveau_bo *bo;
};
+static INLINE
+struct nvfx_resource *nvfx_resource(struct pipe_resource *resource)
+{
+ return (struct nvfx_resource *)resource;
+}
+
#define NVFX_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
+#define NVFX_RESOURCE_FLAG_USER (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
+
+/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */
+static INLINE boolean
+nvfx_resource_mapped_by_gpu(struct pipe_resource *resource)
+{
+ return nvfx_resource(resource)->bo->handle;
+}
+/* is resource in VRAM? */
static inline int
nvfx_resource_on_gpu(struct pipe_resource* pr)
{
@@ -63,12 +78,6 @@ struct nvfx_surface {
struct nvfx_miptree* temp;
};
-static INLINE
-struct nvfx_resource *nvfx_resource(struct pipe_resource *resource)
-{
- return (struct nvfx_resource *)resource;
-}
-
static INLINE struct nouveau_bo *
nvfx_surface_buffer(struct pipe_surface *surf)
{
@@ -106,22 +115,6 @@ nvfx_miptree_from_handle(struct pipe_screen *pscreen,
const struct pipe_resource *template,
struct winsys_handle *whandle);
-struct pipe_resource *
-nvfx_buffer_create(struct pipe_screen *pscreen,
- const struct pipe_resource *template);
-
-void
-nvfx_buffer_destroy(struct pipe_screen *pscreen,
- struct pipe_resource *presource);
-
-struct pipe_resource *
-nvfx_user_buffer_create(struct pipe_screen *screen,
- void *ptr,
- unsigned bytes,
- unsigned usage);
-
-
-
void
nvfx_miptree_surface_del(struct pipe_surface *ps);
@@ -173,4 +166,58 @@ nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf);
void
nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf);
+struct nvfx_buffer
+{
+ struct nvfx_resource base;
+ uint8_t* data;
+ unsigned size;
+
+ /* the range of data not yet uploaded to the GPU bo */
+ unsigned dirty_begin;
+ unsigned dirty_end;
+
+ /* whether all transfers were unsynchronized */
+ boolean dirty_unsynchronized;
+
+ /* whether it would have been profitable to upload
+ * the latest updated data to the GPU immediately */
+ boolean last_update_static;
+
+ /* how many bytes we need to draw before we deem
+ * the buffer to be static
+ */
+ long long bytes_to_draw_until_static;
+};
+
+static inline struct nvfx_buffer* nvfx_buffer(struct pipe_resource* pr)
+{
+ return (struct nvfx_buffer*)pr;
+}
+
+/* this is a heuristic to determine whether we are better off uploading the
+ * buffer to the GPU, or just continuing pushing it on the FIFO
+ */
+static inline boolean nvfx_buffer_seems_static(struct nvfx_buffer* buffer)
+{
+ return buffer->last_update_static
+ || buffer->bytes_to_draw_until_static < 0;
+}
+
+struct pipe_resource *
+nvfx_buffer_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *template);
+
+void
+nvfx_buffer_destroy(struct pipe_screen *pscreen,
+ struct pipe_resource *presource);
+
+struct pipe_resource *
+nvfx_user_buffer_create(struct pipe_screen *screen,
+ void *ptr,
+ unsigned bytes,
+ unsigned usage);
+
+void
+nvfx_buffer_upload(struct nvfx_buffer* buffer);
+
#endif
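A worked example of the bytes_to_draw_until_static heuristic (assumption: the draw path, which is outside this section, subtracts the bytes it pushes inline on each draw):

    /* With the default NVFX_STATIC_REUSE_THRESHOLD of 2.0 (see nvfx_screen.c below),
     * a freshly written 64 KiB buffer starts out needing 128 KiB of inline draws:   */
    buffer->bytes_to_draw_until_static = 65536 * 2.0;  /* 131072                     */
    /* once that many bytes have been drawn, the counter goes negative and           */
    nvfx_buffer_seems_static(buffer);                  /* starts returning TRUE      */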
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index a1b8361a9a4..7e3caf8d2e3 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -163,11 +163,11 @@ nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
}
static boolean
-nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
+nvfx_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned tex_usage, unsigned geom_flags)
+ unsigned bind, unsigned geom_flags)
{
struct nvfx_screen *screen = nvfx_screen(pscreen);
struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front;
@@ -175,7 +175,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
if (sample_count > 1)
return FALSE;
- if (tex_usage & PIPE_BIND_RENDER_TARGET) {
+ if (bind & PIPE_BIND_RENDER_TARGET) {
switch (format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
@@ -186,7 +186,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
}
}
- if (tex_usage & PIPE_BIND_DEPTH_STENCIL) {
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
switch (format) {
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
@@ -201,7 +201,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
}
}
- if (tex_usage & PIPE_BIND_SAMPLER_VIEW) {
+ if (bind & PIPE_BIND_SAMPLER_VIEW) {
struct nvfx_texture_format* tf = &nvfx_texture_formats[format];
if(util_format_is_s3tc(format) && !util_format_s3tc_enabled)
return FALSE;
@@ -218,6 +218,22 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
}
}
+ // note that we do actually support everything through translate
+ if (bind & PIPE_BIND_VERTEX_BUFFER) {
+ unsigned type = nvfx_vertex_formats[format];
+ if(!type)
+ return FALSE;
+ }
+
+ if (bind & PIPE_BIND_INDEX_BUFFER) {
+ // 8-bit indices supported, but not in hardware index buffer
+ if(format != PIPE_FORMAT_R16_USCALED && format != PIPE_FORMAT_R32_USCALED)
+ return FALSE;
+ }
+
+ if(bind & PIPE_BIND_STREAM_OUTPUT)
+ return FALSE;
+
return TRUE;
}
@@ -387,7 +403,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
pscreen->destroy = nvfx_screen_destroy;
pscreen->get_param = nvfx_screen_get_param;
pscreen->get_paramf = nvfx_screen_get_paramf;
- pscreen->is_format_supported = nvfx_screen_surface_format_supported;
+ pscreen->is_format_supported = nvfx_screen_is_format_supported;
pscreen->context_create = nvfx_create;
switch (dev->chipset & 0xf0) {
@@ -419,6 +435,11 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE);
+ screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE);
+
+ screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384);
+ screen->inline_cost_per_hardware_cost = atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0"));
+ screen->static_reuse_threshold = atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0"));
screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen);
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h
index 4dedbe9cb40..473a1127752 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.h
+++ b/src/gallium/drivers/nvfx/nvfx_screen.h
@@ -16,6 +16,7 @@ struct nvfx_screen {
unsigned is_nv4x; /* either 0 or ~0 */
boolean force_swtnl;
+ boolean trace_draw;
unsigned vertex_buffer_reloc_flags;
unsigned index_buffer_reloc_flags;
@@ -33,6 +34,18 @@ struct nvfx_screen {
struct nouveau_resource *vp_data_heap;
struct nv04_2d_context* eng2d;
+
+ /* Once the amount of bytes drawn from the buffer reaches the updated size times this value,
+	 * we will assume that the buffer will be drawn a huge number of times before the
+ * next modification
+ */
+ float static_reuse_threshold;
+
+	/* Cost of allocating a buffer in terms of the cost of copying a byte to a hardware buffer */
+ unsigned buffer_allocation_cost;
+
+	/* inline_cost/hardware_cost conversion ratio */
+ float inline_cost_per_hardware_cost;
};
static INLINE struct nvfx_screen *
diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c
index d459f9a8801..25d29720a85 100644
--- a/src/gallium/drivers/nvfx/nvfx_state.c
+++ b/src/gallium/drivers/nvfx/nvfx_state.c
@@ -441,83 +441,6 @@ nvfx_set_viewport_state(struct pipe_context *pipe,
nvfx->draw_dirty |= NVFX_NEW_VIEWPORT;
}
-static void
-nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
- const struct pipe_vertex_buffer *vb)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
-
- for(unsigned i = 0; i < count; ++i)
- {
- pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
- nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
- nvfx->vtxbuf[i].max_index = vb[i].max_index;
- nvfx->vtxbuf[i].stride = vb[i].stride;
- }
-
- for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
- pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);
-
- nvfx->vtxbuf_nr = count;
-
- nvfx->dirty |= NVFX_NEW_ARRAYS;
- nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
-}
-
-static void
-nvfx_set_index_buffer(struct pipe_context *pipe,
- const struct pipe_index_buffer *ib)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
-
- /* TODO make this more like a state */
-
- if(ib)
- {
- pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
- nvfx->idxbuf.index_size = ib->index_size;
- nvfx->idxbuf.offset = ib->offset;
- }
- else
- {
- pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
- nvfx->idxbuf.index_size = 0;
- nvfx->idxbuf.offset = 0;
- }
-}
-
-static void *
-nvfx_vtxelts_state_create(struct pipe_context *pipe,
- unsigned num_elements,
- const struct pipe_vertex_element *elements)
-{
- struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
-
- assert(num_elements < 16); /* not doing fallbacks yet */
- cso->num_elements = num_elements;
- memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
-
-/* nvfx_vtxelt_construct(cso);*/
-
- return (void *)cso;
-}
-
-static void
-nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
-{
- FREE(hwcso);
-}
-
-static void
-nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
-{
- struct nvfx_context *nvfx = nvfx_context(pipe);
-
- nvfx->vtxelt = hwcso;
- nvfx->dirty |= NVFX_NEW_ARRAYS;
- /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/
-}
-
void
nvfx_init_state_functions(struct nvfx_context *nvfx)
{
@@ -553,11 +476,4 @@ nvfx_init_state_functions(struct nvfx_context *nvfx)
nvfx->pipe.set_polygon_stipple = nvfx_set_polygon_stipple;
nvfx->pipe.set_scissor_state = nvfx_set_scissor_state;
nvfx->pipe.set_viewport_state = nvfx_set_viewport_state;
-
- nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
- nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
- nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
-
- nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
- nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c
index dc70f3de870..b9d18977919 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
@@ -8,6 +8,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
{
struct nouveau_channel* chan = nvfx->screen->base.channel;
unsigned dirty;
+ unsigned still_dirty = 0;
int all_swizzled = -1;
boolean flush_tex_cache = FALSE;
@@ -52,11 +53,19 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
return FALSE;
}
- if(dirty & (NVFX_NEW_ARRAYS))
+ if(dirty & NVFX_NEW_ARRAYS)
{
if(!nvfx_vbo_validate(nvfx))
return FALSE;
}
+
+ if(dirty & NVFX_NEW_INDEX)
+ {
+ if(nvfx->use_index_buffer)
+ nvfx_idxbuf_validate(nvfx);
+ else
+ still_dirty = NVFX_NEW_INDEX;
+ }
}
else
{
@@ -64,7 +73,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
nvfx_vertprog_validate(nvfx);
- if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG))
+ if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_INDEX | NVFX_NEW_FRAGPROG))
nvfx_vtxfmt_validate(nvfx);
}
@@ -118,7 +127,24 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
OUT_RING(chan, 1);
}
}
- nvfx->dirty = 0;
+
+ nvfx->dirty = dirty & still_dirty;
+
+ unsigned render_temps = nvfx->state.render_temps;
+ if(render_temps)
+ {
+ for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
+ {
+ if(render_temps & (1 << i))
+ util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]),
+ (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]);
+ }
+
+ if(render_temps & 0x80)
+ util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf),
+ (struct util_dirty_surface*)nvfx->framebuffer.zsbuf);
+ }
+
return TRUE;
}
@@ -137,21 +163,6 @@ nvfx_state_emit(struct nvfx_context *nvfx)
;
MARK_RING(chan, max_relocs * 2, max_relocs * 2);
nvfx_state_relocate(nvfx);
-
- unsigned render_temps = nvfx->state.render_temps;
- if(render_temps)
- {
- for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
- {
- if(render_temps & (1 << i))
- util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]),
- (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]);
- }
-
- if(render_temps & 0x80)
- util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf),
- (struct util_dirty_surface*)nvfx->framebuffer.zsbuf);
- }
}
void
@@ -161,7 +172,11 @@ nvfx_state_relocate(struct nvfx_context *nvfx)
nvfx_fragtex_relocate(nvfx);
nvfx_fragprog_relocate(nvfx);
if (nvfx->render_mode == HW)
+ {
nvfx_vbo_relocate(nvfx);
+ if(nvfx->use_index_buffer)
+ nvfx_idxbuf_relocate(nvfx);
+ }
}
boolean
diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c
index 80b0f21575f..28bbd36c2e8 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_fb.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c
@@ -1,6 +1,5 @@
#include "nvfx_context.h"
#include "nvfx_resource.h"
-#include "nouveau/nouveau_util.h"
#include "util/u_format.h"
static inline boolean
@@ -125,8 +124,8 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
- (log2i(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
- (log2i(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+ (util_logbase2(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+ (util_logbase2(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
} else
rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c
index 7efdd954b4b..135978ad274 100644
--- a/src/gallium/drivers/nvfx/nvfx_surface.c
+++ b/src/gallium/drivers/nvfx/nvfx_surface.c
@@ -36,7 +36,6 @@
#include "util/u_blitter.h"
#include "nouveau/nouveau_winsys.h"
-#include "nouveau/nouveau_util.h"
#include "nouveau/nouveau_screen.h"
#include "nvfx_context.h"
#include "nvfx_screen.h"
@@ -62,7 +61,7 @@ nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format)
break;
default:
assert(util_is_pot(bits));
- int shift = log2i(bits) - 3;
+ int shift = util_logbase2(bits) - 3;
assert(shift >= 2);
rgn->bpps = 2;
shift -= 2;
@@ -365,25 +364,29 @@ nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int
{
struct nvfx_surface* ns = (struct nvfx_surface*)surf;
struct pipe_subresource tempsr, surfsr;
- struct pipe_resource *idxbuf_buffer;
- unsigned idxbuf_format;
+ struct nvfx_context* nvfx = nvfx_context(pipe);
+
+	// TODO: we really should do this validation before setting these variables in draw calls
+ unsigned use_vertex_buffers = nvfx->use_vertex_buffers;
+ boolean use_index_buffer = nvfx->use_index_buffer;
+ unsigned base_vertex = nvfx->base_vertex;
tempsr.face = 0;
tempsr.level = 0;
surfsr.face = surf->face;
surfsr.level = surf->level;
- // TODO: do this properly, in blitter save
- idxbuf_buffer = ((struct nvfx_context*)pipe)->idxbuf_buffer;
- idxbuf_format = ((struct nvfx_context*)pipe)->idxbuf_format;
-
if(to_temp)
nvfx_resource_copy_region(pipe, &ns->temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height);
else
nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &ns->temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height);
- ((struct nvfx_context*)pipe)->idxbuf_buffer = idxbuf_buffer;
- ((struct nvfx_context*)pipe)->idxbuf_format = idxbuf_format;
+ nvfx->use_vertex_buffers = use_vertex_buffers;
+ nvfx->use_index_buffer = use_index_buffer;
+ nvfx->base_vertex = base_vertex;
+
+ nvfx->dirty |= NVFX_NEW_ARRAYS;
+ nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}
void
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c
index e9c3dd7e551..ca4462ef9dc 100644
--- a/src/gallium/drivers/nvfx/nvfx_transfer.c
+++ b/src/gallium/drivers/nvfx/nvfx_transfer.c
@@ -26,25 +26,44 @@ nvfx_transfer_new(struct pipe_context *pipe,
unsigned usage,
const struct pipe_box *box)
{
- struct nvfx_staging_transfer* tx;
- bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR;
-
- tx = CALLOC_STRUCT(nvfx_staging_transfer);
- if(!tx)
- return NULL;
-
- util_staging_transfer_init(pipe, pt, sr, usage, box, direct, tx);
+ if((usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_DONTBLOCK)) == PIPE_TRANSFER_DONTBLOCK)
+ {
+ struct nouveau_bo* bo = ((struct nvfx_resource*)pt)->bo;
+ if(bo && nouveau_bo_busy(bo, NOUVEAU_BO_WR))
+ return NULL;
+ }
if(pt->target == PIPE_BUFFER)
{
- tx->base.base.slice_stride = tx->base.base.stride = ((struct nvfx_resource*)tx->base.staging_resource)->bo->size;
- if(direct)
- tx->offset = util_format_get_stride(pt->format, box->x);
- else
- tx->offset = 0;
+ // it would be nice if we could avoid all this ridiculous overhead...
+ struct pipe_transfer* tx;
+ struct nvfx_buffer* buffer = nvfx_buffer(pt);
+
+ tx = CALLOC_STRUCT(pipe_transfer);
+ if (!tx)
+ return NULL;
+
+ pipe_resource_reference(&tx->resource, pt);
+ tx->sr = sr;
+ tx->usage = usage;
+ tx->box = *box;
+
+ tx->slice_stride = tx->stride = util_format_get_stride(pt->format, box->width);
+ tx->data = buffer->data + util_format_get_stride(pt->format, box->x);
+
+ return tx;
}
else
{
+ struct nvfx_staging_transfer* tx;
+ bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR;
+
+ tx = CALLOC_STRUCT(nvfx_staging_transfer);
+ if(!tx)
+ return NULL;
+
+ util_staging_transfer_init(pipe, pt, sr, usage, box, direct, &tx->base);
+
if(direct)
{
tx->base.base.stride = nvfx_subresource_pitch(pt, sr.level);
@@ -66,26 +85,132 @@ nvfx_transfer_new(struct pipe_context *pipe,
}
}
+static void nvfx_buffer_dirty_interval(struct nvfx_buffer* buffer, unsigned begin, unsigned size, boolean unsynchronized)
+{
+ struct nvfx_screen* screen = nvfx_screen(buffer->base.base.screen);
+ buffer->last_update_static = buffer->bytes_to_draw_until_static < 0;
+ if(buffer->dirty_begin == buffer->dirty_end)
+ {
+ buffer->dirty_begin = begin;
+ buffer->dirty_end = begin + size;
+ buffer->dirty_unsynchronized = unsynchronized;
+ }
+ else
+ {
+ buffer->dirty_begin = MIN2(buffer->dirty_begin, begin);
+ buffer->dirty_end = MAX2(buffer->dirty_end, begin + size);
+ buffer->dirty_unsynchronized &= unsynchronized;
+ }
+
+ if(unsynchronized)
+ {
+ // TODO: revisit this, it doesn't seem quite right
+ //printf("UNSYNC UPDATE %p %u %u\n", buffer, begin, size);
+ buffer->bytes_to_draw_until_static += size * screen->static_reuse_threshold;
+ }
+ else
+ buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold;
+}
+
+static void nvfx_transfer_flush_region( struct pipe_context *pipe,
+ struct pipe_transfer *ptx,
+ const struct pipe_box *box)
+{
+ if(ptx->resource->target == PIPE_BUFFER && (ptx->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
+ {
+ struct nvfx_buffer* buffer = nvfx_buffer(ptx->resource);
+ nvfx_buffer_dirty_interval(buffer,
+ (uint8_t*)ptx->data - buffer->data + util_format_get_stride(buffer->base.base.format, box->x),
+ util_format_get_stride(buffer->base.base.format, box->width),
+ !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED));
+ }
+}
+
+static void
+nvfx_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx)
+{
+ if(ptx->resource->target == PIPE_BUFFER)
+ {
+ struct nvfx_buffer* buffer = nvfx_buffer(ptx->resource);
+ if((ptx->usage & (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT)) == PIPE_TRANSFER_WRITE)
+ nvfx_buffer_dirty_interval(buffer,
+ (uint8_t*)ptx->data - buffer->data,
+ ptx->stride,
+ !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED));
+ pipe_resource_reference(&ptx->resource, 0);
+ FREE(ptx);
+ }
+ else
+ util_staging_transfer_destroy(pipe, ptx);
+}
+
void *
nvfx_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx)
{
- struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
- if(!ptx->data)
+ if(ptx->resource->target == PIPE_BUFFER)
+ return ptx->data;
+ else
{
- struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
- uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo, nouveau_screen_transfer_flags(ptx->usage));
- ptx->data = map + tx->offset;
+ struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
+ if(!ptx->data)
+ {
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+ uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo, nouveau_screen_transfer_flags(ptx->usage));
+ ptx->data = map + tx->offset;
+ }
+
+ ++tx->map_count;
+ return ptx->data;
}
- ++tx->map_count;
- return ptx->data;
}
void
nvfx_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx)
{
- struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
- struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+ if(ptx->resource->target != PIPE_BUFFER)
+ {
+ struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
+ struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+
+ if(!--tx->map_count)
+ {
+ nouveau_screen_bo_unmap(pipe->screen, mt->base.bo);
+ ptx->data = 0;
+ }
+ }
+}
+
+static void nvfx_transfer_inline_write( struct pipe_context *pipe,
+ struct pipe_resource *pr,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned slice_stride)
+{
+ if(pr->target != PIPE_BUFFER)
+ {
+ u_default_transfer_inline_write(pipe, pr, sr, usage, box, data, stride, slice_stride);
+ }
+ else
+ {
+ struct nvfx_buffer* buffer = nvfx_buffer(pr);
+ unsigned begin = util_format_get_stride(pr->format, box->x);
+ unsigned size = util_format_get_stride(pr->format, box->width);
+ memcpy(buffer->data + begin, data, size);
+ nvfx_buffer_dirty_interval(buffer, begin, size,
+ !!(pr->flags & PIPE_TRANSFER_UNSYNCHRONIZED));
+ }
+}
- if(!--tx->map_count)
- nouveau_screen_bo_unmap(pipe->screen, mt->base.bo);
+void
+nvfx_init_transfer_functions(struct pipe_context *pipe)
+{
+ pipe->get_transfer = nvfx_transfer_new;
+ pipe->transfer_map = nvfx_transfer_map;
+ pipe->transfer_flush_region = nvfx_transfer_flush_region;
+ pipe->transfer_unmap = nvfx_transfer_unmap;
+ pipe->transfer_destroy = nvfx_transfer_destroy;
+ pipe->transfer_inline_write = nvfx_transfer_inline_write;
}
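A hypothetical caller-side sketch (assuming the standard pipe_buffer_* helpers from util/u_inlines.h, as used by the code removed elsewhere in this patch): with the new transfer path, mapping a PIPE_BUFFER hands back a pointer straight into the shadow copy, and the dirty interval is recorded when the transfer is torn down (or on an explicit flush).

    struct pipe_transfer* xfer;
    uint32_t* p = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_WRITE, &xfer);
    p[0] = 0x3f800000;                   /* lands directly in nvfx_buffer->data    */
    pipe_buffer_unmap(pipe, buf, xfer);  /* nvfx_transfer_destroy() then records
                                          * the dirty interval for the next upload */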
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c
index 4aa37938425..a6cd1256350 100644
--- a/src/gallium/drivers/nvfx/nvfx_vbo.c
+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
@@ -2,6 +2,7 @@
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "translate/translate.h"
#include "nvfx_context.h"
#include "nvfx_state.h"
@@ -10,646 +11,583 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_class.h"
#include "nouveau/nouveau_pushbuf.h"
-#include "nouveau/nouveau_util.h"
-static INLINE int
-nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
+static inline unsigned
+util_guess_unique_indices_count(unsigned mode, unsigned indices)
{
- switch (pipe) {
- case PIPE_FORMAT_R32_FLOAT:
- case PIPE_FORMAT_R32G32_FLOAT:
- case PIPE_FORMAT_R32G32B32_FLOAT:
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- *fmt = NV34TCL_VTXFMT_TYPE_FLOAT;
- break;
- case PIPE_FORMAT_R16_FLOAT:
- case PIPE_FORMAT_R16G16_FLOAT:
- case PIPE_FORMAT_R16G16B16_FLOAT:
- case PIPE_FORMAT_R16G16B16A16_FLOAT:
- *fmt = NV34TCL_VTXFMT_TYPE_HALF;
- break;
- case PIPE_FORMAT_R8_UNORM:
- case PIPE_FORMAT_R8G8_UNORM:
- case PIPE_FORMAT_R8G8B8_UNORM:
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- *fmt = NV34TCL_VTXFMT_TYPE_UBYTE;
- break;
- case PIPE_FORMAT_R16_SSCALED:
- case PIPE_FORMAT_R16G16_SSCALED:
- case PIPE_FORMAT_R16G16B16_SSCALED:
- case PIPE_FORMAT_R16G16B16A16_SSCALED:
- *fmt = NV34TCL_VTXFMT_TYPE_USHORT;
- break;
- default:
- NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
- return 1;
+ /* Euler's formula gives V =
+ * = E - F + 2 =
+ * = F * (polygon_edges / 2 - 1) + 2 =
+ * = F * (polygon_edges - 2) / 2 + 2 =
+ * = indices * (polygon_edges - 2) / (2 * indices_per_face) + 2
+ * = indices * (1 / 2 - 1 / polygon_edges) + 2
+ */
+ switch(mode)
+ {
+ case PIPE_PRIM_LINES:
+ return indices >> 1;
+ case PIPE_PRIM_TRIANGLES:
+ {
+ // avoid an expensive division by 3 using the multiplicative inverse mod 2^32
+ unsigned q;
+ unsigned inv3 = 2863311531;
+ indices >>= 1;
+ q = indices * inv3;
+ if(unlikely(q >= indices))
+ {
+ q += inv3;
+ if(q >= indices)
+ q += inv3;
+ }
+ return q + 2;
+ //return indices / 6 + 2;
}
-
- switch (pipe) {
- case PIPE_FORMAT_R8_UNORM:
- case PIPE_FORMAT_R32_FLOAT:
- case PIPE_FORMAT_R16_FLOAT:
- case PIPE_FORMAT_R16_SSCALED:
- *ncomp = 1;
- break;
- case PIPE_FORMAT_R8G8_UNORM:
- case PIPE_FORMAT_R32G32_FLOAT:
- case PIPE_FORMAT_R16G16_FLOAT:
- case PIPE_FORMAT_R16G16_SSCALED:
- *ncomp = 2;
- break;
- case PIPE_FORMAT_R8G8B8_UNORM:
- case PIPE_FORMAT_R32G32B32_FLOAT:
- case PIPE_FORMAT_R16G16B16_FLOAT:
- case PIPE_FORMAT_R16G16B16_SSCALED:
- *ncomp = 3;
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- case PIPE_FORMAT_R16G16B16A16_FLOAT:
- case PIPE_FORMAT_R16G16B16A16_SSCALED:
- *ncomp = 4;
- break;
+ // guess that indexed quads are connected in a strip-like fashion (each quad sharing an edge with the previous one), since a closed mesh seems unlikely
+ case PIPE_PRIM_QUADS:
+ return (indices >> 1) + 2;
+ // return (indices >> 2) + 2; // if it is a closed mesh
default:
- NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
- return 1;
+ return indices;
}
-
- return 0;
}
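The triangle case above divides the halved index count by 3 without a division instruction: 2863311531 is the multiplicative inverse of 3 modulo 2^32, so multiplying an exact multiple of 3 by it yields the quotient directly, and the extra additions of inv3 nudge the result toward the quotient when there is a remainder. A standalone check of the core identity (plain C, not driver code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint32_t inv3 = 2863311531u; /* (2^33 + 1) / 3, i.e. 3 * inv3 == 1 (mod 2^32) */
	uint32_t n;

	/* For multiples of 3, a single modular multiply gives the exact quotient. */
	for (n = 0; n < 30000; n += 3)
		assert((uint32_t)(n * inv3) == n / 3);

	printf("3 * %u (mod 2^32) = %u\n", (unsigned)inv3, (unsigned)(3u * inv3));
	return 0;
}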
-static boolean
-nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib,
- unsigned ib_size)
+static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
- unsigned type;
-
- if (!ib) {
- nvfx->idxbuf_buffer = NULL;
- nvfx->idxbuf_format = 0xdeadbeef;
- return FALSE;
+ struct nvfx_context* nvfx = nvfx_context(pipe);
+ unsigned hardware_cost = 0;
+ unsigned inline_cost = 0;
+ unsigned unique_vertices;
+ unsigned upload_mode;
+ if (info->indexed)
+ unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
+ else
+ unique_vertices = info->count;
+
+ /* Here we try to figure out whether we are better off writing vertex data directly
+ * to the FIFO, or creating hardware buffer objects and pointing the hardware at them.
+ *
+ * This is done by computing the total memcpy cost of each option, ignoring uploads
+ * if we think that the buffer is static, and thus that the upload cost will be amortized
+ * over future draw calls.
+ *
+ * For instance, if everything looks static we always create buffer objects, while if
+ * everything is a user buffer and we are not doing indexed drawing, we never do.
+ *
+ * Another interesting case is a small user vertex buffer combined with a huge user
+ * index buffer: there we upload the vertex buffer so that we can use hardware index
+ * lookup. In the opposite case we instead do the index lookup in software, to avoid
+ * uploading a huge amount of vertex data that is never going to be used.
+ *
+ * Otherwise, we generally move a buffer to the GPU after it has been pushed
+ * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times without having been updated with
+ * a transfer (or destroyed) in the meantime.
+ *
+ * There is no special handling for user buffers, since applications can use
+ * OpenGL VBOs in a one-shot fashion too; the OpenGL 3/4 core profiles actually force this.
+ *
+ * Note that currently we don't support putting only some of the data on the FIFO and
+ * the rest in vertex buffers (constant and instanced data is independent of this).
+ *
+ * nVidia doesn't seem to do this either, even though it should be at least
+ * doable with VTX_ATTR, and possibly with VERTEX_DATA too if not indexed.
+ */
+
+ for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
+ {
+ struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+ struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+ buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices;
+ if (!nvfx_buffer_seems_static(buffer))
+ {
+ hardware_cost += buffer->dirty_end - buffer->dirty_begin;
+ if (!buffer->base.bo)
+ hardware_cost += nvfx->screen->buffer_allocation_cost;
+ }
+ inline_cost += vbi->per_vertex_size * info->count;
}
- if (!nvfx->screen->index_buffer_reloc_flags || ib_size == 1)
- return FALSE;
+ float best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
+ boolean prefer_hardware_indices = FALSE;
+ unsigned index_inline_cost = 0;
+ unsigned index_hardware_cost = 0;
- switch (ib_size) {
- case 2:
- type = NV34TCL_IDXBUF_FORMAT_TYPE_U16;
- break;
- case 4:
- type = NV34TCL_IDXBUF_FORMAT_TYPE_U32;
- break;
- default:
- return FALSE;
- }
+ if (info->indexed)
+ {
+ index_inline_cost = nvfx->idxbuf.index_size * info->count;
+ if (nvfx->screen->index_buffer_reloc_flags
+ && (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4)
+ && !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1)))
+ {
+ struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer);
+ buffer->bytes_to_draw_until_static -= index_inline_cost;
- if (ib != nvfx->idxbuf_buffer ||
- type != nvfx->idxbuf_format) {
- nvfx->dirty |= NVFX_NEW_ARRAYS;
- nvfx->idxbuf_buffer = ib;
- nvfx->idxbuf_format = type;
- }
+ prefer_hardware_indices = TRUE;
- return TRUE;
-}
+ if (!nvfx_buffer_seems_static(buffer))
+ {
+ index_hardware_cost = buffer->dirty_end - buffer->dirty_begin;
+ if (!buffer->base.bo)
+ index_hardware_cost += nvfx->screen->buffer_allocation_cost;
+ }
-// type must be floating point
-static inline void
-nvfx_vbo_static_attrib(struct nvfx_context *nvfx,
- int attrib, struct pipe_vertex_element *ve,
- struct pipe_vertex_buffer *vb, unsigned ncomp)
-{
- struct pipe_transfer *transfer;
- struct nouveau_channel* chan = nvfx->screen->base.channel;
- void *map;
- float *v;
-
- map = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer);
- map = (uint8_t *) map + vb->buffer_offset + ve->src_offset;
-
- v = map;
-
- switch (ncomp) {
- case 4:
- OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4));
- OUT_RING(chan, fui(v[0]));
- OUT_RING(chan, fui(v[1]));
- OUT_RING(chan, fui(v[2]));
- OUT_RING(chan, fui(v[3]));
- break;
- case 3:
- OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3));
- OUT_RING(chan, fui(v[0]));
- OUT_RING(chan, fui(v[1]));
- OUT_RING(chan, fui(v[2]));
- break;
- case 2:
- OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2));
- OUT_RING(chan, fui(v[0]));
- OUT_RING(chan, fui(v[1]));
- break;
- case 1:
- OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1));
- OUT_RING(chan, fui(v[0]));
- break;
+ if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost)
+ {
+ best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost;
+ }
+ else
+ {
+ best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost;
+ prefer_hardware_indices = TRUE;
+ }
+ }
}
- pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer);
+ /* let's finally figure out which of the 3 paths we want to take */
+ if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost))
+ upload_mode = 1 + prefer_hardware_indices;
+ else
+ upload_mode = 0;
+
+#ifdef DEBUG
+ if (unlikely(nvfx->screen->trace_draw))
+ {
+ fprintf(stderr, "DRAW");
+ if (info->indexed)
+ {
+ fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size);
+ if (info->index_bias)
+ fprintf(stderr, " biased %i", info->index_bias);
+ fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index);
+ }
+ if (info->instance_count > 1)
+ fprintf(stderr, " %u instances from %u", info->instance_count, info->start_instance);
+ fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode);
+ if (!upload_mode)
+ fprintf(stderr, " -> inline vertex data");
+ else if (upload_mode == 2 || !info->indexed)
+ fprintf(stderr, " -> buffer range");
+ else
+ fprintf(stderr, " -> inline indices");
+ fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost);
+ for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
+ {
+ struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+ struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+ if (i)
+ fprintf(stderr, ", ");
+ fprintf(stderr, "%p%s left %Li", buffer, buffer->last_update_static ? " static" : "", buffer->bytes_to_draw_until_static);
+ }
+ fprintf(stderr, ">\n");
+ }
+#endif
+
+ return upload_mode;
}
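The final comparison boils down to: push everything inline unless the bytes that would have to be copied into the FIFO outweigh the (scaled) bytes that would have to be uploaded into buffer objects. A toy restatement with invented numbers, just to make the arithmetic concrete (inline_cost_per_hardware_cost is a screen tunable; every value below is made up):

#include <stdio.h>

int main(void)
{
	/* All numbers are hypothetical, chosen only to illustrate the comparison. */
	unsigned inline_cost = 12000;        /* vertex bytes that would go on the FIFO  */
	unsigned index_inline_cost = 3000;   /* index bytes that would go on the FIFO   */
	unsigned hardware_cost = 4096;       /* dirty vertex bytes needing a BO upload  */
	float best_index_cost = 1500.0f;     /* cheaper of inline vs. uploaded indices  */
	float inline_cost_per_hardware_cost = 2.0f; /* screen tunable weighting uploads */

	int use_vertex_buffers =
		(float)(inline_cost + index_inline_cost) >
		(float)hardware_cost * inline_cost_per_hardware_cost + best_index_cost;

	printf("%s\n", use_vertex_buffers ? "use vertex buffers" : "push vertices inline");
	return 0;
}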
-static void
-nvfx_draw_arrays(struct pipe_context *pipe,
- unsigned mode, unsigned start, unsigned count)
+void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
- struct nvfx_screen *screen = nvfx->screen;
- struct nouveau_channel *chan = screen->base.channel;
- unsigned restart = 0;
-
- nvfx_vbo_set_idxbuf(nvfx, NULL, 0);
- if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
- nvfx_draw_elements_swtnl(pipe, NULL, 0, 0,
- mode, start, count);
- return;
- }
+ unsigned upload_mode = 0;
- while (count) {
- unsigned vc, nr, avail;
+ if (!nvfx->vtxelt->needs_translate)
+ upload_mode = nvfx_decide_upload_mode(pipe, info);
- nvfx_state_emit(nvfx);
+ nvfx->use_index_buffer = upload_mode > 1;
- avail = AVAIL_RING(chan);
- avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+ if ((upload_mode > 0) != nvfx->use_vertex_buffers)
+ {
+ nvfx->use_vertex_buffers = (upload_mode > 0);
+ nvfx->dirty |= NVFX_NEW_ARRAYS;
+ nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
+ }
- vc = nouveau_vbuf_split(avail, 6, 256,
- mode, start, count, &restart);
- if (!vc) {
- FIRE_RING(chan);
- continue;
+ if (upload_mode > 0)
+ {
+ for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
+ {
+ struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+ nvfx_buffer_upload(nvfx_buffer(vb->buffer));
}
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, nvgl_primitive(mode));
+ if (upload_mode > 1)
+ {
+ nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer));
- nr = (vc & 0xff);
- if (nr) {
- OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1));
- OUT_RING (chan, ((nr - 1) << 24) | start);
- start += nr;
+ if (unlikely(info->index_bias != nvfx->base_vertex))
+ {
+ nvfx->base_vertex = info->index_bias;
+ nvfx->dirty |= NVFX_NEW_ARRAYS;
+ }
}
-
- nr = vc >> 8;
- while (nr) {
- unsigned push = nr > 2047 ? 2047 : nr;
-
- nr -= push;
-
- OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push));
- while (push--) {
- OUT_RING(chan, ((0x100 - 1) << 24) | start);
- start += 0x100;
+ else
+ {
+ if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex))
+ {
+ nvfx->base_vertex = 0;
+ nvfx->dirty |= NVFX_NEW_ARRAYS;
}
}
-
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, 0);
-
- count -= vc;
- start = restart;
}
- pipe->flush(pipe, 0, NULL);
+ if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx))
+ nvfx_draw_vbo_swtnl(pipe, info);
+ else
+ nvfx_push_vbo(pipe, info);
}
-static INLINE void
-nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
- unsigned mode, unsigned start, unsigned count)
+boolean
+nvfx_vbo_validate(struct nvfx_context *nvfx)
{
- struct nvfx_screen *screen = nvfx->screen;
- struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ int i;
+ int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
+ unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
- while (count) {
- uint8_t *elts = (uint8_t *)ib + start;
- unsigned vc, push, restart = 0, avail;
+ if (!elements)
+ return TRUE;
- nvfx_state_emit(nvfx);
+ MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
+ for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i)
+ {
+ struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+ struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+ float v[4];
+ ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
+ nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
+ }
- avail = AVAIL_RING(chan);
- avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
- vc = nouveau_vbuf_split(avail, 6, 2,
- mode, start, count, &restart);
- if (vc == 0) {
- FIRE_RING(chan);
- continue;
- }
- count -= vc;
+ OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
+ if(nvfx->use_vertex_buffers)
+ {
+ unsigned idx = 0;
+ for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+ struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, nvgl_primitive(mode));
+ if(idx != ve->idx)
+ {
+ assert(idx < ve->idx);
+ OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx);
+ idx = ve->idx;
+ }
- if (vc & 1) {
- OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
- OUT_RING (chan, elts[0]);
- elts++; vc--;
+ OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT));
+ ++idx;
}
+ if(idx != nvfx->vtxelt->num_elements)
+ OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx);
+ }
+ else
+ OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements);
- while (vc) {
- unsigned i;
-
- push = MIN2(vc, 2047 * 2);
-
- OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
- for (i = 0; i < push; i+=2)
- OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
+ for(i = nvfx->vtxelt->num_elements; i < elements; ++i)
+ OUT_RING(chan, NV34TCL_VTXFMT_TYPE_32_FLOAT);
- vc -= push;
- elts += push;
+ if(nvfx->is_nv4x) {
+ unsigned i;
+ /* seems to be some kind of cache flushing */
+ for(i = 0; i < 3; ++i) {
+ OUT_RING(chan, RING_3D(0x1718, 1));
+ OUT_RING(chan, 0);
}
-
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, 0);
-
- start = restart;
}
-}
-
-static INLINE void
-nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
- unsigned mode, unsigned start, unsigned count)
-{
- struct nvfx_screen *screen = nvfx->screen;
- struct nouveau_channel *chan = screen->base.channel;
-
- while (count) {
- uint16_t *elts = (uint16_t *)ib + start;
- unsigned vc, push, restart = 0, avail;
- nvfx_state_emit(nvfx);
-
- avail = AVAIL_RING(chan);
- avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
-
- vc = nouveau_vbuf_split(avail, 6, 2,
- mode, start, count, &restart);
- if (vc == 0) {
- FIRE_RING(chan);
- continue;
- }
- count -= vc;
+ OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
+ if(nvfx->use_vertex_buffers)
+ {
+ unsigned idx = 0;
+ for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+ struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+ struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, nvgl_primitive(mode));
+ for(; idx < ve->idx; ++idx)
+ OUT_RING(chan, 0);
- if (vc & 1) {
- OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
- OUT_RING (chan, elts[0]);
- elts++; vc--;
+ OUT_RELOC(chan, bo,
+ vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
+ vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
+ ++idx;
}
- while (vc) {
- unsigned i;
-
- push = MIN2(vc, 2047 * 2);
-
- OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
- for (i = 0; i < push; i+=2)
- OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
-
- vc -= push;
- elts += push;
- }
+ for(; idx < elements; ++idx)
+ OUT_RING(chan, 0);
+ }
+ else
+ {
+ for (i = 0; i < elements; i++)
+ OUT_RING(chan, 0);
+ }
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, 0);
+ OUT_RING(chan, RING_3D(0x1710, 1));
+ OUT_RING(chan, 0);
- start = restart;
- }
+ nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
+ return TRUE;
}
-static INLINE void
-nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
- unsigned mode, unsigned start, unsigned count)
+void
+nvfx_vbo_relocate(struct nvfx_context *nvfx)
{
- struct nvfx_screen *screen = nvfx->screen;
- struct nouveau_channel *chan = screen->base.channel;
-
- while (count) {
- uint32_t *elts = (uint32_t *)ib + start;
- unsigned vc, push, restart = 0, avail;
-
- nvfx_state_emit(nvfx);
-
- avail = AVAIL_RING(chan);
- avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
-
- vc = nouveau_vbuf_split(avail, 5, 1,
- mode, start, count, &restart);
- if (vc == 0) {
- FIRE_RING(chan);
- continue;
- }
- count -= vc;
-
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, nvgl_primitive(mode));
-
- while (vc) {
- push = MIN2(vc, 2047);
-
- OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push));
- OUT_RINGp (chan, elts, push);
-
- vc -= push;
- elts += push;
- }
+ if(!nvfx->use_vertex_buffers)
+ return;
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, 0);
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
+ int i;
- start = restart;
+ MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
+ for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+ struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+ struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+ struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
+
+ OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(ve->idx), 1),
+ vb_flags, 0, 0);
+ OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
+ vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
}
}
static void
-nvfx_draw_elements_inline(struct pipe_context *pipe,
- struct pipe_resource *ib,
- unsigned ib_size, int ib_bias,
- unsigned mode, unsigned start, unsigned count)
+nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags)
{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- struct pipe_transfer *transfer;
- void *map;
-
- map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer);
- if (!ib) {
- NOUVEAU_ERR("failed mapping ib\n");
- return;
- }
+ struct nouveau_channel* chan = nvfx->screen->base.channel;
+ unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV34TCL_IDXBUF_FORMAT_TYPE_U16 : NV34TCL_IDXBUF_FORMAT_TYPE_U32;
+ struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo;
+ ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;
- assert(ib_bias == 0);
-
- switch (ib_size) {
- case 1:
- nvfx_draw_elements_u08(nvfx, map, mode, start, count);
- break;
- case 2:
- nvfx_draw_elements_u16(nvfx, map, mode, start, count);
- break;
- case 4:
- nvfx_draw_elements_u32(nvfx, map, mode, start, count);
- break;
- default:
- NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
- break;
- }
+ assert(nvfx->screen->index_buffer_reloc_flags);
- pipe_buffer_unmap(pipe, ib, transfer);
+ MARK_RING(chan, 3, 3);
+ if(ib_flags & NOUVEAU_BO_DUMMY)
+ OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), ib_flags, 0, 0);
+ else
+ OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
+ OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0);
+ OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
+ 0, NV34TCL_IDXBUF_FORMAT_DMA1);
}
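nvfx_idxbuf_emit is only reached for index buffers that passed the eligibility test in nvfx_decide_upload_mode: hardware index lookup needs 16- or 32-bit indices, index-buffer relocations enabled on the screen, and an offset aligned to the index size. A standalone restatement of the size/alignment part of that test (sketch only, not driver code):

#include <stdbool.h>
#include <stdio.h>

/* True if the hardware can fetch indices of this size at this buffer offset. */
static bool hw_index_lookup_ok(unsigned index_size, unsigned offset)
{
	if (index_size != 2 && index_size != 4)
		return false;                        /* 8-bit indices must be pushed inline */
	return (offset & (index_size - 1)) == 0;     /* misaligned offsets fall back too */
}

int main(void)
{
	printf("%d %d %d\n",
	       hw_index_lookup_ok(2, 6),   /* 1: 16-bit, 2-byte aligned offset */
	       hw_index_lookup_ok(4, 6),   /* 0: 32-bit but not 4-byte aligned */
	       hw_index_lookup_ok(1, 0));  /* 0: 8-bit indices */
	return 0;
}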
-static void
-nvfx_draw_elements_vbo(struct pipe_context *pipe,
- unsigned mode, unsigned start, unsigned count)
+void
+nvfx_idxbuf_validate(struct nvfx_context* nvfx)
{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- struct nvfx_screen *screen = nvfx->screen;
- struct nouveau_channel *chan = screen->base.channel;
- unsigned restart = 0;
-
- while (count) {
- unsigned nr, vc, avail;
-
- nvfx_state_emit(nvfx);
+ nvfx_idxbuf_emit(nvfx, 0);
+}
- avail = AVAIL_RING(chan);
- avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+void
+nvfx_idxbuf_relocate(struct nvfx_context* nvfx)
+{
+ nvfx_idxbuf_emit(nvfx, NOUVEAU_BO_DUMMY);
+}
- vc = nouveau_vbuf_split(avail, 6, 256,
- mode, start, count, &restart);
- if (!vc) {
- FIRE_RING(chan);
- continue;
- }
+unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+ [PIPE_FORMAT_R32G32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+ [PIPE_FORMAT_R32G32B32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+ [PIPE_FORMAT_R16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+ [PIPE_FORMAT_R16G16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+ [PIPE_FORMAT_R16G16B16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+ [PIPE_FORMAT_R8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+ [PIPE_FORMAT_R8G8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+ [PIPE_FORMAT_R8G8B8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_USCALED] = NV34TCL_VTXFMT_TYPE_8_USCALED,
+ [PIPE_FORMAT_R16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+ [PIPE_FORMAT_R16G16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+ [PIPE_FORMAT_R16G16B16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+ [PIPE_FORMAT_R16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+ [PIPE_FORMAT_R16G16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+ [PIPE_FORMAT_R16G16B16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+ [PIPE_FORMAT_R16G16B16A16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+};
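nvfx_vbo_validate combines one of the table entries above with the component count and, for the per-vertex case, the buffer stride into a single VTXFMT word. A small sketch of that packing (the shift and type values are illustrative stand-ins for the nouveau_class.h definitions):

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the NV34TCL_VTXFMT_* definitions in nouveau_class.h. */
enum {
	VTXFMT_TYPE_32_FLOAT = 0x2,
	VTXFMT_SIZE_SHIFT    = 4,
	VTXFMT_STRIDE_SHIFT  = 8,
};

/* Pack hardware type, component count and per-vertex stride into one word. */
static uint32_t vtxfmt_word(uint32_t hw_type, unsigned ncomp, unsigned stride)
{
	return hw_type | (ncomp << VTXFMT_SIZE_SHIFT) | (stride << VTXFMT_STRIDE_SHIFT);
}

int main(void)
{
	/* e.g. a 3-component float attribute inside a 24-byte interleaved vertex */
	printf("0x%08x\n", (unsigned)vtxfmt_word(VTXFMT_TYPE_32_FLOAT, 3, 24));
	return 0;
}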
+
+static void *
+nvfx_vtxelts_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct nvfx_context* nvfx = nvfx_context(pipe);
+ struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
+ struct translate_key transkey;
+ unsigned per_vertex_size[16];
+ memset(per_vertex_size, 0, sizeof(per_vertex_size));
+
+ unsigned vb_compacted_index[16];
+
+ assert(num_elements < 16); /* not doing fallbacks yet */
+
+ memcpy(cso->pipe, elements, num_elements * sizeof(elements[0]));
+ cso->num_elements = num_elements;
+ cso->needs_translate = FALSE;
+
+ transkey.nr_elements = 0;
+ transkey.output_stride = 0;
+
+ for(unsigned i = 0; i < num_elements; ++i)
+ {
+ const struct pipe_vertex_element* ve = &elements[i];
+ if(!ve->instance_divisor)
+ per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1);
+ }
+
+ for(unsigned i = 0; i < 16; ++i)
+ {
+ if(per_vertex_size[i])
+ {
+ unsigned idx = cso->num_per_vertex_buffer_infos++;
+ cso->per_vertex_buffer_info[idx].vertex_buffer_index = i;
+ cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i];
+ vb_compacted_index[i] = idx;
+ }
+ }
+
+ for(unsigned i = 0; i < num_elements; ++i)
+ {
+ const struct pipe_vertex_element* ve = &elements[i];
+ unsigned type = nvfx_vertex_formats[ve->src_format];
+ unsigned ncomp = util_format_get_nr_components(ve->src_format);
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, nvgl_primitive(mode));
+ //if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX)
+ if(ve->instance_divisor)
+ {
+ struct nvfx_low_frequency_element* lfve;
+ cso->vtxfmt[i] = NV34TCL_VTXFMT_TYPE_32_FLOAT;
+
+ //if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT)
+ if(0)
+ lfve = &cso->constant[cso->num_constant++];
+ else
+ {
+ lfve = &cso->per_instance[cso->num_per_instance++].base;
+ ((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor;
+ }
- nr = (vc & 0xff);
- if (nr) {
- OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1));
- OUT_RING (chan, ((nr - 1) << 24) | start);
- start += nr;
+ lfve->idx = i;
+ lfve->vertex_buffer_index = ve->vertex_buffer_index;
+ lfve->src_offset = ve->src_offset;
+ lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float;
+ lfve->ncomp = ncomp;
}
-
- nr = vc >> 8;
- while (nr) {
- unsigned push = nr > 2047 ? 2047 : nr;
-
- nr -= push;
-
- OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push));
- while (push--) {
- OUT_RING(chan, ((0x100 - 1) << 24) | start);
- start += 0x100;
+ else
+ {
+ unsigned idx;
+
+ idx = cso->num_per_vertex++;
+ cso->per_vertex[idx].idx = i;
+ cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index;
+ cso->per_vertex[idx].src_offset = ve->src_offset;
+
+ idx = transkey.nr_elements++;
+ transkey.element[idx].input_format = ve->src_format;
+ transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index];
+ transkey.element[idx].input_offset = ve->src_offset;
+ transkey.element[idx].instance_divisor = 0;
+ transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL;
+ if(type)
+ {
+ transkey.element[idx].output_format = ve->src_format;
+ cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type;
+ }
+ else
+ {
+ unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT};
+ transkey.element[idx].output_format = float32[ncomp - 1];
+ cso->needs_translate = TRUE;
+ cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | NV34TCL_VTXFMT_TYPE_32_FLOAT;
}
+ transkey.element[idx].output_offset = transkey.output_stride;
+ transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3;
}
+ }
- OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
- OUT_RING (chan, 0);
+ cso->translate = translate_generic_create(&transkey);
+ cso->vertex_length = transkey.output_stride >> 2;
+ cso->max_vertices_per_packet = 2047 / cso->vertex_length;
- count -= vc;
- start = restart;
- }
+ return (void *)cso;
}
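Each per-vertex element above advances the translate output stride by its output format's size rounded up to 4 bytes, so every attribute in the pushed vertex stays dword-aligned. A standalone illustration of that rounding (not driver code):

#include <assert.h>

/* Round a byte count up to the next multiple of 4, as done for output_stride. */
static unsigned align4(unsigned bytes)
{
	return (bytes + 3) & ~3u;
}

int main(void)
{
	assert(align4(1)  == 4);   /* e.g. a single R8_UNORM component      */
	assert(align4(6)  == 8);   /* e.g. R16G16B16_SSCALED                */
	assert(align4(12) == 12);  /* already aligned, e.g. R32G32B32_FLOAT */
	return 0;
}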
static void
-nvfx_draw_elements(struct pipe_context *pipe,
- struct pipe_resource *indexBuffer,
- unsigned indexSize, int indexBias,
- unsigned mode, unsigned start, unsigned count)
+nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
- struct nvfx_context *nvfx = nvfx_context(pipe);
- boolean idxbuf;
-
- idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize);
- if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
- nvfx_draw_elements_swtnl(pipe,
- indexBuffer, indexSize, indexBias,
- mode, start, count);
- return;
- }
-
- if (idxbuf) {
- nvfx_draw_elements_vbo(pipe, mode, start, count);
- } else {
- nvfx_draw_elements_inline(pipe,
- indexBuffer, indexSize, indexBias,
- mode, start, count);
- }
-
- pipe->flush(pipe, 0, NULL);
+ FREE(hwcso);
}
-void
-nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+static void
+nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
{
struct nvfx_context *nvfx = nvfx_context(pipe);
- if (info->indexed && nvfx->idxbuf.buffer) {
- unsigned offset;
-
- assert(nvfx->idxbuf.offset % nvfx->idxbuf.index_size == 0);
- offset = nvfx->idxbuf.offset / nvfx->idxbuf.index_size;
-
- nvfx_draw_elements(pipe,
- nvfx->idxbuf.buffer,
- nvfx->idxbuf.index_size,
- info->index_bias,
- info->mode,
- info->start + offset,
- info->count);
- }
- else {
- nvfx_draw_arrays(pipe,
- info->mode,
- info->start,
- info->count);
- }
+ nvfx->vtxelt = hwcso;
+ nvfx->use_vertex_buffers = -1;
+ nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}
-boolean
-nvfx_vbo_validate(struct nvfx_context *nvfx)
+static void
+nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+ const struct pipe_vertex_buffer *vb)
{
- struct nouveau_channel* chan = nvfx->screen->base.channel;
- struct pipe_resource *ib = nvfx->idxbuf_buffer;
- unsigned ib_format = nvfx->idxbuf_format;
- int i;
- int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
- uint32_t vtxfmt[16];
- unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
-
- if (!elements)
- return TRUE;
-
- nvfx->vbo_bo = 0;
-
- MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
- for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
- struct pipe_vertex_element *ve;
- struct pipe_vertex_buffer *vb;
- unsigned type, ncomp;
-
- ve = &nvfx->vtxelt->pipe[i];
- vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
-
- if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
- MARK_UNDO(chan);
- nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS;
- return FALSE;
- }
+ struct nvfx_context *nvfx = nvfx_context(pipe);
- if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) {
- nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp);
- vtxfmt[i] = type;
- } else {
- vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) |
- (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type);
- nvfx->vbo_bo |= (1 << i);
- }
+ for(unsigned i = 0; i < count; ++i)
+ {
+ pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
+ nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
+ nvfx->vtxbuf[i].max_index = vb[i].max_index;
+ nvfx->vtxbuf[i].stride = vb[i].stride;
}
- for(; i < elements; ++i)
- vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT;
-
- OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
- OUT_RINGp(chan, vtxfmt, elements);
-
- if(nvfx->is_nv4x) {
- unsigned i;
- /* seems to be some kind of cache flushing */
- for(i = 0; i < 3; ++i) {
- OUT_RING(chan, RING_3D(0x1718, 1));
- OUT_RING(chan, 0);
- }
- }
+ for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
+ pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);
- OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
- for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
- struct pipe_vertex_element *ve;
- struct pipe_vertex_buffer *vb;
+ nvfx->vtxbuf_nr = count;
+ nvfx->use_vertex_buffers = -1;
+ nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
+}
- ve = &nvfx->vtxelt->pipe[i];
- vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+static void
+nvfx_set_index_buffer(struct pipe_context *pipe,
+ const struct pipe_index_buffer *ib)
+{
+ struct nvfx_context *nvfx = nvfx_context(pipe);
- if (!(nvfx->vbo_bo & (1 << i)))
- OUT_RING(chan, 0);
- else
- {
- struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
- OUT_RELOC(chan, bo,
- vb->buffer_offset + ve->src_offset,
- vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
- 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
- }
+ if(ib)
+ {
+ pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
+ nvfx->idxbuf.index_size = ib->index_size;
+ nvfx->idxbuf.offset = ib->offset;
}
-
- for (; i < elements; i++)
- OUT_RING(chan, 0);
-
- OUT_RING(chan, RING_3D(0x1710, 1));
- OUT_RING(chan, 0);
-
- if (ib) {
- unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;
- struct nouveau_bo* bo = nvfx_resource(ib)->bo;
-
- assert(nvfx->screen->index_buffer_reloc_flags);
-
- OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
- OUT_RELOC(chan, bo, 0, ib_flags | NOUVEAU_BO_LOW, 0, 0);
- OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
- 0, NV34TCL_IDXBUF_FORMAT_DMA1);
+ else
+ {
+ pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
+ nvfx->idxbuf.index_size = 0;
+ nvfx->idxbuf.offset = 0;
}
- nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
- return TRUE;
+ nvfx->dirty |= NVFX_NEW_INDEX;
+ nvfx->draw_dirty |= NVFX_NEW_INDEX;
}
void
-nvfx_vbo_relocate(struct nvfx_context *nvfx)
+nvfx_init_vbo_functions(struct nvfx_context *nvfx)
{
- struct nouveau_channel* chan = nvfx->screen->base.channel;
- unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
- int i;
+ nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
+ nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;
- MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
- for(i = 0; i < nvfx->vtxelt->num_elements; ++i) {
- if(nvfx->vbo_bo & (1 << i)) {
- struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i];
- struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
- struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
- OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1),
- vb_flags, 0, 0);
- OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset,
- vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
- 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
- }
- }
-
- if(nvfx->idxbuf_buffer)
- {
- unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
- struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf_buffer)->bo;
-
- assert(nvfx->screen->index_buffer_reloc_flags);
-
- OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2),
- ib_flags, 0, 0);
- OUT_RELOC(chan, bo, 0,
- ib_flags | NOUVEAU_BO_LOW, 0, 0);
- OUT_RELOC(chan, bo, nvfx->idxbuf_format,
- ib_flags | NOUVEAU_BO_OR,
- 0, NV34TCL_IDXBUF_FORMAT_DMA1);
- }
+ nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
+ nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
+ nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
}
diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c
index 24d9846310e..939d2b83aee 100644
--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -10,6 +10,7 @@
#include "nvfx_context.h"
#include "nvfx_state.h"
+#include "nvfx_resource.h"
/* TODO (at least...):
* 1. Indexed consts + ARL
@@ -874,7 +875,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
struct nouveau_grobj *eng3d = screen->eng3d;
struct nvfx_vertex_program *vp;
struct pipe_resource *constbuf;
- struct pipe_transfer *transfer = NULL;
boolean upload_code = FALSE, upload_data = FALSE;
int i;
@@ -983,11 +983,8 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
if (vp->nr_consts) {
float *map = NULL;
- if (constbuf) {
- map = pipe_buffer_map(pipe, constbuf,
- PIPE_TRANSFER_READ,
- &transfer);
- }
+ if (constbuf)
+ map = nvfx_buffer(constbuf)->data;
for (i = 0; i < vp->nr_consts; i++) {
struct nvfx_vertex_program_data *vpd = &vp->consts[i];
@@ -1005,9 +1002,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
OUT_RING (chan, i + vp->data->start);
OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
}
-
- if (constbuf)
- pipe_buffer_unmap(pipe, constbuf, transfer);
}
/* Upload vtxprog */