summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nv50
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nv50')
-rw-r--r--src/gallium/drivers/nv50/nv50_clear.c76
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c28
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h18
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c31
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c1223
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h11
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c29
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c200
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.h15
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c15
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c74
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c16
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c36
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c82
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c23
15 files changed, 1340 insertions, 537 deletions
diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c
index db44a9da0e3..e0b2d2880b0 100644
--- a/src/gallium/drivers/nv50/nv50_clear.c
+++ b/src/gallium/drivers/nv50/nv50_clear.c
@@ -27,64 +27,42 @@
#include "nv50_context.h"
void
-nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps,
- unsigned clearValue)
+nv50_clear(struct pipe_context *pipe, unsigned buffers,
+ const float *rgba, double depth, unsigned stencil)
{
struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->nvws->channel;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct pipe_framebuffer_state fb, s_fb = nv50->framebuffer;
- struct pipe_scissor_state sc, s_sc = nv50->scissor;
- unsigned dirty = nv50->dirty;
+ struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+ unsigned mode = 0, i;
- nv50->dirty = 0;
+ if (!nv50_state_validate(nv50))
+ return;
- if (ps->format == PIPE_FORMAT_Z24S8_UNORM ||
- ps->format == PIPE_FORMAT_Z16_UNORM) {
- fb.nr_cbufs = 0;
- fb.zsbuf = ps;
- } else {
- fb.nr_cbufs = 1;
- fb.cbufs[0] = ps;
- fb.zsbuf = NULL;
+ if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
+ BEGIN_RING(chan, tesla, NV50TCL_CLEAR_COLOR(0), 4);
+ OUT_RING (chan, fui(rgba[0]));
+ OUT_RING (chan, fui(rgba[1]));
+ OUT_RING (chan, fui(rgba[2]));
+ OUT_RING (chan, fui(rgba[3]));
+ mode |= 0x3c;
}
- fb.width = ps->width;
- fb.height = ps->height;
- pipe->set_framebuffer_state(pipe, &fb);
- sc.minx = sc.miny = 0;
- sc.maxx = fb.width;
- sc.maxy = fb.height;
- pipe->set_scissor_state(pipe, &sc);
+ if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
+ BEGIN_RING(chan, tesla, NV50TCL_CLEAR_DEPTH, 1);
+ OUT_RING (chan, fui(depth));
+ BEGIN_RING(chan, tesla, NV50TCL_CLEAR_STENCIL, 1);
+ OUT_RING (chan, stencil & 0xff);
- nv50_state_validate(nv50);
-
- switch (ps->format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- BEGIN_RING(chan, tesla, 0x0d80, 4);
- OUT_RINGf (chan, ubyte_to_float((clearValue >> 16) & 0xff));
- OUT_RINGf (chan, ubyte_to_float((clearValue >> 8) & 0xff));
- OUT_RINGf (chan, ubyte_to_float((clearValue >> 0) & 0xff));
- OUT_RINGf (chan, ubyte_to_float((clearValue >> 24) & 0xff));
- BEGIN_RING(chan, tesla, 0x19d0, 1);
- OUT_RING (chan, 0x3c);
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- BEGIN_RING(chan, tesla, 0x0d90, 1);
- OUT_RINGf (chan, (float)(clearValue >> 8) * (1.0 / 16777215.0));
- BEGIN_RING(chan, tesla, 0x0da0, 1);
- OUT_RING (chan, clearValue & 0xff);
- BEGIN_RING(chan, tesla, 0x19d0, 1);
- OUT_RING (chan, 0x03);
- break;
- default:
- pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height,
- clearValue);
- break;
+ mode |= 0x03;
}
- pipe->set_framebuffer_state(pipe, &s_fb);
- pipe->set_scissor_state(pipe, &s_sc);
- nv50->dirty |= dirty;
+ BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1);
+ OUT_RING (chan, mode);
+
+ for (i = 1; i < fb->nr_cbufs; i++) {
+ BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1);
+ OUT_RING (chan, (i << 6) | 0x3c);
+ }
}
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 565a5da668c..e02afc4be99 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -33,7 +33,7 @@ nv50_flush(struct pipe_context *pipe, unsigned flags,
{
struct nv50_context *nv50 = (struct nv50_context *)pipe;
- FIRE_RING(nv50->screen->nvws->channel);
+ FIRE_RING(nv50->screen->base.channel);
}
static void
@@ -51,6 +51,29 @@ nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
{
}
+static unsigned int
+nv50_is_texture_referenced( struct pipe_context *pipe,
+ struct pipe_texture *texture,
+ unsigned face, unsigned level)
+{
+ /**
+ * FIXME: Optimize.
+ */
+
+ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+}
+
+static unsigned int
+nv50_is_buffer_referenced( struct pipe_context *pipe,
+ struct pipe_buffer *buf)
+{
+ /**
+ * FIXME: Optimize.
+ */
+
+ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+}
+
struct pipe_context *
nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
{
@@ -76,6 +99,9 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv50->pipe.flush = nv50_flush;
+ nv50->pipe.is_texture_referenced = nv50_is_texture_referenced;
+ nv50->pipe.is_buffer_referenced = nv50_is_buffer_referenced;
+
nv50_init_surface_functions(nv50);
nv50_init_state_functions(nv50);
nv50_init_query_functions(nv50);
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 7b67a754397..9b8cc4d37d0 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -63,6 +63,11 @@ struct nv50_rasterizer_stateobj {
struct nouveau_stateobj *so;
};
+struct nv50_sampler_stateobj {
+ bool normalized;
+ unsigned tsc[8];
+};
+
struct nv50_miptree_level {
int *image_offset;
unsigned pitch;
@@ -70,7 +75,8 @@ struct nv50_miptree_level {
struct nv50_miptree {
struct pipe_texture base;
- struct pipe_buffer *buffer;
+
+ struct nouveau_bo *bo;
struct nv50_miptree_level level[PIPE_MAX_TEXTURE_LEVELS];
int image_nr;
@@ -93,13 +99,6 @@ nv50_surface(struct pipe_surface *pt)
return (struct nv50_surface *)pt;
}
-static INLINE struct pipe_buffer *
-nv50_surface_buffer(struct pipe_surface *surface)
-{
- struct nv50_miptree *mt = (struct nv50_miptree *)surface->texture;
- return mt->buffer;
-}
-
struct nv50_state {
unsigned dirty;
@@ -115,6 +114,7 @@ struct nv50_state {
unsigned viewport_bypass;
struct nouveau_stateobj *tsc_upload;
struct nouveau_stateobj *tic_upload;
+ unsigned miptree_nr;
struct nouveau_stateobj *vertprog;
struct nouveau_stateobj *fragprog;
struct nouveau_stateobj *vtxfmt;
@@ -147,7 +147,7 @@ struct nv50_context {
unsigned vtxbuf_nr;
struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
unsigned vtxelt_nr;
- unsigned *sampler[PIPE_MAX_SAMPLERS];
+ struct nv50_sampler_stateobj *sampler[PIPE_MAX_SAMPLERS];
unsigned sampler_nr;
struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS];
unsigned miptree_nr;
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index f79a7ca86c9..6b605ba416c 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -29,26 +29,35 @@
static struct pipe_texture *
nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
{
+ struct nouveau_device *dev = nouveau_screen(pscreen)->device;
struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
struct pipe_texture *pt = &mt->base;
- unsigned usage, width = tmp->width[0], height = tmp->height[0];
+ unsigned width = tmp->width[0], height = tmp->height[0];
unsigned depth = tmp->depth[0];
- int i, l;
+ uint32_t tile_mode, tile_flags, tile_h;
+ int ret, i, l;
mt->base = *tmp;
pipe_reference_init(&mt->base.reference, 1);
mt->base.screen = pscreen;
- usage = PIPE_BUFFER_USAGE_PIXEL;
switch (pt->format) {
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_Z16_UNORM:
- usage |= NOUVEAU_BUFFER_USAGE_ZETA;
+ tile_flags = 0x2800;
break;
default:
+ tile_flags = 0x7000;
break;
}
+ if (pt->height[0] > 32) tile_mode = 4;
+ else if (pt->height[0] > 16) tile_mode = 3;
+ else if (pt->height[0] > 8) tile_mode = 2;
+ else if (pt->height[0] > 4) tile_mode = 1;
+ else tile_mode = 0;
+ tile_h = 1 << (tile_mode + 2);
+
switch (pt->target) {
case PIPE_TEXTURE_3D:
mt->image_nr = pt->depth[0];
@@ -85,7 +94,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
size = align(pt->width[l], 8) * pt->block.size;
size = align(size, 64);
- size *= align(pt->height[l], 8) * pt->block.size;
+ size *= align(pt->height[l], tile_h) * pt->block.size;
lvl->image_offset[i] = mt->total_size;
@@ -93,12 +102,13 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
}
}
- mt->buffer = pscreen->buffer_create(pscreen, 256, usage, mt->total_size);
- if (!mt->buffer) {
+ ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, mt->total_size,
+ tile_mode, tile_flags, &mt->bo);
+ if (ret) {
FREE(mt);
return NULL;
}
-
+
return &mt->base;
}
@@ -106,6 +116,7 @@ static struct pipe_texture *
nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
const unsigned *stride, struct pipe_buffer *pb)
{
+ struct nouveau_bo *bo = nouveau_bo(pb);
struct nv50_miptree *mt;
/* Only supports 2D, non-mipmapped textures for the moment */
@@ -124,7 +135,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
mt->level[0].pitch = *stride;
mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
- pipe_buffer_reference(&mt->buffer, pb);
+ nouveau_bo_ref(bo, &mt->bo);
return &mt->base;
}
@@ -133,7 +144,7 @@ nv50_miptree_destroy(struct pipe_texture *pt)
{
struct nv50_miptree *mt = nv50_miptree(pt);
- pipe_buffer_reference(&mt->buffer, NULL);
+ nouveau_bo_ref(NULL, &mt->bo);
FREE(mt);
}
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 2d15868ae84..5f7d06dbecb 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -85,6 +85,9 @@ struct nv50_reg {
int hw;
int neg;
+
+ int rhw; /* result hw for FP outputs, or interpolant index */
+ int acc; /* instruction where this reg is last read (first insn == 1) */
};
struct nv50_pc {
@@ -108,12 +111,23 @@ struct nv50_pc {
struct nv50_reg *temp_temp[16];
unsigned temp_temp_nr;
+
+ unsigned interp_mode[32];
+ /* perspective interpolation registers */
+ struct nv50_reg *iv_p;
+ struct nv50_reg *iv_c;
+
+ /* current instruction and total number of insns */
+ unsigned insn_cur;
+ unsigned insn_nr;
+
+ boolean allow32;
};
static void
alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
{
- int i;
+ int i = 0;
if (reg->type == P_RESULT) {
if (pc->p->cfg.high_result < (reg->hw + 1))
@@ -131,7 +145,22 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
return;
}
- for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
+ if (reg->rhw != -1) {
+ /* try to allocate temporary with index rhw first */
+ if (!(pc->r_temp[reg->rhw])) {
+ pc->r_temp[reg->rhw] = reg;
+ reg->hw = reg->rhw;
+ if (pc->p->cfg.high_temp < (reg->rhw + 1))
+ pc->p->cfg.high_temp = reg->rhw + 1;
+ return;
+ }
+ /* make sure we don't get things like $r0 needs to go
+ * in $r1 and $r1 in $r0
+ */
+ i = pc->result_nr * 4;
+ }
+
+ for (; i < NV50_SU_MAX_TEMP; i++) {
if (!(pc->r_temp[i])) {
pc->r_temp[i] = reg;
reg->hw = i;
@@ -159,6 +188,7 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
r->type = P_TEMP;
r->index = -1;
r->hw = i;
+ r->rhw = -1;
pc->r_temp[i] = r;
return r;
}
@@ -168,6 +198,38 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
return NULL;
}
+/* Assign the hw of the discarded temporary register src
+ * to the tgsi register dst and free src.
+ */
+static void
+assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ assert(src->index == -1 && src->hw != -1);
+
+ if (dst->hw != -1)
+ pc->r_temp[dst->hw] = NULL;
+ pc->r_temp[src->hw] = dst;
+ dst->hw = src->hw;
+
+ FREE(src);
+}
+
+/* release the hardware resource held by r */
+static void
+release_hw(struct nv50_pc *pc, struct nv50_reg *r)
+{
+ assert(r->type == P_TEMP);
+ if (r->hw == -1)
+ return;
+
+ assert(pc->r_temp[r->hw] == r);
+ pc->r_temp[r->hw] = NULL;
+
+ r->acc = 0;
+ if (r->index == -1)
+ FREE(r);
+}
+
static void
free_temp(struct nv50_pc *pc, struct nv50_reg *r)
{
@@ -250,7 +312,13 @@ alloc_immd(struct nv50_pc *pc, float f)
struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
unsigned hw;
- hw = ctor_immd(pc, f, 0, 0, 0) * 4;
+ for (hw = 0; hw < pc->immd_nr * 4; hw++)
+ if (pc->immd_buf[hw] == f)
+ break;
+
+ if (hw == pc->immd_nr * 4)
+ hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4;
+
r->type = P_IMMD;
r->hw = hw;
r->index = -1;
@@ -341,7 +409,8 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
static INLINE void
set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
{
- unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
+ float f = pc->immd_buf[imm->hw];
+ unsigned val = fui(imm->neg ? -f : f);
set_long(pc, e);
/*XXX: can't be predicated - bits overlap.. catch cases where both
@@ -354,20 +423,35 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
e->inst[1] |= (val >> 6) << 2;
}
+
+#define INTERP_LINEAR 0
+#define INTERP_FLAT 1
+#define INTERP_PERSPECTIVE 2
+#define INTERP_CENTROID 4
+
+/* interpolant index has been stored in dst->rhw */
static void
-emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
- struct nv50_reg *src, struct nv50_reg *iv)
+emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv,
+ unsigned mode)
{
+ assert(dst->rhw != -1);
struct nv50_program_exec *e = exec(pc);
e->inst[0] |= 0x80000000;
set_dst(pc, dst, e);
- alloc_reg(pc, src);
- e->inst[0] |= (src->hw << 16);
- if (iv) {
- e->inst[0] |= (1 << 25);
- alloc_reg(pc, iv);
- e->inst[0] |= (iv->hw << 9);
+ e->inst[0] |= (dst->rhw << 16);
+
+ if (mode & INTERP_FLAT) {
+ e->inst[0] |= (1 << 8);
+ } else {
+ if (mode & INTERP_PERSPECTIVE) {
+ e->inst[0] |= (1 << 25);
+ alloc_reg(pc, iv);
+ e->inst[0] |= (iv->hw << 9);
+ }
+
+ if (mode & INTERP_CENTROID)
+ e->inst[0] |= (1 << 24);
}
emit(pc, e);
@@ -378,22 +462,12 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
struct nv50_program_exec *e)
{
set_long(pc, e);
-#if 1
- e->inst[1] |= (1 << 22);
-#else
- if (src->type == P_IMMD) {
- e->inst[1] |= (NV50_CB_PMISC << 22);
- } else {
- if (pc->p->type == PIPE_SHADER_VERTEX)
- e->inst[1] |= (NV50_CB_PVP << 22);
- else
- e->inst[1] |= (NV50_CB_PFP << 22);
- }
-#endif
e->param.index = src->hw;
e->param.shift = s;
e->param.mask = m << (s % 32);
+
+ e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
}
static void
@@ -405,12 +479,11 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
set_dst(pc, dst, e);
- if (0 && dst->type != P_RESULT && src->type == P_IMMD) {
+ if (pc->allow32 && dst->type != P_RESULT && src->type == P_IMMD) {
set_immd(pc, src, e);
/*XXX: 32-bit, but steals part of "half" reg space - need to
* catch and handle this case if/when we do half-regs
*/
- e->inst[0] |= 0x00008000;
} else
if (src->type == P_IMMD || src->type == P_CONST) {
set_long(pc, e);
@@ -426,18 +499,25 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
e->inst[0] |= (src->hw << 9);
}
- /* We really should support "half" instructions here at some point,
- * but I don't feel confident enough about them yet.
- */
- set_long(pc, e);
if (is_long(e) && !is_immd(e)) {
e->inst[1] |= 0x04000000; /* 32-bit */
- e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
- }
+ e->inst[1] |= 0x0000c000; /* "subsubop" 0x3 */
+ if (!(e->inst[1] & 0x20000000))
+ e->inst[1] |= 0x00030000; /* "subsubop" 0xf */
+ } else
+ e->inst[0] |= 0x00008000;
emit(pc, e);
}
+static INLINE void
+emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
+{
+ struct nv50_reg *imm = alloc_immd(pc, f);
+ emit_mov(pc, dst, imm);
+ FREE(imm);
+}
+
static boolean
check_swap_src_0_1(struct nv50_pc *pc,
struct nv50_reg **s0, struct nv50_reg **s1)
@@ -541,12 +621,26 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_program_exec *e = exec(pc);
e->inst[0] |= 0xc0000000;
- set_long(pc, e);
+
+ if (!pc->allow32)
+ set_long(pc, e);
check_swap_src_0_1(pc, &src0, &src1);
set_dst(pc, dst, e);
set_src_0(pc, src0, e);
- set_src_1(pc, src1, e);
+ if (src1->type == P_IMMD && !is_long(e)) {
+ if (src0->neg)
+ e->inst[0] |= 0x00008000;
+ set_immd(pc, src1, e);
+ } else {
+ set_src_1(pc, src1, e);
+ if (src0->neg ^ src1->neg) {
+ if (is_long(e))
+ e->inst[1] |= 0x08000000;
+ else
+ e->inst[0] |= 0x00008000;
+ }
+ }
emit(pc, e);
}
@@ -560,11 +654,20 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
e->inst[0] |= 0xb0000000;
check_swap_src_0_1(pc, &src0, &src1);
+
+ if (!pc->allow32 || src0->neg || src1->neg) {
+ set_long(pc, e);
+ e->inst[1] |= (src0->neg << 26) | (src1->neg << 27);
+ }
+
set_dst(pc, dst, e);
set_src_0(pc, src0, e);
- if (is_long(e))
+ if (src1->type == P_CONST || src1->type == P_ATTR || is_long(e))
set_src_2(pc, src1, e);
else
+ if (src1->type == P_IMMD)
+ set_immd(pc, src1, e);
+ else
set_src_1(pc, src1, e);
emit(pc, e);
@@ -588,25 +691,13 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
emit(pc, e);
}
-static void
+static INLINE void
emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] |= 0xb0000000;
-
- set_long(pc, e);
- if (check_swap_src_0_1(pc, &src0, &src1))
- e->inst[1] |= 0x04000000;
- else
- e->inst[1] |= 0x08000000;
-
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
- set_src_2(pc, src1, e);
-
- emit(pc, e);
+ src1->neg ^= 1;
+ emit_add(pc, dst, src0, src1);
+ src1->neg ^= 1;
}
static void
@@ -623,26 +714,21 @@ emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
set_src_1(pc, src1, e);
set_src_2(pc, src2, e);
+ if (src0->neg ^ src1->neg)
+ e->inst[1] |= 0x04000000;
+ if (src2->neg)
+ e->inst[1] |= 0x08000000;
+
emit(pc, e);
}
-static void
+static INLINE void
emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1, struct nv50_reg *src2)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] |= 0xe0000000;
- set_long(pc, e);
- e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
-
- check_swap_src_0_1(pc, &src0, &src1);
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
- set_src_1(pc, src1, e);
- set_src_2(pc, src2, e);
-
- emit(pc, e);
+ src2->neg ^= 1;
+ emit_mad(pc, dst, src0, src1, src2);
+ src2->neg ^= 1;
}
static void
@@ -693,6 +779,48 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
emit(pc, e);
}
+#define CVTOP_RN 0x01
+#define CVTOP_FLOOR 0x03
+#define CVTOP_CEIL 0x05
+#define CVTOP_TRUNC 0x07
+#define CVTOP_SAT 0x08
+#define CVTOP_ABS 0x10
+
+#define CVT_F32_F32 0xc4
+#define CVT_F32_S32 0x44
+#define CVT_F32_U32 0x64
+#define CVT_S32_F32 0x8c
+#define CVT_S32_S32 0x0c
+#define CVT_F32_F32_ROP 0xcc
+
+static void
+emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
+ int wp, unsigned cop, unsigned fmt)
+{
+ struct nv50_program_exec *e;
+
+ e = exec(pc);
+ set_long(pc, e);
+
+ e->inst[0] |= 0xa0000000;
+ e->inst[1] |= 0x00004000;
+ e->inst[1] |= (cop << 16);
+ e->inst[1] |= (fmt << 24);
+ set_src_0(pc, src, e);
+
+ if (wp >= 0)
+ set_pred_wr(pc, 1, wp, e);
+
+ if (dst)
+ set_dst(pc, dst, e);
+ else {
+ e->inst[0] |= 0x000001fc;
+ e->inst[1] |= 0x00000008;
+ }
+
+ emit(pc, e);
+}
+
static void
emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
struct nv50_reg *src0, struct nv50_reg *src1)
@@ -736,22 +864,10 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
free_temp(pc, dst);
}
-static void
+static INLINE void
emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xa0000000; /* cvt */
- set_long(pc, e);
- e->inst[1] |= (6 << 29); /* cvt */
- e->inst[1] |= 0x08000000; /* integer mode */
- e->inst[1] |= 0x04000000; /* 32 bit */
- e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
- e->inst[1] |= (1 << 14); /* src .f32 */
- set_dst(pc, dst, e);
- set_src_0(pc, src, e);
-
- emit(pc, e);
+ emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32_ROP);
}
static void
@@ -768,21 +884,10 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
free_temp(pc, temp);
}
-static void
+static INLINE void
emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xa0000000; /* cvt */
- set_long(pc, e);
- e->inst[1] |= (6 << 29); /* cvt */
- e->inst[1] |= 0x04000000; /* 32 bit */
- e->inst[1] |= (1 << 14); /* src .f32 */
- e->inst[1] |= ((1 << 6) << 14); /* .abs */
- set_dst(pc, dst, e);
- set_src_0(pc, src, e);
-
- emit(pc, e);
+ emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32);
}
static void
@@ -794,18 +899,12 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
struct nv50_reg *pos128 = alloc_immd(pc, 127.999999);
struct nv50_reg *tmp[4];
+ boolean allow32 = pc->allow32;
- if (mask & (1 << 0))
- emit_mov(pc, dst[0], one);
-
- if (mask & (1 << 3))
- emit_mov(pc, dst[3], one);
+ pc->allow32 = FALSE;
if (mask & (3 << 1)) {
- if (mask & (1 << 1))
- tmp[0] = dst[1];
- else
- tmp[0] = temp_temp(pc);
+ tmp[0] = alloc_temp(pc, NULL);
emit_minmax(pc, 4, tmp[0], src[0], zero);
}
@@ -823,6 +922,26 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
emit_mov(pc, dst[2], zero);
set_pred(pc, 3, 0, pc->p->exec_tail);
}
+
+ if (mask & (1 << 1))
+ assimilate_temp(pc, dst[1], tmp[0]);
+ else
+ if (mask & (1 << 2))
+ free_temp(pc, tmp[0]);
+
+ pc->allow32 = allow32;
+
+ /* do this last, in case src[i,j] == dst[0,3] */
+ if (mask & (1 << 0))
+ emit_mov(pc, dst[0], one);
+
+ if (mask & (1 << 3))
+ emit_mov(pc, dst[3], one);
+
+ FREE(pos128);
+ FREE(neg128);
+ FREE(zero);
+ FREE(one);
}
static void
@@ -853,6 +972,8 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
e->inst[1] = 0xc4014788;
set_src_0(pc, src, e);
set_pred_wr(pc, 1, r_pred, e);
+ if (src->neg)
+ e->inst[1] |= 0x20000000;
emit(pc, e);
/* This is probably KILP */
@@ -863,6 +984,180 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
emit(pc, e);
}
+static void
+emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+ struct nv50_reg **src, unsigned unit, unsigned type, boolean proj)
+{
+ struct nv50_reg *temp, *t[4];
+ struct nv50_program_exec *e;
+
+ unsigned c, mode, dim;
+
+ switch (type) {
+ case TGSI_TEXTURE_1D:
+ dim = 1;
+ break;
+ case TGSI_TEXTURE_UNKNOWN:
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_SHADOW1D: /* XXX: x, z */
+ case TGSI_TEXTURE_RECT:
+ dim = 2;
+ break;
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT: /* XXX */
+ dim = 3;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ alloc_temp4(pc, t, 0);
+
+ if (proj) {
+ if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
+ mode = pc->interp_mode[src[0]->index];
+
+ t[3]->rhw = src[3]->rhw;
+ emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
+ emit_flop(pc, 0, t[3], t[3]);
+
+ for (c = 0; c < dim; c++) {
+ t[c]->rhw = src[c]->rhw;
+ emit_interp(pc, t[c], t[3],
+ (mode | INTERP_PERSPECTIVE));
+ }
+ } else {
+ emit_flop(pc, 0, t[3], src[3]);
+ for (c = 0; c < dim; c++)
+ emit_mul(pc, t[c], src[c], t[3]);
+
+ /* XXX: for some reason the blob sometimes uses MAD:
+ * emit_mad(pc, t[c], src[0][c], t[3], t[3])
+ * pc->p->exec_tail->inst[1] |= 0x080fc000;
+ */
+ }
+ } else {
+ if (type == TGSI_TEXTURE_CUBE) {
+ temp = temp_temp(pc);
+ emit_minmax(pc, 4, temp, src[0], src[1]);
+ emit_minmax(pc, 4, temp, temp, src[2]);
+ emit_flop(pc, 0, temp, temp);
+ for (c = 0; c < 3; c++)
+ emit_mul(pc, t[c], src[c], temp);
+ } else {
+ for (c = 0; c < dim; c++)
+ emit_mov(pc, t[c], src[c]);
+ }
+ }
+
+ e = exec(pc);
+ set_long(pc, e);
+ e->inst[0] |= 0xf0000000;
+ e->inst[1] |= 0x00000004;
+ set_dst(pc, t[0], e);
+ e->inst[0] |= (unit << 9);
+
+ if (dim == 2)
+ e->inst[0] |= 0x00400000;
+ else
+ if (dim == 3)
+ e->inst[0] |= 0x00800000;
+
+ e->inst[0] |= (mask & 0x3) << 25;
+ e->inst[1] |= (mask & 0xc) << 12;
+
+ emit(pc, e);
+
+#if 1
+ if (mask & 1) emit_mov(pc, dst[0], t[0]);
+ if (mask & 2) emit_mov(pc, dst[1], t[1]);
+ if (mask & 4) emit_mov(pc, dst[2], t[2]);
+ if (mask & 8) emit_mov(pc, dst[3], t[3]);
+
+ free_temp4(pc, t);
+#else
+ /* XXX: if p.e. MUL is used directly after TEX, it would still use
+ * the texture coordinates, not the fetched values: latency ? */
+
+ for (c = 0; c < 4; c++) {
+ if (mask & (1 << c))
+ assimilate_temp(pc, dst[c], t[c]);
+ else
+ free_temp(pc, t[c]);
+ }
+#endif
+}
+
+static void
+convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
+{
+ unsigned q = 0, m = ~0;
+
+ assert(!is_long(e));
+
+ switch (e->inst[0] >> 28) {
+ case 0x1:
+ /* MOV */
+ q = 0x0403c000;
+ m = 0xffff7fff;
+ break;
+ case 0x8:
+ /* INTERP */
+ m = ~0x02000000;
+ if (e->inst[0] & 0x02000000)
+ q = 0x00020000;
+ break;
+ case 0x9:
+ /* RCP */
+ break;
+ case 0xB:
+ /* ADD */
+ m = ~(127 << 16);
+ q = ((e->inst[0] & (~m)) >> 2);
+ break;
+ case 0xC:
+ /* MUL */
+ m = ~0x00008000;
+ q = ((e->inst[0] & (~m)) << 12);
+ break;
+ case 0xE:
+ /* MAD (if src2 == dst) */
+ q = ((e->inst[0] & 0x1fc) << 12);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ set_long(pc, e);
+ pc->p->exec_size++;
+
+ e->inst[0] &= m;
+ e->inst[1] |= q;
+}
+
+static boolean
+negate_supported(const struct tgsi_full_instruction *insn, int i)
+{
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_KIL:
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_MAD:
+ return TRUE;
+ case TGSI_OPCODE_POW:
+ return (i == 1) ? TRUE : FALSE;
+ default:
+ return FALSE;
+ }
+}
+
static struct nv50_reg *
tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
{
@@ -881,11 +1176,14 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
}
static struct nv50_reg *
-tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
+tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
+ boolean neg)
{
struct nv50_reg *r = NULL;
struct nv50_reg *temp;
- unsigned c;
+ unsigned sgn, c;
+
+ sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
c = tgsi_util_get_full_src_register_extswizzle(src, chan);
switch (c) {
@@ -915,16 +1213,17 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
break;
case TGSI_EXTSWIZZLE_ZERO:
r = alloc_immd(pc, 0.0);
- break;
+ return r;
case TGSI_EXTSWIZZLE_ONE:
- r = alloc_immd(pc, 1.0);
- break;
+ if (sgn == TGSI_UTIL_SIGN_TOGGLE || sgn == TGSI_UTIL_SIGN_SET)
+ return alloc_immd(pc, -1.0);
+ return alloc_immd(pc, 1.0);
default:
assert(0);
break;
}
- switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
+ switch (sgn) {
case TGSI_UTIL_SIGN_KEEP:
break;
case TGSI_UTIL_SIGN_CLEAR:
@@ -933,14 +1232,21 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
r = temp;
break;
case TGSI_UTIL_SIGN_TOGGLE:
- temp = temp_temp(pc);
- emit_neg(pc, temp, r);
- r = temp;
+ if (neg)
+ r->neg = 1;
+ else {
+ temp = temp_temp(pc);
+ emit_neg(pc, temp, r);
+ r = temp;
+ }
break;
case TGSI_UTIL_SIGN_SET:
temp = temp_temp(pc);
emit_abs(pc, temp, r);
- emit_neg(pc, temp, r);
+ if (neg)
+ temp->neg = 1;
+ else
+ emit_neg(pc, temp, temp);
r = temp;
break;
default:
@@ -951,12 +1257,40 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
return r;
}
+/* returns TRUE if instruction can overwrite sources before they're read */
+static boolean
+direct2dest_op(const struct tgsi_full_instruction *insn)
+{
+ if (insn->Instruction.Saturate)
+ return FALSE;
+
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_KIL:
+ case TGSI_OPCODE_LIT:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SCS:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXP:
+ return FALSE;
+ default:
+ return TRUE;
+ }
+}
+
static boolean
nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
{
const struct tgsi_full_instruction *inst = &tok->FullInstruction;
struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
unsigned mask, sat, unit;
+ boolean assimilate = FALSE;
int i, c;
mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
@@ -967,6 +1301,10 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
else
dst[c] = NULL;
+ rdst[c] = NULL;
+ src[0][c] = NULL;
+ src[1][c] = NULL;
+ src[2][c] = NULL;
}
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
@@ -976,7 +1314,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
unit = fs->SrcRegister.Index;
for (c = 0; c < 4; c++)
- src[i][c] = tgsi_src(pc, c, fs);
+ src[i][c] = tgsi_src(pc, c, fs,
+ negate_supported(inst, i));
}
if (sat) {
@@ -984,6 +1323,25 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
rdst[c] = dst[c];
dst[c] = temp_temp(pc);
}
+ } else
+ if (direct2dest_op(inst)) {
+ for (c = 0; c < 4; c++) {
+ if (!dst[c] || dst[c]->type != P_TEMP)
+ continue;
+
+ for (i = c + 1; i < 4; i++) {
+ if (dst[c] == src[0][i] ||
+ dst[c] == src[1][i] ||
+ dst[c] == src[2][i])
+ break;
+ }
+ if (i == 4)
+ continue;
+
+ assimilate = TRUE;
+ rdst[c] = dst[c];
+ dst[c] = alloc_temp(pc, NULL);
+ }
}
switch (inst->Instruction.Opcode) {
@@ -1002,7 +1360,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
break;
case TGSI_OPCODE_COS:
- temp = alloc_temp(pc, NULL);
+ temp = temp_temp(pc);
emit_precossin(pc, temp, src[0][0]);
emit_flop(pc, 5, temp, temp);
for (c = 0; c < 4; c++) {
@@ -1012,7 +1370,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
break;
case TGSI_OPCODE_DP3:
- temp = alloc_temp(pc, NULL);
+ temp = temp_temp(pc);
emit_mul(pc, temp, src[0][0], src[1][0]);
emit_mad(pc, temp, src[0][1], src[1][1], temp);
emit_mad(pc, temp, src[0][2], src[1][2], temp);
@@ -1021,10 +1379,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
continue;
emit_mov(pc, dst[c], temp);
}
- free_temp(pc, temp);
break;
case TGSI_OPCODE_DP4:
- temp = alloc_temp(pc, NULL);
+ temp = temp_temp(pc);
emit_mul(pc, temp, src[0][0], src[1][0]);
emit_mad(pc, temp, src[0][1], src[1][1], temp);
emit_mad(pc, temp, src[0][2], src[1][2], temp);
@@ -1034,10 +1391,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
continue;
emit_mov(pc, dst[c], temp);
}
- free_temp(pc, temp);
break;
case TGSI_OPCODE_DPH:
- temp = alloc_temp(pc, NULL);
+ temp = temp_temp(pc);
emit_mul(pc, temp, src[0][0], src[1][0]);
emit_mad(pc, temp, src[0][1], src[1][1], temp);
emit_mad(pc, temp, src[0][2], src[1][2], temp);
@@ -1047,7 +1403,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
continue;
emit_mov(pc, dst[c], temp);
}
- free_temp(pc, temp);
break;
case TGSI_OPCODE_DST:
{
@@ -1064,7 +1419,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
break;
case TGSI_OPCODE_EX2:
- temp = alloc_temp(pc, NULL);
+ temp = temp_temp(pc);
emit_preex2(pc, temp, src[0][0]);
emit_flop(pc, 6, temp, temp);
for (c = 0; c < 4; c++) {
@@ -1072,7 +1427,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
continue;
emit_mov(pc, dst[c], temp);
}
- free_temp(pc, temp);
break;
case TGSI_OPCODE_FLR:
for (c = 0; c < 4; c++) {
@@ -1082,26 +1436,26 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
break;
case TGSI_OPCODE_FRC:
- temp = alloc_temp(pc, NULL);
+ temp = temp_temp(pc);
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
emit_flr(pc, temp, src[0][c]);
emit_sub(pc, dst[c], src[0][c], temp);
}
- free_temp(pc, temp);
break;
case TGSI_OPCODE_KIL:
emit_kil(pc, src[0][0]);
emit_kil(pc, src[0][1]);
emit_kil(pc, src[0][2]);
emit_kil(pc, src[0][3]);
+ pc->p->cfg.fp.regs[2] |= 0x00100000;
break;
case TGSI_OPCODE_LIT:
emit_lit(pc, &dst[0], mask, &src[0][0]);
break;
case TGSI_OPCODE_LG2:
- temp = alloc_temp(pc, NULL);
+ temp = temp_temp(pc);
emit_flop(pc, 3, temp, src[0][0]);
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
@@ -1110,15 +1464,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
break;
case TGSI_OPCODE_LRP:
+ temp = temp_temp(pc);
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- /*XXX: we can do better than this */
- temp = alloc_temp(pc, NULL);
- emit_neg(pc, temp, src[0][c]);
- emit_mad(pc, temp, temp, src[2][c], src[2][c]);
- emit_mad(pc, dst[c], src[0][c], src[1][c], temp);
- free_temp(pc, temp);
+ emit_sub(pc, temp, src[1][c], src[2][c]);
+ emit_mad(pc, dst[c], temp, src[0][c], src[2][c]);
}
break;
case TGSI_OPCODE_MAD:
@@ -1157,36 +1508,39 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
break;
case TGSI_OPCODE_POW:
- temp = alloc_temp(pc, NULL);
+ temp = temp_temp(pc);
emit_pow(pc, temp, src[0][0], src[1][0]);
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
emit_mov(pc, dst[c], temp);
}
- free_temp(pc, temp);
break;
case TGSI_OPCODE_RCP:
- for (c = 0; c < 4; c++) {
+ for (c = 3; c >= 0; c--) {
if (!(mask & (1 << c)))
continue;
emit_flop(pc, 0, dst[c], src[0][0]);
}
break;
case TGSI_OPCODE_RSQ:
- for (c = 0; c < 4; c++) {
+ for (c = 3; c >= 0; c--) {
if (!(mask & (1 << c)))
continue;
emit_flop(pc, 2, dst[c], src[0][0]);
}
break;
case TGSI_OPCODE_SCS:
- temp = alloc_temp(pc, NULL);
+ temp = temp_temp(pc);
emit_precossin(pc, temp, src[0][0]);
if (mask & (1 << 0))
emit_flop(pc, 5, dst[0], temp);
if (mask & (1 << 1))
emit_flop(pc, 4, dst[1], temp);
+ if (mask & (1 << 2))
+ emit_mov_immdval(pc, dst[2], 0.0);
+ if (mask & (1 << 3))
+ emit_mov_immdval(pc, dst[3], 1.0);
break;
case TGSI_OPCODE_SGE:
for (c = 0; c < 4; c++) {
@@ -1196,7 +1550,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
break;
case TGSI_OPCODE_SIN:
- temp = alloc_temp(pc, NULL);
+ temp = temp_temp(pc);
emit_precossin(pc, temp, src[0][0]);
emit_flop(pc, 4, temp, temp);
for (c = 0; c < 4; c++) {
@@ -1220,33 +1574,15 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
break;
case TGSI_OPCODE_TEX:
+ emit_tex(pc, dst, mask, src[0], unit,
+ inst->InstructionExtTexture.Texture, FALSE);
+ break;
case TGSI_OPCODE_TXP:
- {
- struct nv50_reg *t[4];
- struct nv50_program_exec *e;
-
- alloc_temp4(pc, t, 0);
- emit_mov(pc, t[0], src[0][0]);
- emit_mov(pc, t[1], src[0][1]);
-
- e = exec(pc);
- e->inst[0] = 0xf6400000;
- e->inst[0] |= (unit << 9);
- set_long(pc, e);
- e->inst[1] |= 0x0000c004;
- set_dst(pc, t[0], e);
- emit(pc, e);
-
- if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]);
- if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]);
- if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]);
- if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]);
-
- free_temp4(pc, t);
- }
+ emit_tex(pc, dst, mask, src[0], unit,
+ inst->InstructionExtTexture.Texture, TRUE);
break;
case TGSI_OPCODE_XPD:
- temp = alloc_temp(pc, NULL);
+ temp = temp_temp(pc);
if (mask & (1 << 0)) {
emit_mul(pc, temp, src[0][2], src[1][1]);
emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
@@ -1259,7 +1595,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
emit_mul(pc, temp, src[0][1], src[1][0]);
emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
}
- free_temp(pc, temp);
+ if (mask & (1 << 3))
+ emit_mov_immdval(pc, dst[3], 1.0);
break;
case TGSI_OPCODE_END:
break;
@@ -1270,21 +1607,26 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
if (sat) {
for (c = 0; c < 4; c++) {
- struct nv50_program_exec *e;
-
if (!(mask & (1 << c)))
continue;
- e = exec(pc);
+ emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT,
+ CVT_F32_F32);
+ }
+ } else if (assimilate) {
+ for (c = 0; c < 4; c++)
+ if (rdst[c])
+ assimilate_temp(pc, rdst[c], dst[c]);
+ }
- e->inst[0] = 0xa0000000; /* cvt */
- set_long(pc, e);
- e->inst[1] |= (6 << 29); /* cvt */
- e->inst[1] |= 0x04000000; /* 32 bit */
- e->inst[1] |= (1 << 14); /* src .f32 */
- e->inst[1] |= ((1 << 5) << 14); /* .sat */
- set_dst(pc, rdst[c], e);
- set_src_0(pc, dst[c], e);
- emit(pc, e);
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ for (c = 0; c < 4; c++) {
+ if (!src[i][c])
+ continue;
+ if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
+ FREE(src[i][c]);
+ else
+ if (src[i][c]->acc == pc->insn_cur)
+ release_hw(pc, src[i][c]);
}
}
@@ -1292,12 +1634,169 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
return TRUE;
}
+/* Adjust a bitmask that indicates what components of a source are used,
+ * we use this in tx_prep so we only load interpolants that are needed.
+ */
+static void
+insn_adjust_mask(const struct tgsi_full_instruction *insn, unsigned *mask)
+{
+ const struct tgsi_instruction_ext_texture *tex;
+
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_DP3:
+ *mask = 0x7;
+ break;
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ *mask = 0xF;
+ break;
+ case TGSI_OPCODE_LIT:
+ *mask = 0xB;
+ break;
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ *mask = 0x1;
+ break;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXP:
+ assert(insn->Instruction.Extended);
+ tex = &insn->InstructionExtTexture;
+
+ *mask = 0x7;
+ if (tex->Texture == TGSI_TEXTURE_1D)
+ *mask = 0x1;
+ else
+ if (tex->Texture == TGSI_TEXTURE_2D)
+ *mask = 0x3;
+
+ if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
+ *mask |= 0x8;
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok,
+ unsigned *r_usage[2])
+{
+ const struct tgsi_full_instruction *insn;
+ const struct tgsi_full_src_register *src;
+ const struct tgsi_dst_register *dst;
+
+ unsigned i, c, k, n, mask, *acc_p;
+
+ insn = &tok->FullInstruction;
+ dst = &insn->FullDstRegisters[0].DstRegister;
+ mask = dst->WriteMask;
+
+ if (!r_usage[0])
+ r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned));
+ if (!r_usage[1])
+ r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned));
+
+ if (dst->File == TGSI_FILE_TEMPORARY) {
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ r_usage[0][dst->Index * 4 + c] = pc->insn_nr;
+ }
+ }
+
+ for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
+ src = &insn->FullSrcRegisters[i];
+
+ switch (src->SrcRegister.File) {
+ case TGSI_FILE_TEMPORARY:
+ acc_p = r_usage[0];
+ break;
+ case TGSI_FILE_INPUT:
+ acc_p = r_usage[1];
+ break;
+ default:
+ continue;
+ }
+
+ insn_adjust_mask(insn, &mask);
+
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+
+ k = tgsi_util_get_full_src_register_extswizzle(src, c);
+ switch (k) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ n = src->SrcRegister.Index * 4 + k;
+ acc_p[n] = pc->insn_nr;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+}
+
+static unsigned
+load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid,
+ int *aid, int *p_oid)
+{
+ struct nv50_reg *iv;
+ int oid, c, n;
+ unsigned mask = 0;
+
+ iv = (pc->interp_mode[i] & INTERP_CENTROID) ? pc->iv_c : pc->iv_p;
+
+ for (c = 0, n = i * 4; c < 4; c++, n++) {
+ oid = (*p_oid)++;
+ pc->attr[n].type = P_TEMP;
+ pc->attr[n].index = i;
+
+ if (pc->attr[n].acc == acc[n])
+ continue;
+ mask |= (1 << c);
+
+ pc->attr[n].acc = acc[n];
+ pc->attr[n].rhw = pc->attr[n].hw = -1;
+ alloc_reg(pc, &pc->attr[n]);
+
+ pc->attr[n].rhw = (*aid)++;
+ emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]);
+
+ pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4));
+ (*mid)++;
+ pc->p->cfg.fp.regs[1] += 0x00010001;
+ }
+
+ return mask;
+}
+
static boolean
nv50_program_tx_prep(struct nv50_pc *pc)
{
struct tgsi_parse_context p;
boolean ret = FALSE;
unsigned i, c;
+ unsigned fcol, bcol, fcrd, depr;
+
+ /* count (centroid) perspective interpolations */
+ unsigned centroid_loads = 0;
+ unsigned perspect_loads = 0;
+
+ /* track register access for temps and attrs */
+ unsigned *r_usage[2];
+ r_usage[0] = NULL;
+ r_usage[1] = NULL;
+
+ depr = fcol = bcol = fcrd = 0xffff;
+
+ if (pc->p->type == PIPE_SHADER_FRAGMENT) {
+ pc->p->cfg.fp.regs[0] = 0x01000404;
+ pc->p->cfg.fp.regs[1] = 0x00000400;
+ }
tgsi_parse_init(&p, pc->p->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&p)) {
@@ -1319,9 +1818,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
case TGSI_TOKEN_TYPE_DECLARATION:
{
const struct tgsi_full_declaration *d;
- unsigned last;
+ unsigned last, first, mode;
d = &p.FullToken.FullDeclaration;
+ first = d->DeclarationRange.First;
last = d->DeclarationRange.Last;
switch (d->Declaration.File) {
@@ -1332,10 +1832,69 @@ nv50_program_tx_prep(struct nv50_pc *pc)
case TGSI_FILE_OUTPUT:
if (pc->result_nr < (last + 1))
pc->result_nr = last + 1;
+
+ if (!d->Declaration.Semantic)
+ break;
+
+ switch (d->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ depr = first;
+ pc->p->cfg.fp.regs[2] |= 0x00000100;
+ pc->p->cfg.fp.regs[3] |= 0x00000011;
+ break;
+ default:
+ break;
+ }
+
break;
case TGSI_FILE_INPUT:
+ {
if (pc->attr_nr < (last + 1))
pc->attr_nr = last + 1;
+
+ if (pc->p->type != PIPE_SHADER_FRAGMENT)
+ break;
+
+ switch (d->Declaration.Interpolate) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ mode = INTERP_FLAT;
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ mode = INTERP_PERSPECTIVE;
+ break;
+ default:
+ mode = INTERP_LINEAR;
+ break;
+ }
+
+ if (d->Declaration.Semantic) {
+ switch (d->Semantic.SemanticName) {
+ case TGSI_SEMANTIC_POSITION:
+ fcrd = first;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ fcol = first;
+ mode = INTERP_PERSPECTIVE;
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ bcol = first;
+ mode = INTERP_PERSPECTIVE;
+ break;
+ }
+ }
+
+ if (d->Declaration.Centroid) {
+ mode |= INTERP_CENTROID;
+ if (mode & INTERP_PERSPECTIVE)
+ centroid_loads++;
+ } else
+ if (mode & INTERP_PERSPECTIVE)
+ perspect_loads++;
+
+ assert(last < 32);
+ for (i = first; i <= last; i++)
+ pc->interp_mode[i] = mode;
+ }
break;
case TGSI_FILE_CONSTANT:
if (pc->param_nr < (last + 1))
@@ -1351,6 +1910,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
}
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
+ pc->insn_nr++;
+ prep_inspect_insn(pc, tok, r_usage);
break;
default:
break;
@@ -1366,56 +1927,95 @@ nv50_program_tx_prep(struct nv50_pc *pc)
for (c = 0; c < 4; c++) {
pc->temp[i*4+c].type = P_TEMP;
pc->temp[i*4+c].hw = -1;
+ pc->temp[i*4+c].rhw = -1;
pc->temp[i*4+c].index = i;
+ pc->temp[i*4+c].acc = r_usage[0][i*4+c];
}
}
}
if (pc->attr_nr) {
- struct nv50_reg *iv = NULL;
- int aid = 0;
+ int oid = 4, mid = 4, aid = 0;
+ /* oid = VP output id
+ * aid = FP attribute/interpolant id
+ * mid = VP output mapping field ID
+ */
pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
if (!pc->attr)
goto out_err;
if (pc->p->type == PIPE_SHADER_FRAGMENT) {
- iv = alloc_temp(pc, NULL);
- emit_interp(pc, iv, iv, NULL);
- emit_flop(pc, 0, iv, iv);
- aid++;
- }
+ /* position should be loaded first */
+ if (fcrd != 0xffff) {
+ unsigned mask;
+ mid = 0;
+ mask = load_fp_attrib(pc, fcrd, r_usage[1],
+ &mid, &aid, &oid);
+ oid = 0;
+ pc->p->cfg.fp.regs[1] |= (mask << 24);
+ pc->p->cfg.fp.map[0] = 0x04040404 * fcrd;
+ }
+ pc->p->cfg.fp.map[0] += 0x03020100;
- for (i = 0; i < pc->attr_nr; i++) {
- struct nv50_reg *a = &pc->attr[i*4];
+ /* should do MAD fcrd.xy, fcrd, SOME_CONST, fcrd */
- for (c = 0; c < 4; c++) {
- if (pc->p->type == PIPE_SHADER_FRAGMENT) {
- struct nv50_reg *at =
- alloc_temp(pc, NULL);
- pc->attr[i*4+c].type = at->type;
- pc->attr[i*4+c].hw = at->hw;
- pc->attr[i*4+c].index = at->index;
+ if (perspect_loads) {
+ pc->iv_p = alloc_temp(pc, NULL);
+
+ if (!(pc->p->cfg.fp.regs[1] & 0x08000000)) {
+ pc->p->cfg.fp.regs[1] |= 0x08000000;
+ pc->iv_p->rhw = aid++;
+ emit_interp(pc, pc->iv_p, NULL,
+ INTERP_LINEAR);
+ emit_flop(pc, 0, pc->iv_p, pc->iv_p);
} else {
- pc->p->cfg.vp.attr[aid/32] |=
- (1 << (aid % 32));
- pc->attr[i*4+c].type = P_ATTR;
- pc->attr[i*4+c].hw = aid++;
- pc->attr[i*4+c].index = i;
+ pc->iv_p->rhw = aid - 1;
+ emit_flop(pc, 0, pc->iv_p,
+ &pc->attr[fcrd * 4 + 3]);
}
}
- if (pc->p->type != PIPE_SHADER_FRAGMENT)
- continue;
+ if (centroid_loads) {
+ pc->iv_c = alloc_temp(pc, NULL);
+ pc->iv_c->rhw = pc->iv_p ? aid - 1 : aid++;
+ emit_interp(pc, pc->iv_c, NULL,
+ INTERP_CENTROID);
+ emit_flop(pc, 0, pc->iv_c, pc->iv_c);
+ pc->p->cfg.fp.regs[1] |= 0x08000000;
+ }
- emit_interp(pc, &a[0], &a[0], iv);
- emit_interp(pc, &a[1], &a[1], iv);
- emit_interp(pc, &a[2], &a[2], iv);
- emit_interp(pc, &a[3], &a[3], iv);
- }
+ for (c = 0; c < 4; c++) {
+ /* I don't know what these values do, but
+ * let's set them like the blob does:
+ */
+ if (fcol != 0xffff && r_usage[1][fcol * 4 + c])
+ pc->p->cfg.fp.regs[0] += 0x00010000;
+ if (bcol != 0xffff && r_usage[1][bcol * 4 + c])
+ pc->p->cfg.fp.regs[0] += 0x00010000;
+ }
- if (iv)
- free_temp(pc, iv);
+ for (i = 0; i < pc->attr_nr; i++)
+ load_fp_attrib(pc, i, r_usage[1],
+ &mid, &aid, &oid);
+
+ if (pc->iv_p)
+ free_temp(pc, pc->iv_p);
+ if (pc->iv_c)
+ free_temp(pc, pc->iv_c);
+
+ pc->p->cfg.fp.high_map = (mid / 4);
+ pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0);
+ } else {
+ /* vertex program */
+ for (i = 0; i < pc->attr_nr * 4; i++) {
+ pc->p->cfg.vp.attr[aid / 32] |=
+ (1 << (aid % 32));
+ pc->attr[i].type = P_ATTR;
+ pc->attr[i].hw = aid++;
+ pc->attr[i].index = i / 4;
+ }
+ }
}
if (pc->result_nr) {
@@ -1430,12 +2030,20 @@ nv50_program_tx_prep(struct nv50_pc *pc)
if (pc->p->type == PIPE_SHADER_FRAGMENT) {
pc->result[i*4+c].type = P_TEMP;
pc->result[i*4+c].hw = -1;
+ pc->result[i*4+c].rhw = (i == depr) ?
+ -1 : rid++;
} else {
pc->result[i*4+c].type = P_RESULT;
pc->result[i*4+c].hw = rid++;
}
pc->result[i*4+c].index = i;
}
+
+ if (pc->p->type == PIPE_SHADER_FRAGMENT &&
+ depr != 0xffff) {
+ pc->result[depr * 4 + 2].rhw =
+ (pc->result_nr - 1) * 4;
+ }
}
}
@@ -1456,7 +2064,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
}
if (pc->immd_nr) {
- int rid = pc->param_nr * 4;
+ int rid = 0;
pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg));
if (!pc->immd)
@@ -1473,15 +2081,38 @@ nv50_program_tx_prep(struct nv50_pc *pc)
ret = TRUE;
out_err:
+ if (r_usage[0])
+ FREE(r_usage[0]);
+ if (r_usage[1])
+ FREE(r_usage[1]);
+
tgsi_parse_free(&p);
return ret;
}
+static void
+free_nv50_pc(struct nv50_pc *pc)
+{
+ if (pc->immd)
+ FREE(pc->immd);
+ if (pc->param)
+ FREE(pc->param);
+ if (pc->result)
+ FREE(pc->result);
+ if (pc->attr)
+ FREE(pc->attr);
+ if (pc->temp)
+ FREE(pc->temp);
+
+ FREE(pc);
+}
+
static boolean
nv50_program_tx(struct nv50_program *p)
{
struct tgsi_parse_context parse;
struct nv50_pc *pc;
+ unsigned k;
boolean ret;
pc = CALLOC_STRUCT(nv50_pc);
@@ -1498,10 +2129,16 @@ nv50_program_tx(struct nv50_program *p)
while (!tgsi_parse_end_of_tokens(&parse)) {
const union tgsi_full_token *tok = &parse.FullToken;
+ /* don't allow half insn/immd on first and last instruction */
+ pc->allow32 = TRUE;
+ if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr)
+ pc->allow32 = FALSE;
+
tgsi_parse_token(&parse);
switch (tok->Token.Type) {
case TGSI_TOKEN_TYPE_INSTRUCTION:
+ ++pc->insn_cur;
ret = nv50_program_tx_insn(pc, tok);
if (ret == FALSE)
goto out_err;
@@ -1515,8 +2152,40 @@ nv50_program_tx(struct nv50_program *p)
struct nv50_reg out;
out.type = P_TEMP;
- for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++)
- emit_mov(pc, &out, &pc->result[out.hw]);
+ for (k = 0; k < pc->result_nr * 4; k++) {
+ if (pc->result[k].rhw == -1)
+ continue;
+ if (pc->result[k].hw != pc->result[k].rhw) {
+ out.hw = pc->result[k].rhw;
+ emit_mov(pc, &out, &pc->result[k]);
+ }
+ if (pc->p->cfg.high_result < (pc->result[k].rhw + 1))
+ pc->p->cfg.high_result = pc->result[k].rhw + 1;
+ }
+ }
+
+ /* look for single half instructions and make them long */
+ struct nv50_program_exec *e, *e_prev;
+
+ for (k = 0, e = pc->p->exec_head, e_prev = NULL; e; e = e->next) {
+ if (!is_long(e))
+ k++;
+
+ if (!e->next || is_long(e->next)) {
+ if (k & 1)
+ convert_to_long(pc, e);
+ k = 0;
+ }
+
+ if (e->next)
+ e_prev = e;
+ }
+
+ if (!is_long(pc->p->exec_tail)) {
+ /* this may occur if moving FP results */
+ assert(e_prev && !is_long(e_prev));
+ convert_to_long(pc, e_prev);
+ convert_to_long(pc, pc->p->exec_tail);
}
assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head));
@@ -1530,6 +2199,7 @@ out_err:
tgsi_parse_free(&parse);
out_cleanup:
+ free_nv50_pc(pc);
return ret;
}
@@ -1543,16 +2213,16 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
static void
nv50_program_upload_data(struct nv50_context *nv50, float *map,
- unsigned start, unsigned count)
+ unsigned start, unsigned count, unsigned cbuf)
{
- struct nouveau_channel *chan = nv50->screen->nvws->channel;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
while (count) {
unsigned nr = count > 2047 ? 2047 : count;
BEGIN_RING(chan, tesla, 0x00000f00, 1);
- OUT_RING (chan, (NV50_CB_PMISC << 0) | (start << 8));
+ OUT_RING (chan, (cbuf << 0) | (start << 8));
BEGIN_RING(chan, tesla, 0x40000f04, nr);
OUT_RINGp (chan, map, nr);
@@ -1565,70 +2235,93 @@ nv50_program_upload_data(struct nv50_context *nv50, float *map,
static void
nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
{
- struct nouveau_winsys *nvws = nv50->screen->nvws;
- struct pipe_winsys *ws = nv50->pipe.winsys;
- unsigned nr = p->param_nr + p->immd_nr;
+ struct pipe_screen *pscreen = nv50->pipe.screen;
- if (!p->data && nr) {
- struct nouveau_resource *heap = nv50->screen->vp_data_heap;
+ if (!p->data[0] && p->immd_nr) {
+ struct nouveau_resource *heap = nv50->screen->immd_heap[0];
- if (nvws->res_alloc(heap, nr, p, &p->data)) {
- while (heap->next && heap->size < nr) {
+ if (nouveau_resource_alloc(heap, p->immd_nr, p, &p->data[0])) {
+ while (heap->next && heap->size < p->immd_nr) {
struct nv50_program *evict = heap->next->priv;
- nvws->res_free(&evict->data);
+ nouveau_resource_free(&evict->data[0]);
}
- if (nvws->res_alloc(heap, nr, p, &p->data))
+ if (nouveau_resource_alloc(heap, p->immd_nr, p,
+ &p->data[0]))
assert(0);
}
+
+ /* immediates only need to be uploaded again when freed */
+ nv50_program_upload_data(nv50, p->immd, p->data[0]->start,
+ p->immd_nr, NV50_CB_PMISC);
}
- if (p->param_nr) {
- float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
- PIPE_BUFFER_USAGE_CPU_READ);
- nv50_program_upload_data(nv50, map, p->data->start,
- p->param_nr);
- ws->buffer_unmap(ws, nv50->constbuf[p->type]);
+ if (!p->data[1] && p->param_nr) {
+ struct nouveau_resource *heap =
+ nv50->screen->parm_heap[p->type];
+
+ if (nouveau_resource_alloc(heap, p->param_nr, p, &p->data[1])) {
+ while (heap->next && heap->size < p->param_nr) {
+ struct nv50_program *evict = heap->next->priv;
+ nouveau_resource_free(&evict->data[1]);
+ }
+
+ if (nouveau_resource_alloc(heap, p->param_nr, p,
+ &p->data[1]))
+ assert(0);
+ }
}
- if (p->immd_nr) {
- nv50_program_upload_data(nv50, p->immd,
- p->data->start + p->param_nr,
- p->immd_nr);
+ if (p->param_nr) {
+ unsigned cbuf = NV50_CB_PVP;
+ float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
+ PIPE_BUFFER_USAGE_CPU_READ);
+ if (p->type == PIPE_SHADER_FRAGMENT)
+ cbuf = NV50_CB_PFP;
+ nv50_program_upload_data(nv50, map, p->data[1]->start,
+ p->param_nr, cbuf);
+ pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]);
}
}
static void
nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
{
- struct nouveau_channel *chan = nv50->screen->nvws->channel;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct pipe_screen *screen = nv50->pipe.screen;
struct nv50_program_exec *e;
struct nouveau_stateobj *so;
const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
unsigned start, count, *up, *ptr;
boolean upload = FALSE;
- if (!p->buffer) {
- p->buffer = screen->buffer_create(screen, 0x100, 0, p->exec_size * 4);
+ if (!p->bo) {
+ nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100,
+ p->exec_size * 4, &p->bo);
upload = TRUE;
}
- if (p->data && p->data->start != p->data_start) {
+ if ((p->data[0] && p->data[0]->start != p->data_start[0]) ||
+ (p->data[1] && p->data[1]->start != p->data_start[1])) {
for (e = p->exec_head; e; e = e->next) {
- unsigned ei, ci;
+ unsigned ei, ci, bs;
if (e->param.index < 0)
continue;
+ bs = (e->inst[1] >> 22) & 0x07;
+ assert(bs < 2);
ei = e->param.shift >> 5;
- ci = e->param.index + p->data->start;
+ ci = e->param.index + p->data[bs]->start;
e->inst[ei] &= ~e->param.mask;
e->inst[ei] |= (ci << e->param.shift);
}
- p->data_start = p->data->start;
+ if (p->data[0])
+ p->data_start[0] = p->data[0]->start;
+ if (p->data[1])
+ p->data_start[1] = p->data[1]->start;
+
upload = TRUE;
}
@@ -1637,13 +2330,11 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
#ifdef NV50_PROGRAM_DUMP
NOUVEAU_ERR("-------\n");
- up = ptr = MALLOC(p->exec_size * 4);
for (e = p->exec_head; e; e = e->next) {
NOUVEAU_ERR("0x%08x\n", e->inst[0]);
if (is_long(e))
NOUVEAU_ERR("0x%08x\n", e->inst[1]);
}
-
#endif
up = ptr = MALLOC(p->exec_size * 4);
@@ -1655,20 +2346,20 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
so = so_new(4,2);
so_method(so, nv50->screen->tesla, 0x1280, 3);
- so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0);
so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
start = 0; count = p->exec_size;
while (count) {
- struct nouveau_winsys *nvws = nv50->screen->nvws;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
unsigned nr;
- so_emit(nvws, so);
+ so_emit(chan, so);
nr = MIN2(count, 2047);
- nr = MIN2(nvws->channel->pushbuf->remaining, nr);
- if (nvws->channel->pushbuf->remaining < (nr + 3)) {
+ nr = MIN2(chan->pushbuf->remaining, nr);
+ if (chan->pushbuf->remaining < (nr + 3)) {
FIRE_RING(chan);
continue;
}
@@ -1704,10 +2395,10 @@ nv50_vertprog_validate(struct nv50_context *nv50)
so = so_new(13, 2);
so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
- so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_LOW, 0, 0);
so_method(so, tesla, 0x1650, 2);
so_data (so, p->cfg.vp.attr[0]);
so_data (so, p->cfg.vp.attr[1]);
@@ -1728,6 +2419,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_program *p = nv50->fragprog;
struct nouveau_stateobj *so;
+ unsigned i;
if (!p->translated) {
nv50_program_validate(nv50, p);
@@ -1740,22 +2432,27 @@ nv50_fragprog_validate(struct nv50_context *nv50)
so = so_new(64, 2);
so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
- so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_LOW, 0, 0);
so_method(so, tesla, 0x1904, 4);
- so_data (so, 0x00040404); /* p: 0x01000404 */
+ so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */
so_data (so, 0x00000004);
so_data (so, 0x00000000);
so_data (so, 0x00000000);
- so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */
- so_data (so, 0x03020100);
- so_data (so, 0x07060504);
- so_data (so, 0x0b0a0908);
+ so_method(so, tesla, 0x16bc, p->cfg.fp.high_map);
+ for (i = 0; i < p->cfg.fp.high_map; i++)
+ so_data(so, p->cfg.fp.map[i]);
so_method(so, tesla, 0x1988, 2);
- so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */
+ so_data (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */
so_data (so, p->cfg.high_temp);
+ so_method(so, tesla, 0x1298, 1);
+ so_data (so, p->cfg.high_result);
+ so_method(so, tesla, 0x19a8, 1);
+ so_data (so, p->cfg.fp.regs[2]);
+ so_method(so, tesla, 0x196c, 1);
+ so_data (so, p->cfg.fp.regs[3]);
so_method(so, tesla, 0x1414, 1);
so_data (so, 0); /* program start offset */
so_ref(so, &nv50->state.fragprog);
@@ -1765,8 +2462,6 @@ nv50_fragprog_validate(struct nv50_context *nv50)
void
nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
{
- struct pipe_screen *pscreen = nv50->pipe.screen;
-
while (p->exec_head) {
struct nv50_program_exec *e = p->exec_head;
@@ -1776,10 +2471,10 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
p->exec_tail = NULL;
p->exec_size = 0;
- if (p->buffer)
- pipe_buffer_reference(&p->buffer, NULL);
+ nouveau_bo_ref(NULL, &p->bo);
- nv50->screen->nvws->res_free(&p->data);
+ nouveau_resource_free(&p->data[0]);
+ nouveau_resource_free(&p->data[1]);
p->translated = 0;
}
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 78deed6a384..096e0476aab 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -24,10 +24,10 @@ struct nv50_program {
struct nv50_program_exec *exec_head;
struct nv50_program_exec *exec_tail;
unsigned exec_size;
- struct nouveau_resource *data;
- unsigned data_start;
+ struct nouveau_resource *data[2];
+ unsigned data_start[2];
- struct pipe_buffer *buffer;
+ struct nouveau_bo *bo;
float *immd;
unsigned immd_nr;
@@ -39,6 +39,11 @@ struct nv50_program {
struct {
unsigned attr[2];
} vp;
+ struct {
+ unsigned regs[4];
+ unsigned map[5];
+ unsigned high_map;
+ } fp;
} cfg;
};
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 35cebdbdc32..940e04365f2 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -26,7 +26,7 @@
#include "nv50_context.h"
struct nv50_query {
- struct pipe_buffer *buffer;
+ struct nouveau_bo *bo;
unsigned type;
boolean ready;
uint64_t result;
@@ -41,14 +41,16 @@ nv50_query(struct pipe_query *pipe)
static struct pipe_query *
nv50_query_create(struct pipe_context *pipe, unsigned type)
{
- struct pipe_screen *screen = pipe->screen;
+ struct nouveau_device *dev = nouveau_screen(pipe->screen)->device;
struct nv50_query *q = CALLOC_STRUCT(nv50_query);
+ int ret;
assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER);
q->type = type;
- q->buffer = screen->buffer_create(screen, 256, 0, 16);
- if (!q->buffer) {
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 256,
+ 16, &q->bo);
+ if (ret) {
FREE(q);
return NULL;
}
@@ -62,7 +64,7 @@ nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
struct nv50_query *q = nv50_query(pq);
if (q) {
- pipe_buffer_reference(&q->buffer, NULL);
+ nouveau_bo_ref(NULL, &q->bo);
FREE(q);
}
}
@@ -71,7 +73,7 @@ static void
nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->nvws->channel;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_query *q = nv50_query(pq);
@@ -87,15 +89,14 @@ static void
nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->nvws->channel;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_query *q = nv50_query(pq);
- struct nouveau_bo *bo = nv50->screen->nvws->get_bo(q->buffer);
WAIT_RING (chan, 5);
BEGIN_RING(chan, tesla, 0x1b00, 4);
- OUT_RELOCh(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RING (chan, 0x00000000);
OUT_RING (chan, 0x0100f002);
FIRE_RING (chan);
@@ -105,7 +106,6 @@ static boolean
nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
boolean wait, uint64_t *result)
{
- struct pipe_winsys *ws = pipe->winsys;
struct nv50_query *q = nv50_query(pq);
/*XXX: Want to be able to return FALSE here instead of blocking
@@ -113,11 +113,10 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
*/
if (!q->ready) {
- uint32_t *map = ws->buffer_map(ws, q->buffer,
- PIPE_BUFFER_USAGE_CPU_READ);
- q->result = map[1];
+ nouveau_bo_map(q->bo, NOUVEAU_BO_RD);
+ q->result = ((uint32_t *)q->bo->map)[1];
q->ready = TRUE;
- ws->buffer_unmap(ws, q->buffer);
+ nouveau_bo_unmap(q->bo);
}
*result = q->result;
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 29805645948..fd39fa738b7 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -22,8 +22,6 @@
#include "pipe/p_screen.h"
-#include "util/u_simple_screen.h"
-
#include "nv50_context.h"
#include "nv50_screen.h"
@@ -68,23 +66,6 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
return FALSE;
}
-static const char *
-nv50_screen_get_name(struct pipe_screen *pscreen)
-{
- struct nv50_screen *screen = nv50_screen(pscreen);
- struct nouveau_device *dev = screen->nvws->channel->device;
- static char buffer[128];
-
- snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
- return buffer;
-}
-
-static const char *
-nv50_screen_get_vendor(struct pipe_screen *pscreen)
-{
- return "nouveau";
-}
-
static int
nv50_screen_get_param(struct pipe_screen *pscreen, int param)
{
@@ -153,37 +134,64 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, int param)
static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
- FREE(pscreen);
+ struct nv50_screen *screen = nv50_screen(pscreen);
+
+ nouveau_notifier_free(&screen->sync);
+ nouveau_grobj_free(&screen->tesla);
+ nouveau_grobj_free(&screen->eng2d);
+ nouveau_grobj_free(&screen->m2mf);
+ nouveau_screen_fini(&screen->base);
+ FREE(screen);
}
struct pipe_screen *
-nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
{
struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen);
+ struct nouveau_channel *chan;
+ struct pipe_screen *pscreen;
struct nouveau_stateobj *so;
- unsigned tesla_class = 0, ret;
- unsigned chipset = nvws->channel->device->chipset;
- int i;
+ unsigned chipset = dev->chipset;
+ unsigned tesla_class = 0;
+ int ret, i;
if (!screen)
return NULL;
- screen->nvws = nvws;
+ pscreen = &screen->base.base;
+
+ ret = nouveau_screen_init(&screen->base, dev);
+ if (ret) {
+ nv50_screen_destroy(pscreen);
+ return NULL;
+ }
+ chan = screen->base.channel;
+
+ pscreen->winsys = ws;
+ pscreen->destroy = nv50_screen_destroy;
+ pscreen->get_param = nv50_screen_get_param;
+ pscreen->get_paramf = nv50_screen_get_paramf;
+ pscreen->is_format_supported = nv50_screen_is_format_supported;
+
+ nv50_screen_init_miptree_functions(pscreen);
+ nv50_transfer_init_screen_functions(pscreen);
/* DMA engine object */
- ret = nvws->grobj_alloc(nvws, 0x5039, &screen->m2mf);
+ ret = nouveau_grobj_alloc(chan, 0xbeef5039, 0x5039, &screen->m2mf);
if (ret) {
NOUVEAU_ERR("Error creating M2MF object: %d\n", ret);
- nv50_screen_destroy(&screen->pipe);
+ nv50_screen_destroy(pscreen);
return NULL;
}
+ BIND_RING(chan, screen->m2mf, 1);
/* 2D object */
- ret = nvws->grobj_alloc(nvws, NV50_2D, &screen->eng2d);
+ ret = nouveau_grobj_alloc(chan, 0xbeef502d, 0x502d, &screen->eng2d);
if (ret) {
NOUVEAU_ERR("Error creating 2D object: %d\n", ret);
- nv50_screen_destroy(&screen->pipe);
+ nv50_screen_destroy(pscreen);
return NULL;
}
+ BIND_RING(chan, screen->eng2d, 2);
/* 3D object */
switch (chipset & 0xf0) {
@@ -199,70 +207,55 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
break;
default:
NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", chipset);
- nv50_screen_destroy(&screen->pipe);
+ nv50_screen_destroy(pscreen);
return NULL;
}
if (tesla_class == 0) {
NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset);
- nv50_screen_destroy(&screen->pipe);
+ nv50_screen_destroy(pscreen);
return NULL;
}
- ret = nvws->grobj_alloc(nvws, tesla_class, &screen->tesla);
+ ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class, &screen->tesla);
if (ret) {
NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
- nv50_screen_destroy(&screen->pipe);
+ nv50_screen_destroy(pscreen);
return NULL;
}
+ BIND_RING(chan, screen->tesla, 3);
/* Sync notifier */
- ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
if (ret) {
NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
- nv50_screen_destroy(&screen->pipe);
+ nv50_screen_destroy(pscreen);
return NULL;
}
- /* Setup the pipe */
- screen->pipe.winsys = ws;
-
- screen->pipe.destroy = nv50_screen_destroy;
-
- screen->pipe.get_name = nv50_screen_get_name;
- screen->pipe.get_vendor = nv50_screen_get_vendor;
- screen->pipe.get_param = nv50_screen_get_param;
- screen->pipe.get_paramf = nv50_screen_get_paramf;
-
- screen->pipe.is_format_supported = nv50_screen_is_format_supported;
-
- nv50_screen_init_miptree_functions(&screen->pipe);
- nv50_transfer_init_screen_functions(&screen->pipe);
- u_simple_screen_init(&screen->pipe);
-
/* Static M2MF init */
so = so_new(32, 0);
so_method(so, screen->m2mf, 0x0180, 3);
so_data (so, screen->sync->handle);
- so_data (so, screen->nvws->channel->vram->handle);
- so_data (so, screen->nvws->channel->vram->handle);
- so_emit(nvws, so);
+ so_data (so, chan->vram->handle);
+ so_data (so, chan->vram->handle);
+ so_emit(chan, so);
so_ref (NULL, &so);
/* Static 2D init */
so = so_new(64, 0);
so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4);
so_data (so, screen->sync->handle);
- so_data (so, screen->nvws->channel->vram->handle);
- so_data (so, screen->nvws->channel->vram->handle);
- so_data (so, screen->nvws->channel->vram->handle);
+ so_data (so, chan->vram->handle);
+ so_data (so, chan->vram->handle);
+ so_data (so, chan->vram->handle);
so_method(so, screen->eng2d, NV50_2D_OPERATION, 1);
so_data (so, NV50_2D_OPERATION_SRCCOPY);
so_method(so, screen->eng2d, 0x0290, 1);
so_data (so, 0);
so_method(so, screen->eng2d, 0x0888, 1);
so_data (so, 1);
- so_emit(nvws, so);
+ so_emit(chan, so);
so_ref(NULL, &so);
/* Static tesla init */
@@ -275,11 +268,11 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0),
NV50TCL_DMA_UNK0__SIZE);
for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++)
- so_data(so, nvws->channel->vram->handle);
+ so_data(so, chan->vram->handle);
so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0),
NV50TCL_DMA_UNK1__SIZE);
for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++)
- so_data(so, nvws->channel->vram->handle);
+ so_data(so, chan->vram->handle);
so_method(so, screen->tesla, 0x121c, 1);
so_data (so, 1);
@@ -290,27 +283,81 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
so_method(so, screen->tesla, 0x16b8, 1);
so_data (so, 8);
- /* Shared constant buffer */
- screen->constbuf = screen->pipe.buffer_create(&screen->pipe, 0, 0, 128 * 4 * 4);
- if (nvws->res_init(&screen->vp_data_heap, 0, 128)) {
- NOUVEAU_ERR("Error initialising constant buffer\n");
- nv50_screen_destroy(&screen->pipe);
+ /* constant buffers for immediates and VP/FP parameters */
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
+ &screen->constbuf_misc[0]);
+ if (ret) {
+ nv50_screen_destroy(pscreen);
return NULL;
}
+ for (i = 0; i < 2; i++) {
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
+ &screen->constbuf_parm[i]);
+ if (ret) {
+ nv50_screen_destroy(pscreen);
+ return NULL;
+ }
+ }
+
+ if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) ||
+ nouveau_resource_init(&screen->parm_heap[0], 0, 128) ||
+ nouveau_resource_init(&screen->parm_heap[1], 0, 128))
+ {
+ NOUVEAU_ERR("Error initialising constant buffers.\n");
+ nv50_screen_destroy(pscreen);
+ return NULL;
+ }
+
+ /*
+ // map constant buffers:
+ // B = buffer ID (maybe more than 1 byte)
+ // N = CB index used in shader instruction
+ // P = program type (0 = VP, 2 = GP, 3 = FP)
+ so_method(so, screen->tesla, 0x1694, 1);
+ so_data (so, 0x000BBNP1);
+ */
+
so_method(so, screen->tesla, 0x1280, 3);
- so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+ so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM |
+ so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, (NV50_CB_PMISC << 16) | 0x00001000);
+ so_data (so, (NV50_CB_PMISC << 16) | 0x00000800);
+ so_method(so, screen->tesla, 0x1694, 1);
+ so_data (so, 0x00000001 | (NV50_CB_PMISC << 12));
+ so_method(so, screen->tesla, 0x1694, 1);
+ so_data (so, 0x00000031 | (NV50_CB_PMISC << 12));
+
+ so_method(so, screen->tesla, 0x1280, 3);
+ so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, (NV50_CB_PVP << 16) | 0x00000800);
+ so_method(so, screen->tesla, 0x1694, 1);
+ so_data (so, 0x00000101 | (NV50_CB_PVP << 12));
+
+ so_method(so, screen->tesla, 0x1280, 3);
+ so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+ so_data (so, (NV50_CB_PFP << 16) | 0x00000800);
+ so_method(so, screen->tesla, 0x1694, 1);
+ so_data (so, 0x00000131 | (NV50_CB_PFP << 12));
/* Texture sampler/image unit setup - we abuse the constant buffer
* upload mechanism for the moment to upload data to the tex config
* blocks. At some point we *may* want to go the NVIDIA way of doing
* things?
*/
- screen->tic = screen->pipe.buffer_create(&screen->pipe, 0, 0, 32 * 8 * 4);
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 32*8*4, &screen->tic);
+ if (ret) {
+ nv50_screen_destroy(pscreen);
+ return NULL;
+ }
+
so_method(so, screen->tesla, 0x1280, 3);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
@@ -324,7 +371,12 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
so_data (so, 0x00000800);
- screen->tsc = screen->pipe.buffer_create(&screen->pipe, 0, 0, 32 * 8 * 4);
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 32*8*4, &screen->tsc);
+ if (ret) {
+ nv50_screen_destroy(pscreen);
+ return NULL;
+ }
+
so_method(so, screen->tesla, 0x1280, 3);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
@@ -352,14 +404,12 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
so_method(so, screen->tesla, 0x1234, 1);
so_data (so, 1);
- so_method(so, screen->tesla, 0x1458, 1);
- so_data (so, 1);
- so_emit(nvws, so);
+ so_emit(chan, so);
so_ref (so, &screen->static_init);
so_ref (NULL, &so);
- nvws->push_flush(nvws, 0, NULL);
+ nouveau_pushbuf_flush(chan, 0);
- return &screen->pipe;
+ return pscreen;
}
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index db567aaac89..61e24a5b571 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -1,10 +1,10 @@
#ifndef __NV50_SCREEN_H__
#define __NV50_SCREEN_H__
-#include "pipe/p_screen.h"
+#include "nouveau/nouveau_screen.h"
struct nv50_screen {
- struct pipe_screen pipe;
+ struct nouveau_screen base;
struct nouveau_winsys *nvws;
@@ -15,11 +15,14 @@ struct nv50_screen {
struct nouveau_grobj *m2mf;
struct nouveau_notifier *sync;
- struct pipe_buffer *constbuf;
- struct nouveau_resource *vp_data_heap;
+ struct nouveau_bo *constbuf_misc[1];
+ struct nouveau_bo *constbuf_parm[2];
- struct pipe_buffer *tic;
- struct pipe_buffer *tsc;
+ struct nouveau_resource *immd_heap[1];
+ struct nouveau_resource *parm_heap[2];
+
+ struct nouveau_bo *tic;
+ struct nouveau_bo *tsc;
struct nouveau_stateobj *static_init;
};
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index ba852194cdd..116866a8e78 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -136,9 +136,11 @@ static void *
nv50_sampler_state_create(struct pipe_context *pipe,
const struct pipe_sampler_state *cso)
{
- unsigned *tsc = CALLOC(8, sizeof(unsigned));
+ struct nv50_sampler_stateobj *sso = CALLOC(1, sizeof(*sso));
+ unsigned *tsc = sso->tsc;
+ float limit;
- tsc[0] = (0x00024000 |
+ tsc[0] = (0x00026000 |
(wrap_mode(cso->wrap_s) << 0) |
(wrap_mode(cso->wrap_t) << 3) |
(wrap_mode(cso->wrap_r) << 6));
@@ -202,7 +204,14 @@ nv50_sampler_state_create(struct pipe_context *pipe,
tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7);
}
- return (void *)tsc;
+ limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+ tsc[1] |= ((int)(limit * 256.0) & 0x1fff) << 11;
+
+ tsc[2] |= ((int)CLAMP(cso->max_lod, 0.0, 15.0) << 20) |
+ ((int)CLAMP(cso->min_lod, 0.0, 15.0) << 8);
+
+ sso->normalized = cso->normalized_coords;
+ return (void *)sso;
}
static void
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index c13d3de1cb9..0caf4b4e914 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -32,6 +32,9 @@ nv50_state_validate_fb(struct nv50_context *nv50)
unsigned i, w, h, gw = 0;
for (i = 0; i < fb->nr_cbufs; i++) {
+ struct pipe_texture *pt = fb->cbufs[i]->texture;
+ struct nouveau_bo *bo = nv50_miptree(pt)->bo;
+
if (!gw) {
w = fb->cbufs[i]->width;
h = fb->cbufs[i]->height;
@@ -46,12 +49,10 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_data (so, fb->cbufs[i]->height);
so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5);
- so_reloc (so, nv50_surface_buffer(fb->cbufs[i]), fb->cbufs[i]->offset,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH |
- NOUVEAU_BO_RDWR, 0, 0);
- so_reloc (so, nv50_surface_buffer(fb->cbufs[i]), fb->cbufs[i]->offset,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
- NOUVEAU_BO_RDWR, 0, 0);
+ so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0);
+ so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
switch (fb->cbufs[i]->format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
so_data(so, 0xcf);
@@ -65,7 +66,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_data(so, 0xe6);
break;
}
- so_data(so, 0x00000000);
+ so_data(so, bo->tile_mode << 4);
so_data(so, 0x00000000);
so_method(so, tesla, 0x1224, 1);
@@ -73,6 +74,9 @@ nv50_state_validate_fb(struct nv50_context *nv50)
}
if (fb->zsbuf) {
+ struct pipe_texture *pt = fb->zsbuf->texture;
+ struct nouveau_bo *bo = nv50_miptree(pt)->bo;
+
if (!gw) {
w = fb->zsbuf->width;
h = fb->zsbuf->height;
@@ -83,12 +87,10 @@ nv50_state_validate_fb(struct nv50_context *nv50)
}
so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5);
- so_reloc (so, nv50_surface_buffer(fb->zsbuf), fb->zsbuf->offset,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH |
- NOUVEAU_BO_RDWR, 0, 0);
- so_reloc (so, nv50_surface_buffer(fb->zsbuf), fb->zsbuf->offset,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
- NOUVEAU_BO_RDWR, 0, 0);
+ so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0);
+ so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
switch (fb->zsbuf->format) {
case PIPE_FORMAT_Z24S8_UNORM:
so_data(so, 0x16);
@@ -102,7 +104,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_data(so, 0x16);
break;
}
- so_data(so, 0x00000000);
+ so_data(so, bo->tile_mode << 4);
so_data(so, 0x00000000);
so_method(so, tesla, 0x1538, 1);
@@ -131,7 +133,7 @@ static void
nv50_state_emit(struct nv50_context *nv50)
{
struct nv50_screen *screen = nv50->screen;
- struct nouveau_winsys *nvws = screen->nvws;
+ struct nouveau_channel *chan = screen->base.channel;
if (nv50->pctx_id != screen->cur_pctx) {
nv50->state.dirty |= 0xffffffff;
@@ -139,40 +141,40 @@ nv50_state_emit(struct nv50_context *nv50)
}
if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER)
- so_emit(nvws, nv50->state.fb);
+ so_emit(chan, nv50->state.fb);
if (nv50->state.dirty & NV50_NEW_BLEND)
- so_emit(nvws, nv50->state.blend);
+ so_emit(chan, nv50->state.blend);
if (nv50->state.dirty & NV50_NEW_ZSA)
- so_emit(nvws, nv50->state.zsa);
+ so_emit(chan, nv50->state.zsa);
if (nv50->state.dirty & NV50_NEW_VERTPROG)
- so_emit(nvws, nv50->state.vertprog);
+ so_emit(chan, nv50->state.vertprog);
if (nv50->state.dirty & NV50_NEW_FRAGPROG)
- so_emit(nvws, nv50->state.fragprog);
+ so_emit(chan, nv50->state.fragprog);
if (nv50->state.dirty & NV50_NEW_RASTERIZER)
- so_emit(nvws, nv50->state.rast);
+ so_emit(chan, nv50->state.rast);
if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
- so_emit(nvws, nv50->state.blend_colour);
+ so_emit(chan, nv50->state.blend_colour);
if (nv50->state.dirty & NV50_NEW_STIPPLE)
- so_emit(nvws, nv50->state.stipple);
+ so_emit(chan, nv50->state.stipple);
if (nv50->state.dirty & NV50_NEW_SCISSOR)
- so_emit(nvws, nv50->state.scissor);
+ so_emit(chan, nv50->state.scissor);
if (nv50->state.dirty & NV50_NEW_VIEWPORT)
- so_emit(nvws, nv50->state.viewport);
+ so_emit(chan, nv50->state.viewport);
if (nv50->state.dirty & NV50_NEW_SAMPLER)
- so_emit(nvws, nv50->state.tsc_upload);
+ so_emit(chan, nv50->state.tsc_upload);
if (nv50->state.dirty & NV50_NEW_TEXTURE)
- so_emit(nvws, nv50->state.tic_upload);
+ so_emit(chan, nv50->state.tic_upload);
if (nv50->state.dirty & NV50_NEW_ARRAYS) {
- so_emit(nvws, nv50->state.vtxfmt);
- so_emit(nvws, nv50->state.vtxbuf);
+ so_emit(chan, nv50->state.vtxfmt);
+ so_emit(chan, nv50->state.vtxbuf);
}
nv50->state.dirty = 0;
- so_emit_reloc_markers(nvws, nv50->state.fb);
- so_emit_reloc_markers(nvws, nv50->state.vertprog);
- so_emit_reloc_markers(nvws, nv50->state.fragprog);
- so_emit_reloc_markers(nvws, nv50->state.vtxbuf);
- so_emit_reloc_markers(nvws, nv50->screen->static_init);
+ so_emit_reloc_markers(chan, nv50->state.fb);
+ so_emit_reloc_markers(chan, nv50->state.vertprog);
+ so_emit_reloc_markers(chan, nv50->state.fragprog);
+ so_emit_reloc_markers(chan, nv50->state.vtxbuf);
+ so_emit_reloc_markers(chan, nv50->screen->static_init);
}
boolean
@@ -293,12 +295,12 @@ viewport_uptodate:
so_data (so, NV50_CB_TSC);
so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8);
for (i = 0; i < nv50->sampler_nr; i++)
- so_datap (so, nv50->sampler[i], 8);
+ so_datap (so, nv50->sampler[i]->tsc, 8);
so_ref(so, &nv50->state.tsc_upload);
so_ref(NULL, &so);
}
- if (nv50->dirty & NV50_NEW_TEXTURE)
+ if (nv50->dirty & (NV50_NEW_TEXTURE | NV50_NEW_SAMPLER))
nv50_tex_validate(nv50);
if (nv50->dirty & NV50_NEW_ARRAYS)
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 0cc5168144d..8db3b6d344d 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -52,21 +52,17 @@ static int
nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
{
struct nv50_miptree *mt = nv50_miptree(ps->texture);
- struct nouveau_channel *chan = screen->nvws->channel;
+ struct nouveau_channel *chan = screen->eng2d->channel;
struct nouveau_grobj *eng2d = screen->eng2d;
- struct nouveau_bo *bo;
+ struct nouveau_bo *bo = nv50_miptree(ps->texture)->bo;
int format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT;
int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD);
-
- bo = screen->nvws->get_bo(nv50_miptree(ps->texture)->buffer);
- if (!bo)
- return 1;
format = nv50_format(ps->format);
if (format < 0)
return 1;
- if (!bo->tiled) {
+ if (!bo->tile_flags) {
BEGIN_RING(chan, eng2d, mthd, 2);
OUT_RING (chan, format);
OUT_RING (chan, 1);
@@ -80,7 +76,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
BEGIN_RING(chan, eng2d, mthd, 5);
OUT_RING (chan, format);
OUT_RING (chan, 0);
- OUT_RING (chan, 0);
+ OUT_RING (chan, bo->tile_mode << 4);
OUT_RING (chan, 1);
OUT_RING (chan, 0);
BEGIN_RING(chan, eng2d, mthd + 0x18, 4);
@@ -108,7 +104,7 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst,
int dx, int dy, struct pipe_surface *src, int sx, int sy,
int w, int h)
{
- struct nouveau_channel *chan = screen->nvws->channel;
+ struct nouveau_channel *chan = screen->eng2d->channel;
struct nouveau_grobj *eng2d = screen->eng2d;
int ret;
@@ -165,7 +161,7 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
{
struct nv50_context *nv50 = (struct nv50_context *)pipe;
struct nv50_screen *screen = nv50->screen;
- struct nouveau_channel *chan = screen->nvws->channel;
+ struct nouveau_channel *chan = screen->eng2d->channel;
struct nouveau_grobj *eng2d = screen->eng2d;
int format, ret;
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 223c8a3a456..ff40c2ad81b 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -26,7 +26,8 @@
#include "nouveau/nouveau_stateobj.h"
static int
-nv50_tex_construct(struct nouveau_stateobj *so, struct nv50_miptree *mt)
+nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
+ struct nv50_miptree *mt, int unit)
{
switch (mt->base.format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
@@ -117,15 +118,18 @@ nv50_tex_construct(struct nouveau_stateobj *so, struct nv50_miptree *mt)
return 1;
}
- so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+ so_reloc(so, mt->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
NOUVEAU_BO_RD, 0, 0);
- so_data (so, 0xd0005000);
+ if (nv50->sampler[unit]->normalized)
+ so_data (so, 0xd0005000 | mt->bo->tile_mode << 22);
+ else
+ so_data (so, 0x5001d000 | mt->bo->tile_mode << 22);
so_data (so, 0x00300000);
so_data (so, mt->base.width[0]);
- so_data (so, (mt->base.depth[0] << 16) | mt->base.height[0]);
+ so_data (so, (mt->base.last_level << 28) |
+ (mt->base.depth[0] << 16) | mt->base.height[0]);
so_data (so, 0x03000000);
- so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH |
- NOUVEAU_BO_RD, 0, 0);
+ so_data (so, mt->base.last_level << 4);
return 0;
}
@@ -135,23 +139,35 @@ nv50_tex_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nouveau_stateobj *so;
- int unit;
+ int unit, push;
+
+ push = nv50->miptree_nr * 9 + 2;
+ push += MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2;
- so = so_new(nv50->miptree_nr * 8 + 3, nv50->miptree_nr * 2);
+ so = so_new(push, nv50->miptree_nr * 2);
so_method(so, tesla, 0x0f00, 1);
so_data (so, NV50_CB_TIC);
- so_method(so, tesla, 0x40000f04, nv50->miptree_nr * 8);
for (unit = 0; unit < nv50->miptree_nr; unit++) {
struct nv50_miptree *mt = nv50->miptree[unit];
- if (nv50_tex_construct(so, mt)) {
+ so_method(so, tesla, 0x40000f04, 8);
+ if (nv50_tex_construct(nv50, so, mt, unit)) {
NOUVEAU_ERR("failed tex validate\n");
so_ref(NULL, &so);
return;
}
+
+ so_method(so, tesla, 0x1458, 1);
+ so_data (so, (unit << 9) | (unit << 1) | 1);
+ }
+
+ for (; unit < nv50->state.miptree_nr; unit++) {
+ so_method(so, tesla, 0x1458, 1);
+ so_data (so, (unit << 1) | 0);
}
so_ref(so, &nv50->state.tic_upload);
so_ref(NULL, &so);
+ nv50->state.miptree_nr = nv50->miptree_nr;
}
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 747195b4f63..d0b7f0bef43 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -6,8 +6,8 @@
struct nv50_transfer {
struct pipe_transfer base;
- struct pipe_buffer *buffer;
- struct nv50_miptree_level *level;
+ struct nouveau_bo *bo;
+ unsigned level_offset;
int level_pitch;
int level_width;
int level_height;
@@ -16,51 +16,48 @@ struct nv50_transfer {
};
static void
-nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct pipe_buffer *src,
- int src_pitch, int sx, int sy, int sw, int sh,
- struct pipe_buffer *dst, int dst_pitch, int dx, int dy,
+nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo,
+ unsigned src_offset, int src_pitch, int sx, int sy,
+ int sw, int sh, struct nouveau_bo *dst_bo,
+ unsigned dst_offset, int dst_pitch, int dx, int dy,
int dw, int dh, int cpp, int width, int height,
unsigned src_reloc, unsigned dst_reloc)
{
struct nv50_screen *screen = nv50_screen(pscreen);
- struct nouveau_winsys *nvws = screen->nvws;
- struct nouveau_channel *chan = nvws->channel;
+ struct nouveau_channel *chan = screen->m2mf->channel;
struct nouveau_grobj *m2mf = screen->m2mf;
- struct nouveau_bo *src_bo = nvws->get_bo(src);
- struct nouveau_bo *dst_bo = nvws->get_bo(dst);
- unsigned src_offset = 0, dst_offset = 0;
src_reloc |= NOUVEAU_BO_RD;
dst_reloc |= NOUVEAU_BO_WR;
WAIT_RING (chan, 14);
- if (!src_bo->tiled) {
+ if (!src_bo->tile_flags) {
BEGIN_RING(chan, m2mf, 0x0200, 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, m2mf, 0x0314, 1);
OUT_RING (chan, src_pitch);
- src_offset = (sy * src_pitch) + (sx * cpp);
+ src_offset += (sy * src_pitch) + (sx * cpp);
} else {
BEGIN_RING(chan, m2mf, 0x0200, 6);
OUT_RING (chan, 0);
- OUT_RING (chan, 0);
+ OUT_RING (chan, src_bo->tile_mode << 4);
OUT_RING (chan, sw * cpp);
OUT_RING (chan, sh);
OUT_RING (chan, 1);
OUT_RING (chan, 0);
}
- if (!dst_bo->tiled) {
+ if (!dst_bo->tile_flags) {
BEGIN_RING(chan, m2mf, 0x021c, 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, m2mf, 0x0318, 1);
OUT_RING (chan, dst_pitch);
- dst_offset = (dy * dst_pitch) + (dx * cpp);
+ dst_offset += (dy * dst_pitch) + (dx * cpp);
} else {
BEGIN_RING(chan, m2mf, 0x021c, 6);
OUT_RING (chan, 0);
- OUT_RING (chan, 0);
+ OUT_RING (chan, dst_bo->tile_mode << 4);
OUT_RING (chan, dw * cpp);
OUT_RING (chan, dh);
OUT_RING (chan, 1);
@@ -77,13 +74,13 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct pipe_buffer *src,
BEGIN_RING(chan, m2mf, 0x030c, 2);
OUT_RELOCl(chan, src_bo, src_offset, src_reloc);
OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc);
- if (src_bo->tiled) {
+ if (src_bo->tile_flags) {
BEGIN_RING(chan, m2mf, 0x0218, 1);
OUT_RING (chan, (dy << 16) | sx);
} else {
src_offset += (line_count * src_pitch);
}
- if (dst_bo->tiled) {
+ if (dst_bo->tile_flags) {
BEGIN_RING(chan, m2mf, 0x0234, 1);
OUT_RING (chan, (sy << 16) | dx);
} else {
@@ -108,10 +105,12 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
enum pipe_transfer_usage usage,
unsigned x, unsigned y, unsigned w, unsigned h)
{
+ struct nouveau_device *dev = nouveau_screen(pscreen)->device;
struct nv50_miptree *mt = nv50_miptree(pt);
struct nv50_miptree_level *lvl = &mt->level[level];
struct nv50_transfer *tx;
unsigned image = 0;
+ int ret;
if (pt->target == PIPE_TEXTURE_CUBE)
image = face;
@@ -133,20 +132,24 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->base.stride = (w * pt->block.size);
tx->base.usage = usage;
- tx->level = lvl;
tx->level_pitch = lvl->pitch;
tx->level_width = mt->base.width[level];
tx->level_height = mt->base.height[level];
+ tx->level_offset = lvl->image_offset[image];
tx->level_x = x;
tx->level_y = y;
- tx->buffer =
- pipe_buffer_create(pscreen, 0, NOUVEAU_BUFFER_USAGE_TRANSFER,
- w * tx->base.block.size * h);
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
+ w * pt->block.size * h, &tx->bo);
+ if (ret) {
+ FREE(tx);
+ return NULL;
+ }
if (usage != PIPE_TRANSFER_WRITE) {
- nv50_transfer_rect_m2mf(pscreen, mt->buffer, tx->level_pitch,
- x, y, tx->level_width, tx->level_height,
- tx->buffer, tx->base.stride, 0, 0,
+ nv50_transfer_rect_m2mf(pscreen, mt->bo, tx->level_offset,
+ tx->level_pitch, x, y, tx->level_width,
+ tx->level_height, tx->bo, 0,
+ tx->base.stride, 0, 0,
tx->base.width, tx->base.height,
tx->base.block.size, w, h,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
@@ -164,17 +167,18 @@ nv50_transfer_del(struct pipe_transfer *ptx)
if (ptx->usage != PIPE_TRANSFER_READ) {
struct pipe_screen *pscreen = ptx->texture->screen;
- nv50_transfer_rect_m2mf(pscreen, tx->buffer, tx->base.stride,
+ nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride,
0, 0, tx->base.width, tx->base.height,
- mt->buffer, tx->level_pitch,
- tx->level_x, tx->level_y,
- tx->level_width, tx->level_height,
- tx->base.block.size, tx->base.width,
- tx->base.height, NOUVEAU_BO_GART,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+ mt->bo, tx->level_offset,
+ tx->level_pitch, tx->level_x,
+ tx->level_y, tx->level_width,
+ tx->level_height, tx->base.block.size,
+ tx->base.width, tx->base.height,
+ NOUVEAU_BO_GART, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_GART);
}
- pipe_buffer_reference(&tx->buffer, NULL);
+ nouveau_bo_ref(NULL, &tx->bo);
pipe_texture_reference(&ptx->texture, NULL);
FREE(ptx);
}
@@ -184,13 +188,17 @@ nv50_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
{
struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
unsigned flags = 0;
+ int ret;
if (ptx->usage & PIPE_TRANSFER_WRITE)
- flags |= PIPE_BUFFER_USAGE_CPU_WRITE;
+ flags |= NOUVEAU_BO_WR;
if (ptx->usage & PIPE_TRANSFER_READ)
- flags |= PIPE_BUFFER_USAGE_CPU_READ;
+ flags |= NOUVEAU_BO_RD;
- return pipe_buffer_map(pscreen, tx->buffer, flags);
+ ret = nouveau_bo_map(tx->bo, flags);
+ if (ret)
+ return NULL;
+ return tx->bo->map;
}
static void
@@ -198,7 +206,7 @@ nv50_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
{
struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
- pipe_buffer_unmap(pscreen, tx->buffer);
+ nouveau_bo_unmap(tx->bo);
}
void
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 0749c906914..f81929f2387 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -22,6 +22,7 @@
#include "pipe/p_context.h"
#include "pipe/p_state.h"
+#include "pipe/p_inlines.h"
#include "nv50_context.h"
@@ -53,7 +54,7 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
unsigned count)
{
struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->nvws->channel;
+ struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
nv50_state_validate(nv50);
@@ -83,7 +84,7 @@ static INLINE void
nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv50->screen->nvws->channel;
+ struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
map += start;
@@ -112,7 +113,7 @@ static INLINE void
nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv50->screen->nvws->channel;
+ struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
map += start;
@@ -141,7 +142,7 @@ static INLINE void
nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint8_t *map,
unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv50->screen->nvws->channel;
+ struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
map += start;
@@ -163,10 +164,12 @@ nv50_draw_elements(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->nvws->channel;
+ struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct pipe_winsys *ws = pipe->winsys;
- void *map = ws->buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
+ struct pipe_screen *pscreen = pipe->screen;
+ void *map;
+
+ map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
nv50_state_validate(nv50);
@@ -193,6 +196,7 @@ nv50_draw_elements(struct pipe_context *pipe,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
+ pipe_buffer_unmap(pscreen, indexBuffer);
pipe->flush(pipe, 0, NULL);
return TRUE;
}
@@ -212,6 +216,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
struct pipe_vertex_element *ve = &nv50->vtxelt[i];
struct pipe_vertex_buffer *vb =
&nv50->vtxbuf[ve->vertex_buffer_index];
+ struct nouveau_bo *bo = nouveau_bo(vb->buffer);
switch (ve->src_format) {
case PIPE_FORMAT_R32G32B32A32_FLOAT:
@@ -240,10 +245,10 @@ nv50_vbo_validate(struct nv50_context *nv50)
so_method(vtxbuf, tesla, 0x900 + (i * 16), 3);
so_data (vtxbuf, 0x20000000 | vb->stride);
- so_reloc (vtxbuf, vb->buffer, vb->buffer_offset +
+ so_reloc (vtxbuf, bo, vb->buffer_offset +
ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (vtxbuf, vb->buffer, vb->buffer_offset +
+ so_reloc (vtxbuf, bo, vb->buffer_offset +
ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
}