summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nv50
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nv50')
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h14
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c43
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c512
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h2
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c21
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c64
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c52
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c91
-rw-r--r--src/gallium/drivers/nv50/nv50_texture.h1
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c87
12 files changed, 594 insertions, 297 deletions
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 4b0f0622953..5578a5838fb 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -65,7 +65,7 @@ struct nv50_rasterizer_stateobj {
};
struct nv50_sampler_stateobj {
- bool normalized;
+ boolean normalized;
unsigned tsc[8];
};
@@ -126,7 +126,7 @@ struct nv50_state {
unsigned viewport_bypass;
struct nouveau_stateobj *tsc_upload;
struct nouveau_stateobj *tic_upload;
- unsigned miptree_nr;
+ unsigned miptree_nr[PIPE_SHADER_TYPES];
struct nouveau_stateobj *vertprog;
struct nouveau_stateobj *fragprog;
struct nouveau_stateobj *programs;
@@ -162,10 +162,10 @@ struct nv50_context {
unsigned vtxbuf_nr;
struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
unsigned vtxelt_nr;
- struct nv50_sampler_stateobj *sampler[PIPE_MAX_SAMPLERS];
- unsigned sampler_nr;
- struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS];
- unsigned miptree_nr;
+ struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+ unsigned sampler_nr[PIPE_SHADER_TYPES];
+ struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+ unsigned miptree_nr[PIPE_SHADER_TYPES];
uint16_t vbo_fifo;
};
@@ -218,7 +218,7 @@ extern void nv50_state_flush_notify(struct nouveau_channel *chan);
extern void nv50_so_init_sifc(struct nv50_context *nv50,
struct nouveau_stateobj *so,
struct nouveau_bo *bo, unsigned reloc,
- unsigned size);
+ unsigned offset, unsigned size);
/* nv50_tex.c */
extern void nv50_tex_validate(struct nv50_context *);
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 3d58746793f..9e083b662dd 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -55,6 +55,20 @@ get_tile_mode(unsigned ny, unsigned d)
return tile_mode | 0x10;
}
+static INLINE unsigned
+get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned nb_h)
+{
+ unsigned tile_h = get_tile_height(tile_mode);
+ unsigned tile_d = get_tile_depth(tile_mode);
+
+ /* pitch_2d == to next slice within this volume-tile */
+ /* pitch_3d == size (in bytes) of a volume-tile */
+ unsigned pitch_2d = tile_h * 64;
+ unsigned pitch_3d = tile_d * align(nb_h, tile_h) * pitch;
+
+ return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
+}
+
static struct pipe_texture *
nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
{
@@ -91,13 +105,11 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
for (l = 0; l <= pt->last_level; l++) {
struct nv50_miptree_level *lvl = &mt->level[l];
-
- pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+ unsigned nblocksy = pf_get_nblocksy(pt->format, height);
lvl->image_offset = CALLOC(mt->image_nr, sizeof(int));
- lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64);
- lvl->tile_mode = get_tile_mode(pt->nblocksy[l], depth);
+ lvl->pitch = align(pf_get_stride(pt->format, width), 64);
+ lvl->tile_mode = get_tile_mode(nblocksy, depth);
width = u_minify(width, 1);
height = u_minify(height, 1);
@@ -118,7 +130,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
unsigned tile_d = get_tile_depth(lvl->tile_mode);
size = lvl->pitch;
- size *= align(pt->nblocksy[l], tile_h);
+ size *= align(pf_get_nblocksy(pt->format, u_minify(pt->height0, l)), tile_h);
size *= align(u_minify(pt->depth0, l), tile_d);
lvl->image_offset[i] = mt->total_size;
@@ -132,6 +144,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
mt->level[0].tile_mode, tile_flags,
&mt->base.bo);
if (ret) {
+ for (l = 0; l < pt->last_level; ++l)
+ FREE(mt->level[l].image_offset);
FREE(mt);
return NULL;
}
@@ -171,6 +185,10 @@ static void
nv50_miptree_destroy(struct pipe_texture *pt)
{
struct nv50_miptree *mt = nv50_miptree(pt);
+ unsigned l;
+
+ for (l = 0; l < pt->last_level; ++l)
+ FREE(mt->level[l].image_offset);
nouveau_bo_ref(NULL, &mt->base.bo);
FREE(mt);
@@ -184,15 +202,10 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
struct nv50_miptree *mt = nv50_miptree(pt);
struct nv50_miptree_level *lvl = &mt->level[level];
struct pipe_surface *ps;
- int img;
+ unsigned img = 0;
if (pt->target == PIPE_TEXTURE_CUBE)
img = face;
- else
- if (pt->target == PIPE_TEXTURE_3D)
- img = zslice;
- else
- img = 0;
ps = CALLOC_STRUCT(pipe_surface);
if (!ps)
@@ -208,6 +221,12 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
ps->zslice = zslice;
ps->offset = lvl->image_offset[img];
+ if (pt->target == PIPE_TEXTURE_3D) {
+ unsigned nb_h = pf_get_nblocksy(pt->format, ps->height);
+ ps->offset += get_zslice_offset(lvl->tile_mode, zslice,
+ lvl->pitch, nb_h);
+ }
+
return ps;
}
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index f0fe7e61684..e496cf4cad8 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -119,7 +119,7 @@ struct nv50_pc {
struct nv50_reg *param;
int param_nr;
struct nv50_reg *immd;
- float *immd_buf;
+ uint32_t *immd_buf;
int immd_nr;
struct nv50_reg **addr;
int addr_nr;
@@ -131,6 +131,9 @@ struct nv50_pc {
struct nv50_reg *r_brdc;
struct nv50_reg *r_dst[4];
+ struct nv50_reg reg_instances[16];
+ unsigned reg_instance_nr;
+
unsigned interp_mode[32];
/* perspective interpolation registers */
struct nv50_reg *iv_p;
@@ -150,6 +153,20 @@ struct nv50_pc {
boolean allow32;
};
+static INLINE struct nv50_reg *
+reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+ struct nv50_reg *ri;
+
+ assert(pc->reg_instance_nr < 16);
+ ri = &pc->reg_instances[pc->reg_instance_nr++];
+ if (reg) {
+ *ri = *reg;
+ reg->mod = 0;
+ }
+ return ri;
+}
+
static INLINE void
ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
{
@@ -342,25 +359,34 @@ static void
kill_temp_temp(struct nv50_pc *pc)
{
int i;
-
+
for (i = 0; i < pc->temp_temp_nr; i++)
free_temp(pc, pc->temp_temp[i]);
pc->temp_temp_nr = 0;
}
static int
-ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
+ctor_immd_4u32(struct nv50_pc *pc,
+ uint32_t x, uint32_t y, uint32_t z, uint32_t w)
{
- pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(float)),
- (pc->immd_nr + 1) * 4 * sizeof(float));
+ unsigned size = pc->immd_nr * 4 * sizeof(uint32_t);
+
+ pc->immd_buf = REALLOC(pc->immd_buf, size, size + 4 * sizeof(uint32_t));
+
pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
-
+
return pc->immd_nr++;
}
+static INLINE int
+ctor_immd_4f32(struct nv50_pc *pc, float x, float y, float z, float w)
+{
+ return ctor_immd_4u32(pc, fui(x), fui(y), fui(z), fui(w));
+}
+
static struct nv50_reg *
alloc_immd(struct nv50_pc *pc, float f)
{
@@ -368,11 +394,11 @@ alloc_immd(struct nv50_pc *pc, float f)
unsigned hw;
for (hw = 0; hw < pc->immd_nr * 4; hw++)
- if (pc->immd_buf[hw] == f)
+ if (pc->immd_buf[hw] == fui(f))
break;
if (hw == pc->immd_nr * 4)
- hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4;
+ hw = ctor_immd_4f32(pc, f, -f, 0.5 * f, 0) * 4;
ctor_reg(r, P_IMMD, -1, hw);
return r;
@@ -464,22 +490,24 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
static INLINE void
set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
{
- unsigned val;
- float f = pc->immd_buf[imm->hw];
+ union {
+ float f;
+ uint32_t ui;
+ } u;
+ u.ui = pc->immd_buf[imm->hw];
- if (imm->mod & NV50_MOD_ABS)
- f = fabsf(f);
- val = fui((imm->mod & NV50_MOD_NEG) ? -f : f);
+ u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f;
+ u.f = (imm->mod & NV50_MOD_NEG) ? -u.f : u.f;
set_long(pc, e);
- /*XXX: can't be predicated - bits overlap.. catch cases where both
- * are required and avoid them. */
+ /* XXX: can't be predicated - bits overlap; cases where both
+ * are required should be avoided by using pc->allow32 */
set_pred(pc, 0, 0, e);
set_pred_wr(pc, 0, 0, e);
e->inst[1] |= 0x00000002 | 0x00000001;
- e->inst[0] |= (val & 0x3f) << 16;
- e->inst[1] |= (val >> 6) << 2;
+ e->inst[0] |= (u.ui & 0x3f) << 16;
+ e->inst[1] |= (u.ui >> 6) << 2;
}
static INLINE void
@@ -644,7 +672,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
if (src->type == P_IMMD || src->type == P_CONST) {
set_long(pc, e);
set_data(pc, src, 0x7f, 9, e);
- e->inst[1] |= 0x20000000; /* src0 const? */
+ e->inst[1] |= 0x20000000; /* mov from c[] */
} else {
if (src->type == P_ATTR) {
set_long(pc, e);
@@ -659,9 +687,9 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
if (is_long(e) && !is_immd(e)) {
e->inst[1] |= 0x04000000; /* 32-bit */
- e->inst[1] |= 0x0000c000; /* "subsubop" 0x3 */
+ e->inst[1] |= 0x0000c000; /* 32-bit c[] load / lane mask 0:1 */
if (!(e->inst[1] & 0x20000000))
- e->inst[1] |= 0x00030000; /* "subsubop" 0xf */
+ e->inst[1] |= 0x00030000; /* lane mask 2:3 */
} else
e->inst[0] |= 0x00008000;
@@ -676,6 +704,17 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
FREE(imm);
}
+static void
+emit_nop(struct nv50_pc *pc)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xf0000000;
+ set_long(pc, e);
+ e->inst[1] = 0xe0000000;
+ emit(pc, e);
+}
+
static boolean
check_swap_src_0_1(struct nv50_pc *pc,
struct nv50_reg **s0, struct nv50_reg **s1)
@@ -795,6 +834,33 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
}
static void
+emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ assert(dst->type == P_TEMP);
+ e->inst[1] = 0x20000000 | (pred << 12);
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_mov_to_pred(struct nv50_pc *pc, int pred, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x000001fc;
+ e->inst[1] = 0xa0000008;
+ set_long(pc, e);
+ set_pred_wr(pc, 1, pred, e);
+ set_src_0_restricted(pc, src, e);
+
+ emit(pc, e);
+}
+
+static void
emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1)
{
@@ -898,7 +964,6 @@ static INLINE void
emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1)
{
- assert(src0 != src1);
src1->mod ^= NV50_MOD_NEG;
emit_add(pc, dst, src0, src1);
src1->mod ^= NV50_MOD_NEG;
@@ -967,7 +1032,6 @@ static INLINE void
emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1, struct nv50_reg *src2)
{
- assert(src2 != src0 && src2 != src1);
src2->mod ^= NV50_MOD_NEG;
emit_mad(pc, dst, src0, src1, src2);
src2->mod ^= NV50_MOD_NEG;
@@ -1257,9 +1321,68 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
emit(pc, e);
}
+static struct nv50_program_exec *
+emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
+ struct nv50_program_exec **join)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ if (join) {
+ set_long(pc, e);
+ e->inst[0] |= 0xa0000002;
+ emit(pc, e);
+ *join = e;
+ e = exec(pc);
+ }
+
+ set_long(pc, e);
+ e->inst[0] |= 0x10000002;
+ if (pred >= 0)
+ set_pred(pc, cc, pred, e);
+ emit(pc, e);
+ return pc->p->exec_tail;
+}
+
+#define QOP_ADD 0
+#define QOP_SUBR 1
+#define QOP_SUB 2
+#define QOP_MOV_SRC1 3
+
+/* For a quad of threads / top left, top right, bottom left, bottom right
+ * pixels, do a different operation, and take src0 from a specific thread.
+ */
+static void
+emit_quadop(struct nv50_pc *pc, struct nv50_reg *dst, int wp, int lane_src0,
+ struct nv50_reg *src0, struct nv50_reg *src1, ubyte qop)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xc0000000;
+ e->inst[1] = 0x80000000;
+ set_long(pc, e);
+ e->inst[0] |= lane_src0 << 16;
+ set_src_0(pc, src0, e);
+ set_src_2(pc, src1, e);
+
+ if (wp >= 0)
+ set_pred_wr(pc, 1, wp, e);
+
+ if (dst)
+ set_dst(pc, dst, e);
+ else {
+ e->inst[0] |= 0x000001fc;
+ e->inst[1] |= 0x00000008;
+ }
+
+ e->inst[0] |= (qop & 3) << 20;
+ e->inst[1] |= (qop >> 2) << 22;
+
+ emit(pc, e);
+}
+
static void
load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
- struct nv50_reg **src, boolean proj)
+ struct nv50_reg **src, unsigned arg, boolean proj)
{
int mod[3] = { src[0]->mod, src[1]->mod, src[2]->mod };
@@ -1276,6 +1399,10 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
if (proj && 0 /* looks more correct without this */)
emit_mul(pc, t[2], t[2], src[3]);
+ else
+ if (arg == 4) /* there is no textureProj(samplerCubeShadow) */
+ emit_mov(pc, t[3], src[3]);
+
emit_flop(pc, 0, t[2], t[2]);
emit_mul(pc, t[0], src[0], t[2]);
@@ -1284,89 +1411,214 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
}
static void
-emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
- struct nv50_reg **src, unsigned unit, unsigned type, boolean proj)
+load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
+ struct nv50_reg **src, unsigned dim, unsigned arg)
{
- struct nv50_reg *t[4];
- struct nv50_program_exec *e;
+ unsigned c, mode;
+
+ if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
+ mode = pc->interp_mode[src[0]->index] | INTERP_PERSPECTIVE;
- unsigned c, mode, dim;
+ t[3]->rhw = src[3]->rhw;
+ emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
+ emit_flop(pc, 0, t[3], t[3]);
+ for (c = 0; c < dim; ++c) {
+ t[c]->rhw = src[c]->rhw;
+ emit_interp(pc, t[c], t[3], mode);
+ }
+ if (arg != dim) { /* depth reference value */
+ t[dim]->rhw = src[2]->rhw;
+ emit_interp(pc, t[dim], t[3], mode);
+ }
+ } else {
+ /* XXX: for some reason the blob sometimes uses MAD
+ * (mad f32 $rX $rY $rZ neg $r63)
+ */
+ emit_flop(pc, 0, t[3], src[3]);
+ for (c = 0; c < dim; ++c)
+ emit_mul(pc, t[c], src[c], t[3]);
+ if (arg != dim) /* depth reference value */
+ emit_mul(pc, t[dim], src[2], t[3]);
+ }
+}
+
+static INLINE void
+get_tex_dim(unsigned type, unsigned *dim, unsigned *arg)
+{
switch (type) {
case TGSI_TEXTURE_1D:
- dim = 1;
+ *arg = *dim = 1;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ *dim = 1;
+ *arg = 2;
break;
case TGSI_TEXTURE_UNKNOWN:
case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_SHADOW1D: /* XXX: x, z */
case TGSI_TEXTURE_RECT:
- dim = 2;
+ *arg = *dim = 2;
+ break;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ *dim = 2;
+ *arg = 3;
break;
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_SHADOWRECT: /* XXX */
- dim = 3;
+ *dim = *arg = 3;
break;
default:
assert(0);
break;
}
+}
- /* some cards need t[0]'s hw index to be a multiple of 4 */
- alloc_temp4(pc, t, 0);
+/* We shouldn't execute TEXLOD if any of the pixels in a quad have
+ * different LOD values, so branch off groups of equal LOD.
+ */
+static void
+emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod,
+ struct nv50_reg *src, struct nv50_program_exec *tex)
+{
+ struct nv50_program_exec *join_at;
+ unsigned i, target = pc->p->exec_size + 7 * 2;
- if (type == TGSI_TEXTURE_CUBE) {
- load_cube_tex_coords(pc, t, src, proj);
- } else
- if (proj) {
- if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
- mode = pc->interp_mode[src[0]->index];
-
- t[3]->rhw = src[3]->rhw;
- emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
- emit_flop(pc, 0, t[3], t[3]);
-
- for (c = 0; c < dim; c++) {
- t[c]->rhw = src[c]->rhw;
- emit_interp(pc, t[c], t[3],
- (mode | INTERP_PERSPECTIVE));
- }
- } else {
- emit_flop(pc, 0, t[3], src[3]);
- for (c = 0; c < dim; c++)
- emit_mul(pc, t[c], src[c], t[3]);
+ /* Subtract lod of each pixel from lod of top left pixel, jump
+ * texlod insn if result is 0, then repeat for 2 other pixels.
+ */
+ emit_quadop(pc, NULL, 0, 0, tlod, tlod, 0x55);
+ emit_branch(pc, 0, 2, &join_at)->param.index = target;
- /* XXX: for some reason the blob sometimes uses MAD:
- * emit_mad(pc, t[c], src[0][c], t[3], t[3])
- * pc->p->exec_tail->inst[1] |= 0x080fc000;
- */
+ for (i = 1; i < 4; ++i) {
+ emit_quadop(pc, NULL, 0, i, tlod, tlod, 0x55);
+ emit_branch(pc, 0, 2, NULL)->param.index = target;
+ }
+
+ emit_mov(pc, tlod, src); /* target */
+ emit(pc, tex); /* texlod */
+
+ join_at->param.index = target + 2 * 2;
+ emit_nop(pc);
+ pc->p->exec_tail->inst[1] |= 2; /* join _after_ tex */
+}
+
+static void
+emit_texbias_sequence(struct nv50_pc *pc, struct nv50_reg *t[4], unsigned arg,
+ struct nv50_program_exec *tex)
+{
+ struct nv50_program_exec *e;
+ struct nv50_reg imm_1248, *t123[4][4], *r_bits = alloc_temp(pc, NULL);
+ int r_pred = 0;
+ unsigned n, c, i, cc[4] = { 0x0a, 0x13, 0x11, 0x10 };
+
+ pc->allow32 = FALSE;
+ ctor_reg(&imm_1248, P_IMMD, -1, ctor_immd_4u32(pc, 1, 2, 4, 8) * 4);
+
+ /* Subtract bias value of thread i from bias values of each thread,
+ * store result in r_pred, and set bit i in r_bits if result was 0.
+ */
+ assert(arg < 4);
+ for (i = 0; i < 4; ++i, ++imm_1248.hw) {
+ emit_quadop(pc, NULL, r_pred, i, t[arg], t[arg], 0x55);
+ emit_mov(pc, r_bits, &imm_1248);
+ set_pred(pc, 2, r_pred, pc->p->exec_tail);
+ }
+ emit_mov_to_pred(pc, r_pred, r_bits);
+
+ /* The lanes of a quad are now grouped by the bit in r_pred they have
+ * set. Put the input values for TEX into a new register set for each
+ * group and execute TEX only for a specific group.
+ * We cannot use the same register set for each group because we need
+ * the derivatives, which are implicitly calculated, to be correct.
+ */
+ for (i = 1; i < 4; ++i) {
+ alloc_temp4(pc, t123[i], 0);
+
+ for (c = 0; c <= arg; ++c)
+ emit_mov(pc, t123[i][c], t[c]);
+
+ *(e = exec(pc)) = *(tex);
+ e->inst[0] &= ~0x01fc;
+ set_dst(pc, t123[i][0], e);
+ set_pred(pc, cc[i], r_pred, e);
+ emit(pc, e);
+ }
+ /* finally TEX on the original regs (where we kept the input) */
+ set_pred(pc, cc[0], r_pred, tex);
+ emit(pc, tex);
+
+ /* put the 3 * n other results into regs for lane 0 */
+ n = popcnt4(((e->inst[0] >> 25) & 0x3) | ((e->inst[1] >> 12) & 0xc));
+ for (i = 1; i < 4; ++i) {
+ for (c = 0; c < n; ++c) {
+ emit_mov(pc, t[c], t123[i][c]);
+ set_pred(pc, cc[i], r_pred, pc->p->exec_tail);
}
- } else {
- for (c = 0; c < dim; c++)
- emit_mov(pc, t[c], src[c]);
+ free_temp4(pc, t123[i]);
}
+ emit_nop(pc);
+ free_temp(pc, r_bits);
+}
+
+static void
+emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+ struct nv50_reg **src, unsigned unit, unsigned type,
+ boolean proj, int bias_lod)
+{
+ struct nv50_reg *t[4];
+ struct nv50_program_exec *e;
+ unsigned c, dim, arg;
+
+ /* t[i] must be within a single 128 bit super-reg */
+ alloc_temp4(pc, t, 0);
+
e = exec(pc);
+ e->inst[0] = 0xf0000000;
set_long(pc, e);
- e->inst[0] |= 0xf0000000;
- e->inst[1] |= 0x00000004;
set_dst(pc, t[0], e);
- e->inst[0] |= (unit << 9);
- if (dim == 2)
- e->inst[0] |= 0x00400000;
- else
- if (dim == 3) {
- e->inst[0] |= 0x00800000;
- if (type == TGSI_TEXTURE_CUBE)
- e->inst[0] |= 0x08000000;
+ /* TIC and TSC binding indices (TSC is ignored as TSC_LINKED = TRUE): */
+ e->inst[0] |= (unit << 9) /* | (unit << 17) */;
+
+ /* live flag (don't set if TEX results affect input to another TEX): */
+ /* e->inst[0] |= 0x00000004; */
+
+ get_tex_dim(type, &dim, &arg);
+
+ if (type == TGSI_TEXTURE_CUBE) {
+ e->inst[0] |= 0x08000000;
+ load_cube_tex_coords(pc, t, src, arg, proj);
+ } else
+ if (proj)
+ load_proj_tex_coords(pc, t, src, dim, arg);
+ else {
+ for (c = 0; c < dim; c++)
+ emit_mov(pc, t[c], src[c]);
+ if (arg != dim) /* depth reference value (always src.z here) */
+ emit_mov(pc, t[dim], src[2]);
}
e->inst[0] |= (mask & 0x3) << 25;
e->inst[1] |= (mask & 0xc) << 12;
- emit(pc, e);
+ if (!bias_lod) {
+ e->inst[0] |= (arg - 1) << 22;
+ emit(pc, e);
+ } else
+ if (bias_lod < 0) {
+ e->inst[0] |= arg << 22;
+ e->inst[1] |= 0x20000000; /* texbias */
+ emit_mov(pc, t[arg], src[3]);
+ emit_texbias_sequence(pc, t, arg, e);
+ } else {
+ e->inst[0] |= arg << 22;
+ e->inst[1] |= 0x40000000; /* texlod */
+ emit_mov(pc, t[arg], src[3]);
+ emit_texlod_sequence(pc, t[arg], src[3], e);
+ }
+
#if 1
c = 0;
if (mask & 1) emit_mov(pc, dst[0], t[c++]);
@@ -1389,38 +1641,6 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
}
static void
-emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
- struct nv50_program_exec **join)
-{
- struct nv50_program_exec *e = exec(pc);
-
- if (join) {
- set_long(pc, e);
- e->inst[0] |= 0xa0000002;
- emit(pc, e);
- *join = e;
- e = exec(pc);
- }
-
- set_long(pc, e);
- e->inst[0] |= 0x10000002;
- if (pred >= 0)
- set_pred(pc, cc, pred, e);
- emit(pc, e);
-}
-
-static void
-emit_nop(struct nv50_pc *pc)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xf0000000;
- set_long(pc, e);
- e->inst[1] = 0xe0000000;
- emit(pc, e);
-}
-
-static void
emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
struct nv50_program_exec *e = exec(pc);
@@ -1515,8 +1735,6 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
static boolean
negate_supported(const struct tgsi_full_instruction *insn, int i)
{
- int s;
-
switch (insn->Instruction.Opcode) {
case TGSI_OPCODE_DDY:
case TGSI_OPCODE_DP3:
@@ -1526,29 +1744,14 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
case TGSI_OPCODE_ADD:
case TGSI_OPCODE_SUB:
case TGSI_OPCODE_MAD:
- break;
+ return TRUE;
case TGSI_OPCODE_POW:
if (i == 1)
- break;
+ return TRUE;
return FALSE;
default:
return FALSE;
}
-
- /* Watch out for possible multiple uses of an nv50_reg, we
- * can't use nv50_reg::neg in these cases.
- */
- for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) {
- if (s == i)
- continue;
- if ((insn->Src[s].Register.Index ==
- insn->Src[i].Register.Index) &&
- (insn->Src[s].Register.File ==
- insn->Src[i].Register.File))
- return FALSE;
- }
-
- return TRUE;
}
/* Return a read mask for source registers deduced from opcode & write mask. */
@@ -1576,9 +1779,13 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
case TGSI_OPCODE_RSQ:
case TGSI_OPCODE_SCS:
return 0x1;
+ case TGSI_OPCODE_IF:
+ return 0x1;
case TGSI_OPCODE_LIT:
return 0xb;
case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
{
const struct tgsi_instruction_texture *tex;
@@ -1587,13 +1794,17 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
tex = &insn->Texture;
mask = 0x7;
- if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
- mask |= 0x8;
+ if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
+ insn->Instruction.Opcode != TGSI_OPCODE_TXD)
+ mask |= 0x8; /* bias, lod or proj */
switch (tex->Texture) {
case TGSI_TEXTURE_1D:
mask &= 0x9;
break;
+ case TGSI_TEXTURE_SHADOW1D:
+ mask &= 0x5;
+ break;
case TGSI_TEXTURE_2D:
mask &= 0xb;
break;
@@ -1676,7 +1887,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
/* Indicate indirection by setting r->acc < 0 and
* use the index field to select the address reg.
*/
- r = MALLOC_STRUCT(nv50_reg);
+ r = reg_instance(pc, NULL);
swz = tgsi_util_get_src_register_swizzle(
&src->Indirect, 0);
ctor_reg(r, P_CONST,
@@ -1730,6 +1941,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
break;
}
+ if (r && r->acc >= 0 && r != temp)
+ return reg_instance(pc, r);
return r;
}
@@ -1785,6 +1998,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c)
case TGSI_OPCODE_LIT:
case TGSI_OPCODE_SCS:
case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
/* these take care of dangerous swizzles themselves */
return 0x0;
@@ -2187,11 +2402,19 @@ nv50_program_tx_insn(struct nv50_pc *pc,
break;
case TGSI_OPCODE_TEX:
emit_tex(pc, dst, mask, src[0], unit,
- inst->Texture.Texture, FALSE);
+ inst->Texture.Texture, FALSE, 0);
+ break;
+ case TGSI_OPCODE_TXB:
+ emit_tex(pc, dst, mask, src[0], unit,
+ inst->Texture.Texture, FALSE, -1);
+ break;
+ case TGSI_OPCODE_TXL:
+ emit_tex(pc, dst, mask, src[0], unit,
+ inst->Texture.Texture, FALSE, 1);
break;
case TGSI_OPCODE_TXP:
emit_tex(pc, dst, mask, src[0], unit,
- inst->Texture.Texture, TRUE);
+ inst->Texture.Texture, TRUE, 0);
break;
case TGSI_OPCODE_TRUNC:
for (c = 0; c < 4; c++) {
@@ -2245,20 +2468,9 @@ nv50_program_tx_insn(struct nv50_pc *pc,
}
}
- for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- for (c = 0; c < 4; c++) {
- if (!src[i][c])
- continue;
- src[i][c]->mod = 0;
- if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
- FREE(src[i][c]);
- else
- if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST)
- FREE(src[i][c]); /* indirect constant */
- }
- }
-
kill_temp_temp(pc);
+ pc->reg_instance_nr = 0;
+
return TRUE;
}
@@ -2541,10 +2753,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
const struct tgsi_full_immediate *imm =
&tp.FullToken.FullImmediate;
- ctor_immd(pc, imm->u[0].Float,
- imm->u[1].Float,
- imm->u[2].Float,
- imm->u[3].Float);
+ ctor_immd_4f32(pc, imm->u[0].Float,
+ imm->u[1].Float,
+ imm->u[2].Float,
+ imm->u[3].Float);
}
break;
case TGSI_TOKEN_TYPE_DECLARATION:
@@ -3024,7 +3236,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
}
static void
-nv50_program_upload_data(struct nv50_context *nv50, float *map,
+nv50_program_upload_data(struct nv50_context *nv50, uint32_t *map,
unsigned start, unsigned count, unsigned cbuf)
{
struct nouveau_channel *chan = nv50->screen->base.channel;
@@ -3072,8 +3284,8 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
if (p->param_nr) {
unsigned cb;
- float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
- PIPE_BUFFER_USAGE_CPU_READ);
+ uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
+ PIPE_BUFFER_USAGE_CPU_READ);
if (p->type == PIPE_SHADER_VERTEX)
cb = NV50_CB_PVP;
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 255c7c737ef..4a90c372ce3 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -37,7 +37,7 @@ struct nv50_program {
struct nouveau_bo *bo;
- float *immd;
+ uint32_t *immd;
unsigned immd_nr;
unsigned param_nr;
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 5305c93d59a..268c9823f7d 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -93,7 +93,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_query *q = nv50_query(pq);
- WAIT_RING (chan, 5);
+ MARK_RING (chan, 5, 2); /* flush on lack of space or relocs */
BEGIN_RING(chan, tesla, NV50TCL_QUERY_ADDRESS_HIGH, 4);
OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index e1b2f11239a..d443ca3ad06 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -76,6 +76,7 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
case PIPE_FORMAT_DXT3_RGBA:
case PIPE_FORMAT_DXT5_RGBA:
case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_R16G16B16A16_SNORM:
case PIPE_FORMAT_R16G16B16A16_UNORM:
@@ -97,6 +98,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
switch (param) {
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return 32;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 32;
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 64;
case PIPE_CAP_NPOT_TEXTURES:
return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
@@ -122,8 +127,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
return 1;
- case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return 0;
case PIPE_CAP_TGSI_CONT_SUPPORTED:
return 0;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
@@ -315,6 +318,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_method(so, screen->tesla, 0x1400, 1);
so_data (so, 0xf);
+ /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
+ so_method(so, screen->tesla, 0x13b4, 1);
+ so_data (so, 0x54);
so_method(so, screen->tesla, 0x13bc, 1);
so_data (so, 0x54);
/* origin is top left (set to 1 for bottom left) */
@@ -387,7 +393,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000131 | (NV50_CB_PFP << 12));
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tic);
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+ &screen->tic);
if (ret) {
nv50_screen_destroy(pscreen);
return NULL;
@@ -398,9 +405,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, 0x000007ff);
+ so_data (so, PIPE_SHADER_TYPES * 32 - 1);
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tsc);
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+ &screen->tsc);
if (ret) {
nv50_screen_destroy(pscreen);
return NULL;
@@ -411,7 +419,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, 0x00000000);
+ so_data (so, 0x00000000); /* ignored if TSC_LINKED (0x1234) = 1 */
/* Vertex array limits - max them out */
@@ -425,6 +433,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_data (so, fui(0.0));
so_data (so, fui(1.0));
+ /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
so_method(so, screen->tesla, 0x1234, 1);
so_data (so, 1);
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 07318f23947..88aef52d08c 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -196,8 +196,9 @@ nv50_sampler_state_create(struct pipe_context *pipe,
}
if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- tsc[0] |= (1 << 8);
- tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7);
+ /* XXX: must be deactivated for non-shadow textures */
+ tsc[0] |= (1 << 9);
+ tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
}
limit = CLAMP(cso->lod_bias, -16.0, 15.0);
@@ -215,41 +216,66 @@ nv50_sampler_state_create(struct pipe_context *pipe,
return (void *)sso;
}
-static void
-nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+static INLINE void
+nv50_sampler_state_bind(struct pipe_context *pipe, unsigned type,
+ unsigned nr, void **sampler)
{
struct nv50_context *nv50 = nv50_context(pipe);
- int i;
- nv50->sampler_nr = nr;
- for (i = 0; i < nv50->sampler_nr; i++)
- nv50->sampler[i] = sampler[i];
+ memcpy(nv50->sampler[type], sampler, nr * sizeof(void *));
+ nv50->sampler_nr[type] = nr;
nv50->dirty |= NV50_NEW_SAMPLER;
}
static void
+nv50_vp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nv50_sampler_state_bind(pipe, PIPE_SHADER_VERTEX, nr, s);
+}
+
+static void
+nv50_fp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nv50_sampler_state_bind(pipe, PIPE_SHADER_FRAGMENT, nr, s);
+}
+
+static void
nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
FREE(hwcso);
}
-static void
-nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
- struct pipe_texture **pt)
+static INLINE void
+nv50_set_sampler_texture(struct pipe_context *pipe, unsigned type,
+ unsigned nr, struct pipe_texture **pt)
{
struct nv50_context *nv50 = nv50_context(pipe);
- int i;
+ unsigned i;
for (i = 0; i < nr; i++)
- pipe_texture_reference((void *)&nv50->miptree[i], pt[i]);
- for (i = nr; i < nv50->miptree_nr; i++)
- pipe_texture_reference((void *)&nv50->miptree[i], NULL);
+ pipe_texture_reference((void *)&nv50->miptree[type][i], pt[i]);
+ for (i = nr; i < nv50->miptree_nr[type]; i++)
+ pipe_texture_reference((void *)&nv50->miptree[type][i], NULL);
- nv50->miptree_nr = nr;
+ nv50->miptree_nr[type] = nr;
nv50->dirty |= NV50_NEW_TEXTURE;
}
+static void
+nv50_set_vp_sampler_textures(struct pipe_context *pipe,
+ unsigned nr, struct pipe_texture **pt)
+{
+ nv50_set_sampler_texture(pipe, PIPE_SHADER_VERTEX, nr, pt);
+}
+
+static void
+nv50_set_fp_sampler_textures(struct pipe_context *pipe,
+ unsigned nr, struct pipe_texture **pt)
+{
+ nv50_set_sampler_texture(pipe, PIPE_SHADER_FRAGMENT, nr, pt);
+}
+
static void *
nv50_rasterizer_state_create(struct pipe_context *pipe,
const struct pipe_rasterizer_state *cso)
@@ -648,9 +674,11 @@ nv50_init_state_functions(struct nv50_context *nv50)
nv50->pipe.delete_blend_state = nv50_blend_state_delete;
nv50->pipe.create_sampler_state = nv50_sampler_state_create;
- nv50->pipe.bind_fragment_sampler_states = nv50_sampler_state_bind;
nv50->pipe.delete_sampler_state = nv50_sampler_state_delete;
- nv50->pipe.set_fragment_sampler_textures = nv50_set_sampler_texture;
+ nv50->pipe.bind_fragment_sampler_states = nv50_fp_sampler_state_bind;
+ nv50->pipe.bind_vertex_sampler_states = nv50_vp_sampler_state_bind;
+ nv50->pipe.set_fragment_sampler_textures = nv50_set_fp_sampler_textures;
+ nv50->pipe.set_vertex_sampler_textures = nv50_set_vp_sampler_textures;
nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create;
nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index c871acaab8d..871e8097b65 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -156,6 +156,30 @@ nv50_state_validate_fb(struct nv50_context *nv50)
}
static void
+nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so,
+ unsigned p)
+{
+ struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+ unsigned i, j, dw = nv50->sampler_nr[p] * 8;
+
+ if (!dw)
+ return;
+ nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
+ p * (32 * 8 * 4), dw * 4);
+
+ so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), dw);
+
+ for (i = 0; i < nv50->sampler_nr[p]; ++i) {
+ if (nv50->sampler[p][i])
+ so_datap(so, nv50->sampler[p][i]->tsc, 8);
+ else {
+ for (j = 0; j < 8; ++j) /* you get punished */
+ so_data(so, 0); /* ... for leaving holes */
+ }
+ }
+}
+
+static void
nv50_state_emit(struct nv50_context *nv50)
{
struct nv50_screen *screen = nv50->screen;
@@ -246,7 +270,6 @@ boolean
nv50_state_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_grobj *eng2d = nv50->screen->eng2d;
struct nouveau_stateobj *so;
unsigned i;
@@ -369,22 +392,16 @@ scissor_uptodate:
viewport_uptodate:
if (nv50->dirty & NV50_NEW_SAMPLER) {
- unsigned i;
+ unsigned nr = 0;
- so = so_new(nv50->sampler_nr * 9 + 23 + 4, 2);
+ for (i = 0; i < PIPE_SHADER_TYPES; ++i)
+ nr += nv50->sampler_nr[i];
- nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
- nv50->sampler_nr * 8 * 4);
+ so = so_new(nr * 8 + 24 * PIPE_SHADER_TYPES + 2, 4);
- for (i = 0; i < nv50->sampler_nr; i++) {
- if (!nv50->sampler[i])
- continue;
- so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
- so_datap (so, nv50->sampler[i]->tsc, 8);
- }
+ nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
+ nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
- so_method(so, tesla, 0x1440, 1); /* sync SIFC */
- so_data (so, 0);
so_method(so, tesla, 0x1334, 1); /* flush TSC */
so_data (so, 0);
@@ -407,10 +424,13 @@ viewport_uptodate:
void nv50_so_init_sifc(struct nv50_context *nv50,
struct nouveau_stateobj *so,
- struct nouveau_bo *bo, unsigned reloc, unsigned size)
+ struct nouveau_bo *bo, unsigned reloc,
+ unsigned offset, unsigned size)
{
struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+ reloc |= NOUVEAU_BO_WR;
+
so_method(so, eng2d, NV50_2D_DST_FORMAT, 2);
so_data (so, NV50_2D_DST_FORMAT_R8_UNORM);
so_data (so, 1);
@@ -418,8 +438,8 @@ void nv50_so_init_sifc(struct nv50_context *nv50,
so_data (so, 262144);
so_data (so, 65536);
so_data (so, 1);
- so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_LOW, 0, 0);
+ so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0);
so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2);
so_data (so, 0);
so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM);
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 6bf6f773b0c..79655fc08d5 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -62,6 +62,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
return 1;
if (!bo->tile_flags) {
+ MARK_RING (chan, 9, 2); /* flush on lack of space or relocs */
BEGIN_RING(chan, eng2d, mthd, 2);
OUT_RING (chan, format);
OUT_RING (chan, 1);
@@ -72,6 +73,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
OUT_RELOCh(chan, bo, ps->offset, flags);
OUT_RELOCl(chan, bo, ps->offset, flags);
} else {
+ MARK_RING (chan, 11, 2); /* flush on lack of space or relocs */
BEGIN_RING(chan, eng2d, mthd, 5);
OUT_RING (chan, format);
OUT_RING (chan, 0);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 55a601deb8c..c4ca096d6ac 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -70,6 +70,7 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
_(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5),
_MIXED(Z24S8_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8),
+ _MIXED(S8Z24_UNORM, UNORM, UINT, UINT, UINT, C0, C0, C0, ONE, 8_24),
_(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16),
_(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16),
@@ -87,7 +88,7 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
static int
nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
- struct nv50_miptree *mt, int unit)
+ struct nv50_miptree *mt, int unit, unsigned p)
{
unsigned i;
uint32_t mode;
@@ -99,7 +100,7 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
if (i == NV50_TEX_FORMAT_LIST_SIZE)
return 1;
- if (nv50->sampler[unit]->normalized)
+ if (nv50->sampler[p][unit]->normalized)
mode = 0x50001000 | (1 << 31);
else {
mode = 0x50001000 | (7 << 14);
@@ -146,48 +147,78 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
return 0;
}
-void
-nv50_tex_validate(struct nv50_context *nv50)
+#ifndef NV50TCL_BIND_TIC
+#define NV50TCL_BIND_TIC(n) (0x1448 + 8 * n)
+#endif
+
+static boolean
+nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
+ unsigned p)
{
+ static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 };
+
struct nouveau_grobj *eng2d = nv50->screen->eng2d;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so;
- unsigned i, unit, push;
-
- push = MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2 + 23 + 6;
- so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr * 2 + 2);
+ unsigned unit, j, p_hw = p_remap[p];
nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM,
- nv50->miptree_nr * 8 * 4);
+ p * (32 * 8 * 4), nv50->miptree_nr[p] * 8 * 4);
- for (i = 0, unit = 0; unit < nv50->miptree_nr; ++unit) {
- struct nv50_miptree *mt = nv50->miptree[unit];
-
- if (!mt)
- continue;
+ for (unit = 0; unit < nv50->miptree_nr[p]; ++unit) {
+ struct nv50_miptree *mt = nv50->miptree[p][unit];
so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
- if (nv50_tex_construct(nv50, so, mt, unit)) {
- NOUVEAU_ERR("failed tex validate\n");
- so_ref(NULL, &so);
- return;
+ if (mt) {
+ if (nv50_tex_construct(nv50, so, mt, unit, p))
+ return FALSE;
+ /* Set TEX insn $t src binding $unit in program type p
+ * to TIC, TSC entry (32 * p + unit), mark valid (1).
+ */
+ so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+ so_data (so, ((32 * p + unit) << 9) | (unit << 1) | 1);
+ } else {
+ for (j = 0; j < 8; ++j)
+ so_data(so, 0);
+ so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+ so_data (so, (unit << 1) | 0);
}
+ }
+
+ for (; unit < nv50->state.miptree_nr[p]; unit++) {
+ /* Make other bindings invalid. */
+ so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+ so_data (so, (unit << 1) | 0);
+ }
+
+ nv50->state.miptree_nr[p] = nv50->miptree_nr[p];
+ return TRUE;
+}
- so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
- so_data (so, (i++ << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) |
- (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) |
- NV50TCL_SET_SAMPLER_TEX_VALID);
+void
+nv50_tex_validate(struct nv50_context *nv50)
+{
+ struct nouveau_stateobj *so;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ unsigned p, push, nrlc;
+
+ for (nrlc = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) {
+ push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
+ nrlc += nv50->miptree_nr[p];
}
+ push = push * 11 + 23 * PIPE_SHADER_TYPES + 4;
+ nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES;
+
+ so = so_new(push, nrlc);
+
+ if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE ||
+ nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) {
+ so_ref(NULL, &so);
- for (; unit < nv50->state.miptree_nr; unit++) {
- so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
- so_data (so,
- (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0);
+ NOUVEAU_ERR("failed tex validate\n");
+ return;
}
/* not sure if the following really do what I think: */
- so_method(so, tesla, 0x1440, 1); /* sync SIFC */
- so_data (so, 0);
so_method(so, tesla, 0x1330, 1); /* flush TIC */
so_data (so, 0);
so_method(so, tesla, 0x1338, 1); /* flush texture caches */
@@ -195,6 +226,4 @@ nv50_tex_validate(struct nv50_context *nv50)
so_ref(so, &nv50->state.tic_upload);
so_ref(NULL, &so);
- nv50->state.miptree_nr = nv50->miptree_nr;
}
-
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
index d531e611327..b870302019a 100644
--- a/src/gallium/drivers/nv50/nv50_texture.h
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -82,6 +82,7 @@
#define NV50TIC_0_0_FMT_RGTC1 0x00000027
#define NV50TIC_0_0_FMT_RGTC2 0x00000028
#define NV50TIC_0_0_FMT_24_8 0x00000029
+#define NV50TIC_0_0_FMT_8_24 0x0000002a
#define NV50TIC_0_0_FMT_32_DEPTH 0x0000002f
#define NV50TIC_0_0_FMT_32_8 0x00000030
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 39d65279fc0..6240a0c757a 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -16,16 +16,19 @@ struct nv50_transfer {
int level_depth;
int level_x;
int level_y;
+ int level_z;
+ unsigned nblocksx;
+ unsigned nblocksy;
};
static void
nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
struct nouveau_bo *src_bo, unsigned src_offset,
int src_pitch, unsigned src_tile_mode,
- int sx, int sy, int sw, int sh, int sd,
+ int sx, int sy, int sz, int sw, int sh, int sd,
struct nouveau_bo *dst_bo, unsigned dst_offset,
int dst_pitch, unsigned dst_tile_mode,
- int dx, int dy, int dw, int dh, int dd,
+ int dx, int dy, int dz, int dw, int dh, int dd,
int cpp, int width, int height,
unsigned src_reloc, unsigned dst_reloc)
{
@@ -54,7 +57,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
OUT_RING (chan, sw * cpp);
OUT_RING (chan, sh);
OUT_RING (chan, sd);
- OUT_RING (chan, 0);
+ OUT_RING (chan, sz); /* copying only 1 zslice per call */
}
if (!dst_bo->tile_flags) {
@@ -73,13 +76,13 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
OUT_RING (chan, dw * cpp);
OUT_RING (chan, dh);
OUT_RING (chan, dd);
- OUT_RING (chan, 0);
+ OUT_RING (chan, dz); /* copying only 1 zslice per call */
}
while (height) {
int line_count = height > 2047 ? 2047 : height;
- WAIT_RING (chan, 15);
+ MARK_RING (chan, 15, 4); /* flush on lack of space or relocs */
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2);
OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
@@ -116,20 +119,6 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
}
}
-static INLINE unsigned
-get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned ny)
-{
- unsigned tile_h = get_tile_height(tile_mode);
- unsigned tile_d = get_tile_depth(tile_mode);
-
- /* pitch_2d == to next slice within this volume-tile */
- /* pitch_3d == to next slice in next 2D array of blocks */
- unsigned pitch_2d = tile_h * 64;
- unsigned pitch_3d = tile_d * align(ny, tile_h) * pitch;
-
- return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
-}
-
static struct pipe_transfer *
nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
unsigned face, unsigned level, unsigned zslice,
@@ -151,20 +140,11 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
pipe_texture_reference(&tx->base.texture, pt);
- tx->base.format = pt->format;
+ tx->nblocksx = pf_get_nblocksx(pt->format, u_minify(pt->width0, level));
+ tx->nblocksy = pf_get_nblocksy(pt->format, u_minify(pt->height0, level));
tx->base.width = w;
tx->base.height = h;
- tx->base.block = pt->block;
- if (!pt->nblocksx[level]) {
- tx->base.nblocksx = pf_get_nblocksx(&pt->block,
- u_minify(pt->width0, level));
- tx->base.nblocksy = pf_get_nblocksy(&pt->block,
- u_minify(pt->height0, level));
- } else {
- tx->base.nblocksx = pt->nblocksx[level];
- tx->base.nblocksy = pt->nblocksy[level];
- }
- tx->base.stride = tx->base.nblocksx * pt->block.size;
+ tx->base.stride = tx->nblocksx * pf_get_blocksize(pt->format);
tx->base.usage = usage;
tx->level_pitch = lvl->pitch;
@@ -173,34 +153,30 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->level_depth = u_minify(mt->base.base.depth0, level);
tx->level_offset = lvl->image_offset[image];
tx->level_tiling = lvl->tile_mode;
- tx->level_x = pf_get_nblocksx(&tx->base.block, x);
- tx->level_y = pf_get_nblocksy(&tx->base.block, y);
+ tx->level_z = zslice;
+ tx->level_x = pf_get_nblocksx(pt->format, x);
+ tx->level_y = pf_get_nblocksy(pt->format, y);
ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
- tx->base.nblocksy * tx->base.stride, &tx->bo);
+ tx->nblocksy * tx->base.stride, &tx->bo);
if (ret) {
FREE(tx);
return NULL;
}
- if (pt->target == PIPE_TEXTURE_3D)
- tx->level_offset += get_zslice_offset(lvl->tile_mode, zslice,
- lvl->pitch,
- tx->base.nblocksy);
-
if (usage & PIPE_TRANSFER_READ) {
- nx = pf_get_nblocksx(&tx->base.block, tx->base.width);
- ny = pf_get_nblocksy(&tx->base.block, tx->base.height);
+ nx = pf_get_nblocksx(pt->format, tx->base.width);
+ ny = pf_get_nblocksy(pt->format, tx->base.height);
nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
tx->level_pitch, tx->level_tiling,
- x, y,
- tx->base.nblocksx, tx->base.nblocksy,
+ x, y, zslice,
+ tx->nblocksx, tx->nblocksy,
tx->level_depth,
tx->bo, 0,
tx->base.stride, tx->bo->tile_mode,
- 0, 0,
- tx->base.nblocksx, tx->base.nblocksy, 1,
- tx->base.block.size, nx, ny,
+ 0, 0, 0,
+ tx->nblocksx, tx->nblocksy, 1,
+ pf_get_blocksize(pt->format), nx, ny,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
NOUVEAU_BO_GART);
}
@@ -213,23 +189,24 @@ nv50_transfer_del(struct pipe_transfer *ptx)
{
struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
struct nv50_miptree *mt = nv50_miptree(ptx->texture);
+ struct pipe_texture *pt = ptx->texture;
- unsigned nx = pf_get_nblocksx(&tx->base.block, tx->base.width);
- unsigned ny = pf_get_nblocksy(&tx->base.block, tx->base.height);
+ unsigned nx = pf_get_nblocksx(pt->format, tx->base.width);
+ unsigned ny = pf_get_nblocksy(pt->format, tx->base.height);
if (ptx->usage & PIPE_TRANSFER_WRITE) {
- struct pipe_screen *pscreen = ptx->texture->screen;
+ struct pipe_screen *pscreen = pt->screen;
nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
tx->base.stride, tx->bo->tile_mode,
- 0, 0,
- tx->base.nblocksx, tx->base.nblocksy, 1,
+ 0, 0, 0,
+ tx->nblocksx, tx->nblocksy, 1,
mt->base.bo, tx->level_offset,
tx->level_pitch, tx->level_tiling,
- tx->level_x, tx->level_y,
- tx->base.nblocksx, tx->base.nblocksy,
+ tx->level_x, tx->level_y, tx->level_z,
+ tx->nblocksx, tx->nblocksy,
tx->level_depth,
- tx->base.block.size, nx, ny,
+ pf_get_blocksize(pt->format), nx, ny,
NOUVEAU_BO_GART, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART);
}
@@ -288,7 +265,7 @@ nv50_upload_sifc(struct nv50_context *nv50,
reloc |= NOUVEAU_BO_WR;
- WAIT_RING (chan, 32);
+ MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */
if (bo->tile_flags) {
BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5);