From 8e572998fc28c1daacca9d9835a71e94a62c6c6c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 21 Jan 2011 16:23:44 +0100 Subject: nvc0: accept neg abs modifiers on lg2 --- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index db8055d91cd..e3097c95694 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -446,7 +446,7 @@ emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op) pc->emit[0] |= op << 26; - if (op >= 4) { + if (op >= 3) { if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9; if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7; } else { -- cgit v1.2.3 From c18aa3c73f11c80a1f92f99d4a697900945903b8 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 21 Jan 2011 16:27:31 +0100 Subject: nvc0: commute sources of SET too if beneficial --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 2 +- src/gallium/drivers/nvc0/nvc0_pc_print.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index acc72bff14c..404b4dccbb6 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -240,7 +240,7 @@ check_swap_src_0_1(struct nv_instruction *nvi) struct nv_ref *src0 = nvi->src[0]; struct nv_ref *src1 = nvi->src[1]; - if (!nv_op_commutative(nvi->opcode)) + if (!nv_op_commutative(nvi->opcode) && NV_BASEOP(nvi->opcode) != NV_OP_SET) return; assert(src0 && src1 && src0->value && src1->value); diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index b03826484e4..6c71abee69f 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -369,7 
+369,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, + { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, -- cgit v1.2.3 From 49f16c96f150b192bfd6828ae4ba03afe3a7b8f3 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 21 Jan 2011 16:46:36 +0100 Subject: nvc0: don't apply base vertex to per-instance arrays --- src/gallium/drivers/nvc0/nvc0_push.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 74c3451c19a..fcbb7da41a3 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -217,6 +217,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) struct push_context ctx; unsigned i, index_size; unsigned inst = info->instance_count; + boolean apply_bias = info->indexed && info->index_bias; ctx.chan = nvc0->screen->base.channel; ctx.translate = nvc0->vertex->translate; @@ -230,7 +231,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) data = nvc0_resource_map_offset(nvc0, res, vb->buffer_offset, NOUVEAU_BO_RD); - if (info->indexed) + + if (apply_bias && likely(!(nvc0->vertex->instance_bufs & (1 << i)))) data += info->index_bias * vb->stride; ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); -- cgit v1.2.3 From bf1df06773d6eca8b71a687f838edccd1a6c9cb8 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 21 Jan 2011 16:52:17 +0100 Subject: nvc0: add MARK_RING where missing to avoid too many relocs errors --- src/gallium/drivers/nvc0/nvc0_context.c | 6 ++++-- src/gallium/drivers/nvc0/nvc0_fence.c | 1 + src/gallium/drivers/nvc0/nvc0_query.c | 2 ++ src/gallium/drivers/nvc0/nvc0_screen.c | 
1 + src/gallium/drivers/nvc0/nvc0_state_validate.c | 11 +++++++++-- src/gallium/drivers/nvc0/nvc0_vbo.c | 2 ++ 6 files changed, 19 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 2118abb5d5d..1ebf9e2bafb 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -148,12 +148,14 @@ nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) { struct resident *rsd; struct util_dynarray *array; - unsigned ctx, i; + unsigned ctx, i, n; for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) { array = &nvc0->residents[ctx]; - for (i = 0; i < array->size / sizeof(struct resident); ++i) { + n = array->size / sizeof(struct resident); + MARK_RING(nvc0->screen->base.channel, n, n); + for (i = 0; i < n; ++i) { rsd = util_dynarray_element(array, struct resident, i); nvc0_resource_validate(rsd->res, rsd->flags); diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c index 9d2c48cf14d..3a3dd75c152 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.c +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -55,6 +55,7 @@ nvc0_fence_emit(struct nvc0_fence *fence) assert(fence->state == NVC0_FENCE_STATE_AVAILABLE); + MARK_RING (chan, 5, 2); BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c index cc83fbe771c..e5e43c0e7a5 100644 --- a/src/gallium/drivers/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nvc0/nvc0_query.c @@ -312,6 +312,7 @@ nvc0_render_condition(struct pipe_context *pipe, if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + MARK_RING (chan, 5, 2); BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4); OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); 
OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); @@ -319,6 +320,7 @@ nvc0_render_condition(struct pipe_context *pipe, OUT_RING (chan, 0x00001001); } + MARK_RING (chan, 4, 2); BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3); OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index f608b32e1cb..68f3867fd0e 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -628,6 +628,7 @@ nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + MARK_RING(chan, 5, 5); nouveau_bo_validate(chan, screen->text, flags); nouveau_bo_validate(chan, screen->uniforms, flags); nouveau_bo_validate(chan, screen->txc, flags); diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 25aec0244db..b41ca056b6a 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -25,6 +25,7 @@ nvc0_validate_zcull(struct nvc0_context *nvc0) else width = fb->width; + MARK_RING (chan, 23, 4); BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */ OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */ @@ -66,12 +67,14 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, fb->width << 16); OUT_RING (chan, fb->height << 16); + MARK_RING(chan, 9 * fb->nr_cbufs, 2 * fb->nr_cbufs); + for (i = 0; i < fb->nr_cbufs; ++i) { struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture); struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]); struct nouveau_bo *bo = mt->base.bo; uint32_t offset = sf->offset; - + BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8); OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RELOCl(chan, bo, 
offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); @@ -93,7 +96,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0) struct nouveau_bo *bo = mt->base.bo; int unk = mt->base.base.target == PIPE_TEXTURE_2D; uint32_t offset = sf->offset; - + + MARK_RING (chan, 12, 2); BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); @@ -238,6 +242,7 @@ nvc0_validate_clip(struct nvc0_context *nvc0) if (nvc0->clip.nr) { struct nouveau_bo *bo = nvc0->screen->uniforms; + MARK_RING (chan, 6 + nvc0->clip.nr * 4, 2); BEGIN_RING(chan, RING_3D(CB_SIZE), 3); OUT_RING (chan, 256); OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); @@ -340,6 +345,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); if (rebind) { + MARK_RING (chan, 4, 2); BEGIN_RING(chan, RING_3D(CB_SIZE), 3); OUT_RING (chan, align(res->base.width0, 0x100)); OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); @@ -357,6 +363,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) } nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1); + MARK_RING (chan, nr + 5, 2); BEGIN_RING(chan, RING_3D(CB_SIZE), 3); OUT_RING (chan, align(res->base.width0, 0x100)); OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index a51a887ed89..486909c1eb0 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -227,6 +227,7 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0) } offset = vb->buffer_offset + ve->src_offset; + MARK_RING (chan, 6, 4); BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); OUT_RING (chan, i); OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD); @@ -292,6 +293,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) size = vb->buffer->width0; offset = ve->pipe.src_offset + vb->buffer_offset; + MARK_RING (chan, 8, 
4); BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); OUT_RING (chan, (1 << 12) | vb->stride); BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); -- cgit v1.2.3 From 419ff10b0ebdeec06bd3466beda2a9e1a9d054d6 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 21 Jan 2011 17:04:25 +0100 Subject: nvc0: recognize r63 as zero in constant folding --- src/gallium/drivers/nvc0/nvc0_pc.c | 10 ++++++++-- src/gallium/drivers/nvc0/nvc0_pc.h | 2 +- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 10 ---------- 3 files changed, 9 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index 304a1919768..1d1b9e19b78 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -103,6 +103,12 @@ nvc0_pc_replace_value(struct nv_pc *pc, return n; } +static INLINE boolean +is_gpr63(struct nv_value *val) +{ + return (val->reg.file == NV_FILE_GPR && val->reg.id == 63); +} + struct nv_value * nvc0_pc_find_constant(struct nv_ref *ref) { @@ -116,7 +122,7 @@ nvc0_pc_find_constant(struct nv_ref *ref) assert(!src->insn->src[0]->mod); src = src->insn->src[0]->value; } - if ((src->reg.file == NV_FILE_IMM) || + if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) || (src->insn && src->insn->opcode == NV_OP_LD && src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && @@ -130,7 +136,7 @@ nvc0_pc_find_immediate(struct nv_ref *ref) { struct nv_value *src = nvc0_pc_find_constant(ref); - return (src && src->reg.file == NV_FILE_IMM) ? src : NULL; + return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? 
src : NULL; } static void diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 969cc68c596..01ca95b0741 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -310,7 +310,7 @@ struct nv_reg { int32_t s32; int64_t s64; uint64_t u64; - uint32_t u32; + uint32_t u32; /* expected to be 0 for $r63 */ float f32; double f64; } imm; diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 950bee2eda4..3709369ca2f 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1388,16 +1388,6 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, return nvi; } -/* -static boolean -bld_is_constant(struct nv_value *val) -{ - if (val->reg.file == NV_FILE_IMM) - return TRUE; - return val->insn && nvCG_find_constant(val->insn->src[0]); -} -*/ - static void bld_tex(struct bld_context *bld, struct nv_value *dst0[4], const struct tgsi_full_instruction *insn) -- cgit v1.2.3 From 005d186d6634abaeef348ca89c527bd5c34d0e87 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 21 Jan 2011 18:40:41 +0100 Subject: nvc0: don't omit highest bit of branch target Fixes negative relative branch offsets. 
--- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index e3097c95694..e4b243bda87 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -236,7 +236,7 @@ emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op) */ pc->emit[0] |= (pcrel & 0x3f) << 26; - pc->emit[1] |= (pcrel >> 6) & 0x1ffff; + pc->emit[1] |= (pcrel >> 6) & 0x3ffff; } } -- cgit v1.2.3 From f9bb1c8b3332d26596dd37063d0b9866bc40e63d Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 23 Jan 2011 13:05:44 +0100 Subject: nvc0: fix address and value slot assignment in load combining --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 404b4dccbb6..e0d4e2daf9b 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -700,6 +700,9 @@ struct pass_reld_elim { int alloc; }; +/* Extend the load operation in @rec to also cover the data loaded by @ld. + * The two loads may not overlap but reference adjacent memory locations. 
+ */ static void combine_load(struct mem_record *rec, struct nv_instruction *ld) { @@ -716,7 +719,7 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld) return; rec->ofst = mem->reg.address; for (j = 0; j < d; ++j) - fv->def[d + j] = fv->def[j]; + fv->def[mem->reg.size / 4 + j] = fv->def[j]; d = 0; } else if ((size == 8 && rec->ofst & 3) || @@ -729,6 +732,7 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld) fv->def[d++]->insn = fv; } + fv->src[0]->value->reg.address = rec->ofst; fv->src[0]->value->reg.size = rec->size = size; nvc0_insn_delete(ld); @@ -793,6 +797,7 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) ((it->ofst >> 4) == (ofst >> 4)) && ((it->ofst + it->size == ofst) || (it->ofst - mem->reg.size == ofst))) { + /* only NV_OP_VFETCH can load exactly 12 bytes */ if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12) continue; if (it->ofst < ofst) { -- cgit v1.2.3 From 95eef7a7059c5323230badbf024f3af74a62a6cb Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 22 Jan 2011 13:59:47 +0100 Subject: nvc0: remove bad assert and emit TEMP movs instead --- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 3709369ca2f..be1bb44931d 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -878,11 +878,10 @@ emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst, break; case TGSI_FILE_TEMPORARY: assert(idx < BLD_MAX_TEMPS); - if (!res->insn) + if (!res->insn || res->insn->bb != bld->pc->current_block) res = bld_insn_1(bld, NV_OP_MOV, res); assert(res->reg.file == NV_FILE_GPR); - assert(res->insn->bb = bld->pc->current_block); if (bld->ti->require_stores) bld_lmem_store(bld, ptr, idx * 4 + chan, res); -- cgit v1.2.3 From 
835c4ea1053730c8eea98337c9da1b14fcff6b5e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 23 Jan 2011 13:09:10 +0100 Subject: nvc0: fix emit_cvt for ceil, floor and trunc --- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 33 ++++++++++++++++++++------------ src/gallium/drivers/nvc0/nvc0_pc_print.c | 2 +- 2 files changed, 22 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index e4b243bda87..88a59cfb518 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -629,25 +629,28 @@ emit_slct(struct nv_pc *pc, struct nv_instruction *i) static void emit_cvt(struct nv_pc *pc, struct nv_instruction *i) { + uint32_t rint; + pc->emit[0] = 0x00000004; pc->emit[1] = 0x10000000; - if (i->opcode != NV_OP_CVT) + /* if no type conversion specified, get type from opcode */ + if (i->opcode != NV_OP_CVT && i->ext.cvt.d == i->ext.cvt.s) i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode); switch (i->ext.cvt.d) { case NV_TYPE_F32: switch (i->ext.cvt.s) { case NV_TYPE_F32: pc->emit[1] = 0x10000000; break; - case NV_TYPE_S32: pc->emit[0] |= 0x200; + case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */ case NV_TYPE_U32: pc->emit[1] = 0x18000000; break; } break; - case NV_TYPE_S32: pc->emit[0] |= 0x80; + case NV_TYPE_S32: pc->emit[0] |= 0x80; /* fall through */ case NV_TYPE_U32: switch (i->ext.cvt.s) { case NV_TYPE_F32: pc->emit[1] = 0x14000000; break; - case NV_TYPE_S32: pc->emit[0] |= 0x200; + case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */ case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break; } break; @@ -656,14 +659,20 @@ emit_cvt(struct nv_pc *pc, struct nv_instruction *i) break; } - if (i->opcode == NV_OP_FLOOR) - pc->emit[1] |= 0x00020000; - else - if (i->opcode == NV_OP_CEIL) - pc->emit[1] |= 0x00040000; - else - if (i->opcode == NV_OP_TRUNC) - pc->emit[1] |= 0x00060000; + rint = (i->ext.cvt.d == 
NV_TYPE_F32) ? 1 << 7 : 0; + + if (i->opcode == NV_OP_FLOOR) { + pc->emit[0] |= rint; + pc->emit[1] |= 2 << 16; + } else + if (i->opcode == NV_OP_CEIL) { + pc->emit[0] |= rint; + pc->emit[1] |= 4 << 16; + } else + if (i->opcode == NV_OP_TRUNC) { + pc->emit[0] |= rint; + pc->emit[1] |= 6 << 16; + } if (i->saturate || i->opcode == NV_OP_SAT) pc->emit[0] |= 0x20; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 6c71abee69f..76dd0f57500 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -302,7 +302,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_TRUNC, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, -- cgit v1.2.3 From a287a758c6567405a7ea10df21e586d1e2ff08ec Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 23 Jan 2011 21:29:30 +0100 Subject: nvc0: implement point coord replacement But we have to cheat and peek at the GENERIC semantic indices the state tracker uses for TEXn. Only outputs from 0x300 to 0x37c can be replaced, and so we have to know on shader compilation which ones to put there in order to keep doing separate shader objects properly. At some point I'll probably create a patch that makes gallium not force us to discard the information about what is a TexCoord. 
--- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 10 +++++---- src/gallium/drivers/nvc0/nvc0_program.c | 27 ++++++++++++++++++------ src/gallium/drivers/nvc0/nvc0_program.h | 4 +++- src/gallium/drivers/nvc0/nvc0_state_validate.c | 29 ++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 61932ff2b6a..af6526c8759 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -814,8 +814,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VERTEX_BASE_LOW 0x000015f8 #define NVC0_3D_POINT_COORD_REPLACE 0x00001604 -#define NVC0_3D_POINT_COORD_REPLACE_BITS__MASK 0x00001fff -#define NVC0_3D_POINT_COORD_REPLACE_BITS__SHIFT 0 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__MASK 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__SHIFT 2 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT 0x00000000 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__MASK 0x000007f8 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__SHIFT 3 #define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608 @@ -864,8 +868,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000 #define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001 -#define NVC0_3D_POINT_SPRITE_CTRL 0x00001660 - #define NVC0_3D_TEX_MISC 0x00001664 #define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004 diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index aefaf7b98ad..613dc431bfd 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -185,8 +185,17 @@ nvc0_varying_location(unsigned sn, unsigned si) return 0x2e0; */ case TGSI_SEMANTIC_GENERIC: + /* We'd really like to distinguish between TEXCOORD and GENERIC here, + * since only 0x300 to 0x37c can be replaced by sprite coordinates. + * Also, gl_PointCoord should be a system value and must be assigned to + * address 0x2e0. For now, let's cheat: + */ assert(si < 31); - return 0x80 + (si * 16); + if (si <= 7) + return 0x300 + si * 16; + if (si == 9) + return 0x2e0; + return 0x80 + ((si - 8) * 16); case TGSI_SEMANTIC_NORMAL: return 0x360; case TGSI_SEMANTIC_PRIMID: @@ -256,12 +265,14 @@ prog_decl(struct nvc0_translation_info *ti, case TGSI_FILE_INPUT: for (i = first; i <= last; ++i) { if (ti->prog->type == PIPE_SHADER_VERTEX) { - sn = TGSI_SEMANTIC_GENERIC; - si = i; + for (c = 0; c < 4; ++c) + ti->input_loc[i][c] = 0x80 + i * 16 + c * 4; + } else { + for (c = 0; c < 4; ++c) + ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + /* for sprite coordinates: */ + ti->prog->fp.in_pos[i] = ti->input_loc[i][0] / 4; } - for (c = 0; c < 4; ++c) - ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; - if (ti->prog->type == PIPE_SHADER_FRAGMENT) ti->interp_mode[i] = nvc0_interp_mode(decl); } @@ -281,6 +292,8 @@ prog_decl(struct nvc0_translation_info *ti, } else { for (c = 0; c < 4; ++c) ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + /* for TFB_VARYING_LOCS: */ + ti->prog->vp.out_pos[i] = ti->output_loc[i][0] / 4; } } break; @@ -518,6 +531,8 @@ 
nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) if (!ti->input_access[i][c]) continue; a = ti->input_loc[i][c] / 2; + if (ti->input_loc[i][c] >= 0x2c0) + a -= 32; if ((a & ~7) == 0x70/2) fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */ else diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h index e6b210d1355..3450cec175d 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -21,16 +21,18 @@ struct nvc0_program { unsigned code_size; unsigned parm_size; - uint32_t hdr[20]; + uint32_t hdr[20]; /* TODO: move this into code to save space */ uint32_t flags[2]; struct { uint8_t edgeflag; uint8_t num_ucps; + uint8_t out_pos[PIPE_MAX_SHADER_OUTPUTS]; } vp; struct { uint8_t early_z; + uint8_t in_pos[PIPE_MAX_SHADER_INPUTS]; } fp; void *relocs; diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index b41ca056b6a..6419011132a 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -285,6 +285,34 @@ nvc0_validate_rasterizer(struct nvc0_context *nvc0) OUT_RINGp(chan, nvc0->rast->state, nvc0->rast->size); } +static void +nvc0_validate_sprite_coords(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + uint32_t reg; + + if (nvc0->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) + reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT; + else + reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT; + + if (nvc0->rast->pipe.point_quad_rasterization) { + uint32_t en = nvc0->rast->pipe.sprite_coord_enable; + int i; + struct nvc0_program *prog = nvc0->fragprog; + + while (en) { + i = ffs(en) - 1; + en &= ~(1 << i); + if (prog->fp.in_pos[i] >= 0xc0 && prog->fp.in_pos[i] < 0xe0) + reg |= 8 << ((prog->fp.in_pos[i] - 0xc0) / 4); + } + } + + BEGIN_RING(chan, 
RING_3D(POINT_COORD_REPLACE), 1); + OUT_RING (chan, reg); +} + static void nvc0_constbufs_validate(struct nvc0_context *nvc0) { @@ -404,6 +432,7 @@ static struct state_validate { { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, + { nvc0_validate_sprite_coords, NVC0_NEW_RASTERIZER | NVC0_NEW_FRAGPROG }, { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, { nvc0_validate_textures, NVC0_NEW_TEXTURES }, { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, -- cgit v1.2.3 From db4f6c7eeb7134c837b6832f60e973e818ec3977 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Mon, 24 Jan 2011 20:04:31 -0800 Subject: nvc0: Move declaration before code. Fixes nvc0 SCons build. --- src/gallium/drivers/nvc0/nvc0_mm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c index 0629dad19c9..516d2e31b55 100644 --- a/src/gallium/drivers/nvc0/nvc0_mm.c +++ b/src/gallium/drivers/nvc0/nvc0_mm.c @@ -96,13 +96,13 @@ mm_bucket_by_size(struct nvc0_mman *cache, unsigned size) static INLINE uint32_t mm_default_slab_size(unsigned chunk_order) { - assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); - static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] = { 12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22 }; + assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); + return 1 << slab_order[chunk_order - MM_MIN_ORDER]; } -- cgit v1.2.3 From 7fd29468ec68b5cd08222428577a7dbe8f123426 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 29 Jan 2011 15:06:22 +0100 Subject: nvc0: enable PIPE_CAP_ARRAY_TEXTURES and fix them --- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 2 ++ src/gallium/drivers/nvc0/nvc0_screen.c | 2 ++ src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 11 ++++----- src/gallium/drivers/nvc0/nvc0_transfer.c | 38 ++++++++++++++++++++---------- 
4 files changed, 34 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index 88a59cfb518..644b9ef61a1 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -393,6 +393,8 @@ emit_tex(struct nv_pc *pc, struct nv_instruction *i) { int src1 = i->tex_array + i->tex_dim + i->tex_cube; + assert(src1 < 6); + pc->emit[0] = 0x00000086; pc->emit[1] = 0x80000000; diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 68f3867fd0e..88daf31d46a 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -75,6 +75,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 10; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; + case PIPE_CAP_ARRAY_TEXTURES: + return 1; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_TEXTURE_SWIZZLE: diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index be1bb44931d..9b5d4290787 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1198,11 +1198,6 @@ describe_texture_target(unsigned target, int *dim, *dim = 2; *cube = 1; break; - /* - case TGSI_TEXTURE_CUBE_ARRAY: - *dim = 2; - *cube = *array = 1; - break; case TGSI_TEXTURE_1D_ARRAY: *dim = *array = 1; break; @@ -1210,6 +1205,7 @@ describe_texture_target(unsigned target, int *dim, *dim = 2; *array = 1; break; + /* case TGSI_TEXTURE_SHADOW1D_ARRAY: *dim = *array = *shadow = 1; break; @@ -1219,7 +1215,7 @@ describe_texture_target(unsigned target, int *dim, break; case TGSI_TEXTURE_CUBE_ARRAY: *dim = 2; - *array = *cube = 1; + *cube = *array = 1; break; */ default: @@ -1382,6 +1378,7 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, nvi->tex_dim = dim; nvi->tex_cube = cube; 
nvi->tex_shadow = shadow; + nvi->tex_array = array; nvi->tex_live = 0; return nvi; @@ -1402,7 +1399,7 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4], assert(dim + array + shadow + lodbias <= 5); - if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) + if (!cube && !array && insn->Instruction.Opcode == TGSI_OPCODE_TXP) load_proj_tex_coords(bld, t, dim, shadow, insn); else { for (c = 0; c < dim + cube + array; ++c) diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index 286b382f58e..92e006cba49 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -10,7 +10,8 @@ struct nvc0_transfer { struct pipe_transfer base; struct nvc0_m2mf_rect rect[2]; uint32_t nblocksx; - uint32_t nblocksy; + uint16_t nblocksy; + uint16_t nlayers; }; static void @@ -242,23 +243,36 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, struct nvc0_miptree_level *lvl = &mt->level[level]; struct nvc0_transfer *tx; uint32_t size; - uint32_t w, h, d, z, layer; + uint32_t w, h, d, z, layer, box_h, box_y; int ret; + tx = CALLOC_STRUCT(nvc0_transfer); + if (!tx) + return NULL; + + box_y = box->y; + box_h = box->height; + if (mt->layout_3d) { z = box->z; d = u_minify(res->depth0, level); layer = 0; + tx->nlayers = box->depth; } else { z = 0; d = 1; - layer = box->z; + if (res->target == PIPE_TEXTURE_1D || + res->target == PIPE_TEXTURE_1D_ARRAY) { + box_y = 0; + box_h = 1; + layer = box->y; + tx->nlayers = box->height; + } else { + layer = box->z; + tx->nlayers = box->depth; + } } - tx = CALLOC_STRUCT(nvc0_transfer); - if (!tx) - return NULL; - pipe_resource_reference(&tx->base.resource, res); tx->base.level = level; @@ -266,7 +280,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->base.box = *box; tx->nblocksx = util_format_get_nblocksx(res->format, box->width); - tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + tx->nblocksy = 
util_format_get_nblocksy(res->format, box_h); tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); tx->base.layer_stride = tx->nblocksy * tx->base.stride; @@ -280,7 +294,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[0].base = lvl->offset + layer * mt->layer_stride; tx->rect[0].tile_mode = lvl->tile_mode; tx->rect[0].x = util_format_get_nblocksx(res->format, box->x); - tx->rect[0].y = util_format_get_nblocksy(res->format, box->y); + tx->rect[0].y = util_format_get_nblocksy(res->format, box_y); tx->rect[0].z = z; tx->rect[0].width = util_format_get_nblocksx(res->format, w); tx->rect[0].height = util_format_get_nblocksy(res->format, h); @@ -291,7 +305,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, size = tx->base.layer_stride; ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - size * tx->base.box.depth, &tx->rect[1].bo); + size * tx->nlayers, &tx->rect[1].bo); if (ret) { FREE(tx); return NULL; @@ -305,7 +319,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, if (usage & PIPE_TRANSFER_READ) { unsigned i; - for (i = 0; i < box->depth; ++i) { + for (i = 0; i < tx->nlayers; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0], tx->nblocksx, tx->nblocksy); if (mt->layout_3d) @@ -331,7 +345,7 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx, unsigned i; if (tx->base.usage & PIPE_TRANSFER_WRITE) { - for (i = 0; i < tx->base.box.depth; ++i) { + for (i = 0; i < tx->nlayers; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1], tx->nblocksx, tx->nblocksy); if (mt->layout_3d) -- cgit v1.2.3 From f8a7a0b6f30ff38b2743860cbc4caeab102c2c29 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 30 Jan 2011 01:24:56 +0100 Subject: nvc0: implement transform feedback state --- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 20 +++++-- src/gallium/drivers/nvc0/nvc0_context.h | 9 ++++ src/gallium/drivers/nvc0/nvc0_shader_state.c | 58 +++++++++++++++++++- 
src/gallium/drivers/nvc0/nvc0_state.c | 73 ++++++++++++++++++++++++++ src/gallium/drivers/nvc0/nvc0_state_validate.c | 3 +- src/gallium/drivers/nvc0/nvc0_stateobj.h | 8 +-- src/gallium/drivers/nvc0/nvc0_vbo.c | 51 +----------------- 7 files changed, 163 insertions(+), 59 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index af6526c8759..1a34313912c 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -84,6 +84,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210 +#define NVC0_3D_MEM_BARRIER 0x0000021c +#define NVC0_3D_MEM_BARRIER_UNK0 0x00000001 +#define NVC0_3D_MEM_BARRIER_UNK1 0x00000002 +#define NVC0_3D_MEM_BARRIER_UNK2 0x00000004 +#define NVC0_3D_MEM_BARRIER_UNK4 0x00000010 +#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100 +#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000 + #define NVC0_3D_TESS_MODE 0x00000320 #define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f #define NVC0_3D_TESS_MODE_PRIM__SHIFT 0 @@ -122,11 +130,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0)) -#define NVC0_3D_TFB_UNK0700(i0) (0x00000700 + 0x10*(i0)) +#define NVC0_3D_TFB_UNK07X0(i0) (0x00000700 + 0x10*(i0)) +#define NVC0_3D_TFB_UNK07X0__ESIZE 0x00000010 +#define NVC0_3D_TFB_UNK07X0__LEN 0x00000004 #define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0)) +#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010 +#define NVC0_3D_TFB_VARYING_COUNT__LEN 0x00000004 #define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0)) +#define NVC0_3D_TFB_BUFFER_STRIDE__ESIZE 0x00000010 +#define NVC0_3D_TFB_BUFFER_STRIDE__LEN 0x00000004 #define NVC0_3D_TFB_ENABLE 0x00000744 @@ -1157,9 +1171,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600 -#define NVC0_3D_TFB_VARYING_LOCS(i0) (0x00002800 + 0x4*(i0)) +#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1)) #define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 -#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000080 +#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020 #define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808 diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 94117988e50..a082ad4575c 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -54,6 +54,8 @@ #define NVC0_NEW_CONSTBUF (1 << 18) #define NVC0_NEW_TEXTURES (1 << 19) #define NVC0_NEW_SAMPLERS (1 << 20) +#define NVC0_NEW_TFB (1 << 21) +#define NVC0_NEW_TFB_BUFFERS (1 << 22) #define NVC0_BUFCTX_CONSTANT 0 #define NVC0_BUFCTX_FRAME 1 @@ -123,6 +125,11 @@ struct nvc0_context { boolean vbo_dirty; boolean vbo_push_hint; + struct nvc0_transform_feedback_state *tfb; + struct pipe_resource *tfbbuf[4]; + unsigned num_tfbbufs; + unsigned tfb_offset[4]; + struct draw_context *draw; }; @@ -177,6 +184,8 @@ void nvc0_tevlprog_validate(struct nvc0_context *); void nvc0_gmtyprog_validate(struct nvc0_context *); void nvc0_fragprog_validate(struct nvc0_context *); +void nvc0_tfb_validate(struct nvc0_context *); + /* nvc0_state.c */ extern void nvc0_init_state_functions(struct nvc0_context *); diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index 981b5488d08..633641713dc 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -55,7 +55,7 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) prog->code_base + NVC0_SHADER_HEADER_SIZE, prog->code_size, prog->code); - BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1); + BEGIN_RING(nvc0->screen->base.channel, RING_3D(MEM_BARRIER), 1); OUT_RING 
(nvc0->screen->base.channel, 0x1111); return TRUE; @@ -178,3 +178,59 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1); OUT_RING (chan, gp->max_gpr); } + +/* It's *is* kind of shader related. We need to inspect the program + * to get the output locations right. + */ +void +nvc0_tfb_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *vp; + struct nvc0_transform_feedback_state *tfb = nvc0->tfb; + int b; + + BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); + if (!tfb) { + OUT_RING(chan, 0); + return; + } + OUT_RING(chan, 1); + + vp = nvc0->vertprog ? nvc0->vertprog : nvc0->gmtyprog; + + for (b = 0; b < nvc0->num_tfbbufs; ++b) { + uint8_t idx, var[128]; + int i, n; + struct nvc0_resource *buf = nvc0_resource(nvc0->tfbbuf[b]); + + BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5); + OUT_RING (chan, 1); + OUT_RESRCh(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR); + OUT_RESRCl(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR); + OUT_RING (chan, buf->base.width0 - nvc0->tfb_offset[b]); + OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID <- offset ? */ + + if (!(nvc0->dirty & NVC0_NEW_TFB)) + continue; + + BEGIN_RING(chan, RING_3D(TFB_UNK07X0(b)), 3); + OUT_RING (chan, 0); + OUT_RING (chan, tfb->varying_count[b]); + OUT_RING (chan, tfb->stride[b]); + + n = b ? 
tfb->varying_count[b - 1] : 0; + i = 0; + for (; i < tfb->varying_count[b]; ++i) { + idx = tfb->varying_index[n + i]; + var[i] = vp->vp.out_pos[idx >> 2] + (idx & 3); + } + for (; i & 3; ++i) + var[i] = 0; + + BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4); + OUT_RINGp (chan, var, i / 4); + } + for (; b < 4; ++b) + IMMED_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 0); +} diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index c08f3693f5e..f6a7f824d58 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -808,6 +808,74 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso) nvc0->dirty |= NVC0_NEW_VERTEX; } +static void * +nvc0_tfb_state_create(struct pipe_context *pipe, + const struct pipe_stream_output_state *pso) +{ + struct nvc0_transform_feedback_state *so; + int n = 0; + int i, c, b; + + so = MALLOC(sizeof(*so) + pso->num_outputs * 4 * sizeof(uint8_t)); + if (!so) + return NULL; + + for (b = 0; b < 4; ++b) { + for (i = 0; i < pso->num_outputs; ++i) { + if (pso->output_buffer[i] != b) + continue; + for (c = 0; c < 4; ++c) { + if (!(pso->register_mask[i] & (1 << c))) + continue; + so->varying_count[b]++; + so->varying_index[n++] = (pso->register_index[i] << 2) | c; + } + } + so->stride[b] = so->varying_count[b] * 4; + } + if (pso->stride) + so->stride[0] = pso->stride; + + return so; +} + +static void +nvc0_tfb_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvc0_tfb_state_bind(struct pipe_context *pipe, void *hwcso) +{ + nvc0_context(pipe)->tfb = hwcso; + nvc0_context(pipe)->dirty |= NVC0_NEW_TFB; +} + +static void +nvc0_set_transform_feedback_buffers(struct pipe_context *pipe, + struct pipe_resource **buffers, + int *offsets, + int num_buffers) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + int i; + + assert(num_buffers >= 0 && num_buffers <= 4); /* why signed ? 
*/ + + for (i = 0; i < num_buffers; ++i) { + assert(offsets[i] >= 0); + nvc0->tfb_offset[i] = offsets[i]; + pipe_resource_reference(&nvc0->tfbbuf[i], buffers[i]); + } + for (; i < nvc0->num_tfbbufs; ++i) + pipe_resource_reference(&nvc0->tfbbuf[i], NULL); + + nvc0->num_tfbbufs = num_buffers; + + nvc0->dirty |= NVC0_NEW_TFB_BUFFERS; +} + void nvc0_init_state_functions(struct nvc0_context *nvc0) { @@ -861,5 +929,10 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) nvc0->pipe.set_vertex_buffers = nvc0_set_vertex_buffers; nvc0->pipe.set_index_buffer = nvc0_set_index_buffer; + + nvc0->pipe.create_stream_output_state = nvc0_tfb_state_create; + nvc0->pipe.delete_stream_output_state = nvc0_tfb_state_delete; + nvc0->pipe.bind_stream_output_state = nvc0_tfb_state_bind; + nvc0->pipe.set_stream_output_buffers = nvc0_set_transform_feedback_buffers; } diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 6419011132a..7406f6c7917 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -436,7 +436,8 @@ static struct state_validate { { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, { nvc0_validate_textures, NVC0_NEW_TEXTURES }, { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, - { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS } + { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, + { nvc0_tfb_validate, NVC0_NEW_TFB | NVC0_NEW_TFB_BUFFERS } }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index 6c8028aba13..752e927e2aa 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -69,14 +69,14 @@ struct nvc0_vertex_stateobj { uint32_t instance_bufs; unsigned vtx_size; unsigned vtx_per_packet_max; - struct nvc0_vertex_element element[1]; + struct 
nvc0_vertex_element element[0]; }; /* will have to lookup index -> location qualifier from nvc0_program */ -struct nvc0_tfb_state { - uint8_t varying_count[4]; +struct nvc0_transform_feedback_state { uint32_t stride[4]; - uint8_t varying_indices[1]; + uint8_t varying_count[4]; + uint8_t varying_index[0]; }; #endif diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 486909c1eb0..aa5decfc233 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -54,7 +54,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, assert(num_elements); so = MALLOC(sizeof(*so) + - (num_elements - 1) * sizeof(struct nvc0_vertex_element)); + num_elements * sizeof(struct nvc0_vertex_element)); if (!so) return NULL; so->num_elements = num_elements; @@ -351,55 +351,6 @@ nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) nvc0_bufctx_emit_relocs(nvc0); } -#if 0 -static struct nouveau_bo * -nvc0_tfb_setup(struct nvc0_context *nvc0) -{ - struct nouveau_channel *chan = nvc0->screen->base.channel; - struct nouveau_bo *tfb = NULL; - int ret, i; - - ret = nouveau_bo_new(nvc0->screen->base.device, - NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &tfb); - if (ret) - return NULL; - - ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR); - if (ret) - return NULL; - memset(tfb->map, 0xee, 8 * 4 * 3); - nouveau_bo_unmap(tfb); - - BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(0)), 5); - OUT_RING (chan, 1); - OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (chan, tfb->size); - OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID(0) */ - BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3); - OUT_RING (chan, 0); - OUT_RING (chan, 8); /* TFB_VARYING_COUNT(0) */ - OUT_RING (chan, 32); /* TFB_BUFFER_STRIDE(0) */ - BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2); - OUT_RING (chan, 0x1f1e1d1c); - OUT_RING (chan, 
0xa3a2a1a0); - for (i = 1; i < 4; ++i) { - BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1); - OUT_RING (chan, 0); - } - BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, RING_3D_(0x135c), 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, RING_3D_(0x135c), 1); - OUT_RING (chan, 0); - - return tfb; -} -#endif - static void nvc0_draw_arrays(struct nvc0_context *nvc0, unsigned mode, unsigned start, unsigned count, -- cgit v1.2.3 From f0d742962377948a9688f4fa3b92c2f8bbca03e9 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 1 Feb 2011 20:52:49 +0100 Subject: nvc0: detect no-op MIN/MAX, do CSE earlier to succeed more often --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 127 +++++++++++++++++----------- 1 file changed, 79 insertions(+), 48 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index e0d4e2daf9b..b6d99724a10 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -607,25 +607,83 @@ constant_operand(struct nv_pc *pc, } } +static void +handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi) +{ + struct nv_value *src0 = nvi->src[0]->value; + struct nv_value *src1 = nvi->src[1]->value; + + if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod)) + return; + if (src0->reg.file != NV_FILE_GPR) + return; + nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0); + nvc0_insn_delete(nvi); +} + +/* check if we can MUL + ADD -> MAD/FMA */ +static void +handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi) +{ + struct nv_value *src0 = nvi->src[0]->value; + struct nv_value *src1 = nvi->src[1]->value; + struct nv_value *src; + int s; + uint8_t mod[4]; + + if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0; + else + if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1; + else + return; + + if ((src0->insn && src0->insn->bb != nvi->bb) || + (src1->insn && src1->insn->bb != 
nvi->bb)) + return; + + /* check for immediates from prior constant folding */ + if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) + return; + src = nvi->src[s]->value; + + mod[0] = nvi->src[0]->mod; + mod[1] = nvi->src[1]->mod; + mod[2] = src->insn->src[0]->mod; + mod[3] = src->insn->src[1]->mod; + + if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) + return; + + nvi->opcode = NV_OP_MAD_F32; + + nv_reference(ctx->pc, nvi, s, NULL); + nvi->src[2] = nvi->src[!s]; + nvi->src[!s] = NULL; + + nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value); + nvi->src[0]->mod = mod[2] ^ mod[s]; + nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value); + nvi->src[1]->mod = mod[3]; +} + static int -nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) +nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_instruction *nvi, *next; int j; for (nvi = b->entry; nvi; nvi = next) { - struct nv_value *src0, *src1, *src; - int s; - uint8_t mod[4]; + struct nv_value *src0, *src1; + uint baseop = NV_BASEOP(nvi->opcode); next = nvi->next; src0 = nvc0_pc_find_immediate(nvi->src[0]); src1 = nvc0_pc_find_immediate(nvi->src[1]); - if (src0 && src1) + if (src0 && src1) { constant_expression(ctx->pc, nvi, src0, src1); - else { + } else { if (src0) constant_operand(ctx->pc, nvi, src0, 0); else @@ -633,44 +691,13 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) constant_operand(ctx->pc, nvi, src1, 1); } - /* check if we can MUL + ADD -> MAD/FMA */ - if (nvi->opcode != NV_OP_ADD) - continue; - - src0 = nvi->src[0]->value; - src1 = nvi->src[1]->value; - - if (SRC_IS_MUL(src0) && src0->refc == 1) - src = src0; - else - if (SRC_IS_MUL(src1) && src1->refc == 1) - src = src1; + if (baseop == NV_OP_MIN || baseop == NV_OP_MAX) + handle_min_max(ctx, nvi); else - continue; - - /* could have an immediate from above constant_* */ - if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) - continue; - s = (src == 
src0) ? 0 : 1; - - mod[0] = nvi->src[0]->mod; - mod[1] = nvi->src[1]->mod; - mod[2] = src->insn->src[0]->mod; - mod[3] = src->insn->src[0]->mod; - - if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) - continue; - - nvi->opcode = NV_OP_MAD; - nv_reference(ctx->pc, nvi, s, NULL); - nvi->src[2] = nvi->src[!s]; - - nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value); - nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value); - nvi->src[0]->mod = mod[2] ^ mod[s]; - nvi->src[1]->mod = mod[3]; + if (nvi->opcode == NV_OP_ADD_F32) + handle_add_mul(ctx, nvi); } - DESCEND_ARBITRARY(j, nv_pass_lower_arith); + DESCEND_ARBITRARY(j, nv_pass_algebraic_opt); return 0; } @@ -1158,11 +1185,17 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) pass.n = 0; pass.pc = pc; + /* Do CSE so we can just compare values by pointer in subsequent passes. */ + pc->pass_seq++; + ret = nv_pass_cse(&pass, root); + if (ret) + return ret; + /* Do this first, so we don't have to pay attention * to whether sources are supported memory loads. */ pc->pass_seq++; - ret = nv_pass_lower_arith(&pass, root); + ret = nv_pass_algebraic_opt(&pass, root); if (ret) return ret; @@ -1190,11 +1223,9 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) reldelim->pc = pc; } - pc->pass_seq++; - ret = nv_pass_cse(&pass, root); - if (ret) - return ret; - + /* May run DCE before load-combining since that pass will clean up + * after itself. 
+ */ dce.pc = pc; do { dce.removed = 0; -- cgit v1.2.3 From d3ea15f5ca570b9d885781cd72232026e50046d0 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 2 Feb 2011 21:23:09 +0100 Subject: nvc0: don't combine memory loads across block boundaries --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index b6d99724a10..57bf4b77f3e 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -849,6 +849,11 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) } } + ctx->alloc = 0; + ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL; + for (s = 0; s < 16; ++s) + ctx->mem_c[s] = NULL; + DESCEND_ARBITRARY(s, nv_pass_mem_opt); return 0; } -- cgit v1.2.3 From c62fc50c884e2755c0731c395f200d23b975fbde Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 2 Feb 2011 21:38:07 +0100 Subject: nvc0: reset texture base address after read transfer --- src/gallium/drivers/nvc0/nvc0_transfer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index 92e006cba49..b279bdc6e7d 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -318,6 +318,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[1].domain = NOUVEAU_BO_GART; if (usage & PIPE_TRANSFER_READ) { + unsigned base = tx->rect[0].base; unsigned i; for (i = 0; i < tx->nlayers; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0], @@ -328,9 +329,10 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[0].base += mt->layer_stride; tx->rect[1].base += size; } + tx->rect[0].z = z; + tx->rect[0].base = base; + tx->rect[1].base = 0; } - tx->rect[0].z = z; - 
tx->rect[1].base = 0; return &tx->base; } -- cgit v1.2.3 From 92d8af582d2584ed95bbb4c7965812f7bc47c9ff Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 18:59:46 +0100 Subject: nvc0: try to fix register conflicts for vector instructions Vector here means using multiple 32 bit regs which are forced to be consecutive in the register file. This still isn't quite nice. --- src/gallium/drivers/nvc0/nvc0_pc.h | 3 ++- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 23 +++++++++--------- src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 37 ++++++++++++++++++++--------- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 12 ++++------ 4 files changed, 43 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 01ca95b0741..0756288daf7 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -53,7 +53,8 @@ /** * BIND forces source operand i into the same register as destination operand i, - * and the operands will be assigned consecutive registers (needed for TEX) + * and the operands will be assigned consecutive registers (needed for TEX). + * Beware conflicts ! * SELECT forces its multiple source operands and its destination operand into * one and the same register. */ diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 57bf4b77f3e..a6791529fa7 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -1147,13 +1147,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) /* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy * neighbouring registers. CSE might have messed this up. + * Just generate a MOV for each source to avoid conflicts if they're used in + * multiple NV_OP_BIND at different positions. 
*/ static int nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_value *val; struct nv_instruction *bnd, *nvi, *next; - int s, t; + int s; for (bnd = b->entry; bnd; bnd = next) { next = bnd->next; @@ -1161,20 +1163,17 @@ nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) continue; for (s = 0; s < 4 && bnd->src[s]; ++s) { val = bnd->src[s]->value; - for (t = s + 1; t < 4 && bnd->src[t]; ++t) { - if (bnd->src[t]->value != val) - continue; - nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); - nvi->def[0] = new_value_like(ctx->pc, val); - nvi->def[0]->insn = nvi; - nv_reference(ctx->pc, nvi, 0, val); - nvc0_insn_insert_before(bnd, nvi); - nv_reference(ctx->pc, bnd, t, nvi->def[0]); - } + nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); + nvi->def[0] = new_value_like(ctx->pc, val); + nvi->def[0]->insn = nvi; + nv_reference(ctx->pc, nvi, 0, val); + nv_reference(ctx->pc, bnd, s, nvi->def[0]); + + nvc0_insn_insert_before(bnd, nvi); } } - DESCEND_ARBITRARY(t, nv_pass_fix_bind); + DESCEND_ARBITRARY(s, nv_pass_fix_bind); return 0; } diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index d24f09a1507..ee282680061 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -477,7 +477,7 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) break; case NV_OP_MOV: if ((iter == 2) && i->src[0]->value->insn && - !nv_is_texture_op(i->src[0]->value->join->insn->opcode)) + !nv_is_vector_op(i->src[0]->value->join->insn->opcode)) try_join_values(ctx, i->def[0], i->src[0]->value); break; case NV_OP_SELECT: @@ -488,18 +488,16 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) do_join_values(ctx, i->def[0], i->src[c]->value); } break; - case NV_OP_TEX: - case NV_OP_TXB: - case NV_OP_TXL: - case NV_OP_TXQ: - /* on nvc0, TEX src and dst can differ */ - break; case NV_OP_BIND: if (iter) break; - for (c = 0; c < 6 && i->src[c]; ++c) + for (c = 0; c < 4 && 
i->src[c]; ++c) do_join_values(ctx, i->def[c], i->src[c]->value); break; + case NV_OP_TEX: + case NV_OP_TXB: + case NV_OP_TXL: + case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */ default: break; } @@ -730,6 +728,21 @@ nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set) set->pc = pc; } +/* We allocate registers for all defs of a vector instruction at once. + * Since we'll encounter all of them in the allocation loop, do the allocation + * when we're at the one with the live range that starts latest. + */ +static boolean +is_best_representative(struct nv_value *val) +{ + struct nv_instruction *nvi = val->insn; + int i; + for (i = 0; i < 4 && val->insn->def[i]; ++i) + if (nvi->def[i]->livei && nvi->def[i]->livei->bgn > val->livei->bgn) + return FALSE; + return TRUE; +} + static void insert_ordered_tail(struct nv_value *list, struct nv_value *nval) { @@ -821,11 +834,13 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) boolean mem = FALSE; int v = nvi_vector_size(cur->insn); - if (v > 1) - mem = !reg_assign(&f, &cur->insn->def[0], v); - else + if (v > 1) { + if (is_best_representative(cur)) + mem = !reg_assign(&f, &cur->insn->def[0], v); + } else { if (iter) mem = !reg_assign(&f, &cur, 1); + } if (mem) { NOUVEAU_ERR("out of registers\n"); diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 9b5d4290787..f53af6c49c3 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1333,10 +1333,6 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, if (array) arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]); - /* ensure that all inputs reside in a GPR */ - for (c = 0; c < dim + array + cube + shadow; ++c) - (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1; - /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */ bnd = new_instruction(bld->pc, NV_OP_BIND); @@ -1878,10 +1874,10 @@ 
bld_instruction(struct bld_context *bld, } for (c = 0; c < 4; ++c) - if ((mask & (1 << c)) && - ((dst0[c]->reg.file == NV_FILE_IMM) || - (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR))) - dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); + if (mask & (1 << c)) + if ((dst0[c]->reg.file == NV_FILE_IMM) || + (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63)) + dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); c = 0; if ((mask & 0x3) == 0x3) { -- cgit v1.2.3 From 8f051345807494ae0aeaf75e698477f65f29322d Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 19:01:54 +0100 Subject: nvc0: set basic block on manual instruction insertion --- src/gallium/drivers/nvc0/nvc0_pc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index 1d1b9e19b78..a2006321021 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -518,6 +518,8 @@ nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni) ni->prev = at; ni->next->prev = ni; ni->prev->next = ni; + ni->bb = at->bb; + ni->bb->num_instructions++; } void -- cgit v1.2.3 From d5263e4093e7fefacbbe3bbbec717cdf64856cbe Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 19:03:09 +0100 Subject: nv50,nvc0: fix condition code change when commuting SET sources --- src/gallium/drivers/nv50/nv50_pc_optimize.c | 2 +- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 27eb3817bf1..679e5ea1485 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -299,7 +299,7 @@ check_swap_src_0_1(struct nv_instruction *nvi) } if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0) - 
nvi->set_cond = cc_swapped[nvi->set_cond]; + nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; } static int diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index a6791529fa7..2e554dbe4e4 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -258,8 +258,8 @@ check_swap_src_0_1(struct nv_instruction *nvi) } } - if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET) - nvi->set_cond = cc_swapped[nvi->set_cond]; + if (nvi->src[0] != src0 && NV_BASEOP(nvi->opcode) == NV_OP_SET) + nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; } static void -- cgit v1.2.3 From 8e240e6153e089d23f646c7b3f2c5edff7ac223c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 19:05:55 +0100 Subject: nvc0: store only one value per basic block for TGSI regs --- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index f53af6c49c3..8c0967dfa87 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -63,7 +63,13 @@ bld_register_access(struct bld_register *reg, unsigned i) static INLINE void bld_register_add_val(struct bld_register *reg, struct nv_value *val) { - util_dynarray_append(®->vals, struct nv_value *, val); + struct nv_basic_block *bb = val->insn->bb; + + if (reg->vals.size && + (util_dynarray_top(®->vals, struct nv_value *))->insn->bb == bb) + *(util_dynarray_top_ptr(®->vals, struct nv_value *)) = val; + else + util_dynarray_append(®->vals, struct nv_value *, val); } static INLINE boolean -- cgit v1.2.3 From c485368efea8527da68a476af4ed48541b5ed93e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 6 Feb 2011 13:09:24 +0100 Subject: nvc0: do not generate a backwards jump 
if a loop ends with BRK --- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 8c0967dfa87..8b6cb0e7c76 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1693,9 +1693,11 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */ + bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); - nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + } bld_loop_end(bld, bb); /* replace loop-side operand of the phis */ -- cgit v1.2.3 From 7401590dedf6f2abb1f0f0db988be90acb1fb84f Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 14:54:17 +0100 Subject: nv50,nvc0: do not forget to apply sign mode to saved TGSI inputs --- src/gallium/drivers/nv50/nv50_tgsi_to_nc.c | 2 +- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 12 ++---------- 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c index d6b80c3ea79..ce9300ad8fd 100644 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c @@ -1130,7 +1130,7 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, case TGSI_FILE_INPUT: res = bld_saved_input(bld, idx, swz); if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP)) - return res; + break; res = new_value(bld->pc, bld->ti->input_file, type); res->reg.id = bld->ti->input_map[idx][swz]; diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c 
b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 8b6cb0e7c76..dd1c7f73f5c 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -983,14 +983,6 @@ bld_new_block(struct bld_context *bld, struct nv_basic_block *b) bld->out_kind = CFG_EDGE_FORWARD; } -static struct nv_value * -bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c) -{ - if (bld->saved_inputs[i][c]) - return bld->saved_inputs[i][c]; - return NULL; -} - static struct nv_value * bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val) { @@ -1058,9 +1050,9 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, case TGSI_FILE_INPUT: assert(!src->Register.Dimension); if (!ptr) { - res = bld_get_saved_input(bld, idx, swz); + res = bld->saved_inputs[idx][swz]; if (res) - return res; + break; } res = new_value(bld->pc, bld->ti->input_file, 4); if (ptr) -- cgit v1.2.3 From 4124feabcbbcf9ebe1ba37cf64419edbeda9c519 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 21:17:37 +0100 Subject: nvc0: make sure phi-ops really have one source per in-block --- src/gallium/drivers/nvc0/nvc0_pc.h | 2 + src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 57 +++++++++++++++++++---------- 2 files changed, 40 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 0756288daf7..40d728aefc7 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -345,6 +345,8 @@ struct nv_ref { uint8_t flags; }; +#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0) + struct nv_basic_block; struct nv_instruction { diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index ee282680061..718943bdbdf 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -360,20 +360,32 @@ 
need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) return (b->num_in > 1) && (n == 2); } +/* Look for the @phi's operand whose definition reaches @b. */ static int phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b, struct nv_basic_block *tb) { + struct nv_ref *srci, *srcj; int i, j; for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) { - if (!nvc0_bblock_reachable_by(b, phi->src[i]->value->insn->bb, tb)) + srci = phi->src[i]; + /* if already replaced, check with original source first */ + if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV) + srci = srci->value->insn->src[0]; + if (!nvc0_bblock_reachable_by(b, srci->value->insn->bb, NULL)) continue; /* NOTE: back-edges are ignored by the reachable-by check */ - if (j < 0 || !nvc0_bblock_reachable_by(phi->src[j]->value->insn->bb, - phi->src[i]->value->insn->bb, tb)) + if (j < 0 || !nvc0_bblock_reachable_by(srcj->value->insn->bb, + srci->value->insn->bb, NULL)) { j = i; + srcj = srci; + } } + if (j >= 0 && nvc0_bblock_reachable_by(b, phi->def[0]->insn->bb, NULL)) + if (!nvc0_bblock_reachable_by(srcj->value->insn->bb, + phi->def[0]->insn->bb, NULL)) + j = -1; return j; } @@ -420,21 +432,23 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) ctx->pc->current_block = pn; for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { - if ((j = phi_opnd_for_bb(i, p, b)) < 0) - continue; - val = i->src[j]->value; - - if (i->src[j]->flags) { - /* value already encountered from a different in-block */ - val = val->insn->src[0]->value; - while (j < 6 && i->src[j]) - ++j; - assert(j < 6); + j = phi_opnd_for_bb(i, p, b); + + if (j < 0) { + val = i->def[0]; + } else { + val = i->src[j]->value; + if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) { + j = -1; + /* use original value, we already encountered & replaced it */ + val = val->insn->src[0]->value; + } } + if (j < 0) /* need an additional source ? 
*/ + for (j = 0; j < 6 && i->src[j] && i->src[j]->value != val; ++j); + assert(j < 6); /* XXX: really ugly shaders */ ni = new_instruction(ctx->pc, NV_OP_MOV); - - /* TODO: insert instruction at correct position in the first place */ if (ni->prev && ni->prev->target) nvc0_insns_permute(ni->prev, ni); @@ -442,7 +456,7 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) ni->def[0]->insn = ni; nv_reference(ctx->pc, ni, 0, val); nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */ - i->src[j]->flags = 1; + i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV; } if (pn != p && pn->exit) { @@ -619,15 +633,16 @@ static void collect_live_values(struct nv_basic_block *b, const int n) { int i; - if (b->out[0]) { - if (b->out[1]) { /* what to do about back-edges ? */ + /* XXX: what to do about back/fake-edges (used to include both here) ? */ + if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { for (i = 0; i < n; ++i) b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i]; } else { memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t)); } } else - if (b->out[1]) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t)); } else { memset(b->live_set, 0, n * sizeof(uint32_t)); @@ -877,6 +892,10 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) ret = pass_generate_phi_movs(ctx, root); assert(!ret); +#ifdef NVC0_RA_DEBUG_LIVEI + nvc0_print_function(root); +#endif + for (i = 0; i < pc->loop_nesting_bound; ++i) { pc->pass_seq++; ret = pass_build_live_sets(ctx, root); -- cgit v1.2.3 From 0f776fea432052c00972ae1c6a0fbf76ec5e0b6c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 21:19:23 +0100 Subject: nvc0: implement local memory load and store ops --- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 35 +++++++++++++++++++++++++----- src/gallium/drivers/nvc0/nvc0_pc_print.c | 4 ++-- 
src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 11 ++++++---- 3 files changed, 38 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index 644b9ef61a1..b2a80566a02 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -804,11 +804,8 @@ emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i) } static void -emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) +emit_ld_common(struct nv_pc *pc, struct nv_instruction *i) { - pc->emit[0] = 0x00000006; - pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); - emit_ldst_size(pc, i); set_pred(pc, i); @@ -818,6 +815,15 @@ emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) DID(pc, i->def[0], 14); } +static void +emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000006; + pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); + + emit_ld_common(pc, i); +} + static void emit_ld(struct nv_pc *pc, struct nv_instruction *i) { @@ -829,6 +835,12 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i) } else { emit_ld_const(pc, i); } + } else + if (SFILE(i, 0) == NV_FILE_MEM_L) { + pc->emit[0] = 0x00000005; + pc->emit[1] = 0xc0000000; + + emit_ld_common(pc, i); } else { NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0)); abort(); @@ -838,8 +850,19 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i) static void emit_st(struct nv_pc *pc, struct nv_instruction *i) { - NOUVEAU_ERR("emit_st: not handled yet\n"); - abort(); + if (SFILE(i, 0) != NV_FILE_MEM_L) + NOUVEAU_ERR("emit_st(%u): file not handled yet\n", SFILE(i, 0)); + + pc->emit[0] = 0x00000005 | (0 << 8); /* write-back caching */ + pc->emit[1] = 0xc8000000; + + emit_ldst_size(pc, i); + + set_pred(pc, i); + set_address_16(pc, i->src[0]); + + SID(pc, (i->indirect >= 0) ? 
i->src[i->indirect] : NULL, 20); + DID(pc, i->src[1]->value, 14); } void diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 76dd0f57500..9e0bffacd60 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -280,8 +280,8 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 }, { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index dd1c7f73f5c..43c27fd8906 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -569,11 +569,12 @@ bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst, loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); - loc->reg.id = ofst * 4; + loc->reg.address = ofst * 4; nv_reference(bld->pc, insn, 0, loc); - nv_reference(bld->pc, insn, 1, ptr); - nv_reference(bld->pc, insn, 2, val); + nv_reference(bld->pc, insn, 1, val); + if (ptr) + bld_src_pointer(bld, insn, 2, ptr); } static struct nv_value * @@ -585,7 +586,9 @@ bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst) loc->reg.address = ofst * 4; - val = bld_insn_2(bld, NV_OP_LD, loc, ptr); + val = bld_insn_1(bld, NV_OP_LD, loc); + if (ptr) + bld_src_pointer(bld, val->insn, 1, ptr); return val; } -- cgit v1.2.3 From 0691530b7f01f3106f7b4d697cd7a42f86fa23d5 Mon Sep 17 00:00:00 
2001 From: Christoph Bumiller Date: Tue, 8 Feb 2011 16:55:06 +0100 Subject: nvc0: replace branching with predicated insns where feasible --- src/gallium/drivers/nvc0/nvc0_pc.c | 13 ++--- src/gallium/drivers/nvc0/nvc0_pc.h | 12 ++-- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 86 ++++++++++++++++++++++++++--- src/gallium/drivers/nvc0/nvc0_pc_print.c | 28 +++++----- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 30 +++++++--- 5 files changed, 123 insertions(+), 46 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index a2006321021..e0cba05b976 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -55,15 +55,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s, boolean nvc0_insn_is_predicateable(struct nv_instruction *nvi) { - int s; - - if (!nv_op_predicateable(nvi->opcode)) + if (nvi->predicate >= 0) /* already predicated */ return FALSE; - if (nvi->predicate >= 0) + if (!nvc0_op_info_table[nvi->opcode].predicate && + !nvc0_op_info_table[nvi->opcode].pseudo) return FALSE; - for (s = 0; s < 4 && nvi->src[s]; ++s) - if (nvi->src[s]->value->reg.file == NV_FILE_IMM) - return FALSE; return TRUE; } @@ -505,6 +501,9 @@ nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i) i->bb = b; b->num_instructions++; + + if (i->prev && i->prev->terminator) + nvc0_insns_permute(i->prev, i); } void diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 40d728aefc7..efa073a9201 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -205,6 +205,10 @@ #define NV_CC_C 0x11 #define NV_CC_A 0x12 #define NV_CC_S 0x13 +#define NV_CC_INVERSE(cc) ((cc) ^ 0x7) +/* for 1 bit predicates: */ +#define NV_CC_P 0 +#define NV_CC_NOT_P 1 #define NV_PC_MAX_INSTRUCTIONS 2048 #define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) @@ -260,12 +264,6 @@ nv_op_supported_src_mods(uint opcode) return 
nvc0_op_info_table[opcode].mods; } -static INLINE boolean -nv_op_predicateable(uint opcode) -{ - return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE; -} - static INLINE uint nv_type_order(ubyte type) { @@ -488,7 +486,7 @@ nv_alloc_instruction(struct nv_pc *pc, uint opcode) assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS); insn->opcode = opcode; - insn->cc = 0; + insn->cc = NV_CC_P; insn->indirect = -1; insn->predicate = -1; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 2e554dbe4e4..6cfa03d5b16 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -99,6 +99,7 @@ inst_removable(struct nv_instruction *nvi) nvc0_insn_refcount(nvi))); } +/* Check if we do not actually have to emit this instruction. */ static INLINE boolean inst_is_noop(struct nv_instruction *nvi) { @@ -1043,7 +1044,6 @@ nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b) return 0; } -#if 0 /* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE. * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with * BREAK and dummy ELSE block. @@ -1064,24 +1064,92 @@ bb_is_if_else_endif(struct nv_basic_block *bb) } } -/* predicate instructions and remove branch at the end */ +/* Predicate instructions and delete any branch at the end if it is + * not a break from a loop. 
+ */ static void predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b, - struct nv_value *p, ubyte cc) + struct nv_value *pred, uint8_t cc) { + struct nv_instruction *nvi, *prev; + int s; + if (!b->entry) + return; + for (nvi = b->entry; nvi; nvi = nvi->next) { + prev = nvi; + if (inst_is_noop(nvi)) + continue; + for (s = 0; nvi->src[s]; ++s); + assert(s < 6); + nvi->predicate = s; + nvi->cc = cc; + nv_reference(pc, nvi, nvi->predicate, pred); + } + if (prev->opcode == NV_OP_BRA && + b->out_kind[0] != CFG_EDGE_LOOP_LEAVE && + b->out_kind[1] != CFG_EDGE_LOOP_LEAVE) + nvc0_insn_delete(prev); } -#endif -/* NOTE: Run this after register allocation, we can just cut out the cflow - * instructions and hook the predicates to the conditional OPs if they are - * not using immediates; better than inserting SELECT to join definitions. - * - * NOTE: Should adapt prior optimization to make this possible more often. +static INLINE boolean +may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred) +{ + if (nvi->def[0] && values_equal(nvi->def[0], pred)) + return FALSE; + return nvc0_insn_is_predicateable(nvi); +} + +/* Transform IF/ELSE/ENDIF constructs into predicated instructions + * where feasible. 
*/ static int nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) { + struct nv_instruction *nvi; + struct nv_value *pred; + int k; + int n0, n1; /* instruction counts of outgoing blocks */ + + if (bb_is_if_else_endif(b)) { + assert(b->exit && b->exit->opcode == NV_OP_BRA); + + assert(b->exit->predicate >= 0); + pred = b->exit->src[b->exit->predicate]->value; + + n1 = n0 = 0; + for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) + if (!may_predicate_insn(nvi, pred)) + break; + if (!nvi) { + /* we're after register allocation, so there always is an ELSE block */ + for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) + if (!may_predicate_insn(nvi, pred)) + break; + } + + /* 12 is an arbitrary limit */ + if (!nvi && n0 < 12 && n1 < 12) { + predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc); + predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc); + + nvc0_insn_delete(b->exit); /* delete the branch */ + + /* and a potential joinat before it */ + if (b->exit && b->exit->opcode == NV_OP_JOINAT) + nvc0_insn_delete(b->exit); + + /* remove join operations at the end of the conditional */ + k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 
1 : 0; + if ((nvi = b->out[0]->out[k]->entry)) { + nvi->join = 0; + if (nvi->opcode == NV_OP_JOIN) + nvc0_insn_delete(nvi); + } + } + } + DESCEND_ARBITRARY(k, nv_pass_flatten); + return 0; } diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 9e0bffacd60..7840078614f 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -225,7 +225,7 @@ nvc0_print_instruction(struct nv_instruction *i) PRINT("%s", gree); if (NV_BASEOP(i->opcode) == NV_OP_SET) - PRINT("set %s", nv_cond_name(i->set_cond)); + PRINT("%s %s", nvc0_opcode_name(i->opcode), nv_cond_name(i->set_cond)); else if (i->saturate) PRINT("sat %s", nvc0_opcode_name(i->opcode)); @@ -278,7 +278,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, - { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, @@ -343,18 +343,18 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, - { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, - { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 }, - 
{ NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, @@ -369,7 +369,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, + { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 43c27fd8906..72bfcd0c95f 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -659,7 +659,7 @@ bld_kil(struct bld_context *bld, struct nv_value *src) static void bld_flow(struct bld_context *bld, uint opcode, - struct nv_value *src, struct nv_basic_block *target, + struct 
nv_value *pred, uint8_t cc, struct nv_basic_block *target, boolean reconverge) { struct nv_instruction *nvi; @@ -670,8 +670,10 @@ bld_flow(struct bld_context *bld, uint opcode, nvi = new_instruction(bld->pc, opcode); nvi->target = target; nvi->terminator = 1; - if (src) - bld_src_predicate(bld, nvi, 0, src); + if (pred) { + nvi->cc = cc; + bld_src_predicate(bld, nvi, 0, pred); + } } static ubyte @@ -1584,6 +1586,7 @@ bld_instruction(struct bld_context *bld, case TGSI_OPCODE_IF: { struct nv_basic_block *b = new_basic_block(bld->pc); + struct nv_value *pred = emit_fetch(bld, insn, 0, 0); assert(bld->cond_lvl < BLD_MAX_COND_NESTING); @@ -1592,10 +1595,19 @@ bld_instruction(struct bld_context *bld, bld->join_bb[bld->cond_lvl] = bld->pc->current_block; bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; - src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ, - emit_fetch(bld, insn, 0, 0), bld->zero); + if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) { + pred = bld_clone(bld, pred->insn); + pred->reg.size = 1; + pred->reg.file = NV_FILE_PRED; + if (pred->insn->opcode == NV_OP_FSET_F32) + pred->insn->opcode = NV_OP_SET_F32; + } else { + pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U, + pred, bld->zero); + } + assert(!mask); - bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0)); + bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0)); ++bld->cond_lvl; bld_new_block(bld, b); @@ -1661,7 +1673,7 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); @@ -1673,7 +1685,7 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + bld_flow(bld, NV_OP_BRA, 
NULL, NV_CC_P, bb, FALSE); nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); @@ -1689,7 +1701,7 @@ bld_instruction(struct bld_context *bld, struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */ - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); } -- cgit v1.2.3 From 0bd04cdd1245a9bfca67e87018125e7ab287d1c0 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 9 Feb 2011 14:26:14 +0100 Subject: nvc0: make CSE work for ops with multiple results --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 85 ++++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 6cfa03d5b16..9a7094e5d3c 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -1153,40 +1153,98 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) return 0; } +/* Tests instructions for equality, but independently of sources. 
*/ +static boolean +is_operation_equal(struct nv_instruction *a, struct nv_instruction *b) +{ + if (a->opcode != b->opcode) + return FALSE; + if (nv_is_texture_op(a->opcode)) { + if (a->ext.tex.t != b->ext.tex.t || + a->ext.tex.s != b->ext.tex.s) + return FALSE; + if (a->tex_dim != b->tex_dim || + a->tex_array != b->tex_array || + a->tex_cube != b->tex_cube || + a->tex_shadow != b->tex_shadow || + a->tex_live != b->tex_live) + return FALSE; + } else + if (a->opcode == NV_OP_CVT) { + if (a->ext.cvt.s != b->ext.cvt.s || + a->ext.cvt.d != b->ext.cvt.d) + return FALSE; + } else + if (NV_BASEOP(a->opcode) == NV_OP_SET || + NV_BASEOP(a->opcode) == NV_OP_SLCT) { + if (a->set_cond != b->set_cond) + return FALSE; + } else + if (a->opcode == NV_OP_LINTERP || + a->opcode == NV_OP_PINTERP) { + if (a->centroid != b->centroid || + a->flat != b->flat) + return FALSE; + } + if (a->cc != b->cc) + return FALSE; + if (a->lanes != b->lanes || + a->patch != b->patch || + a->saturate != b->saturate) + return FALSE; + if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */ + return FALSE; + return TRUE; +} + /* local common subexpression elimination, stupid O(n^2) implementation */ static int nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_instruction *ir, *ik, *next; struct nv_instruction *entry = b->phi ? b->phi : b->entry; - int s; + int s, d; unsigned int reps; do { reps = 0; for (ir = entry; ir; ir = next) { next = ir->next; + if (ir->fixed) + continue; for (ik = entry; ik != ir; ik = ik->next) { - if (ir->opcode != ik->opcode || ir->fixed) + if (!is_operation_equal(ir, ik)) continue; - - if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1]) + if (!ir->def[0] || !ik->def[0]) continue; if (ik->indirect != ir->indirect || ik->predicate != ir->predicate) continue; - if (!values_equal(ik->def[0], ir->def[0])) + for (d = 0; d < 4; ++d) { + if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 
1 : 0)) + break; + if (ir->def[d]) { + if (!values_equal(ik->def[0], ir->def[0])) + break; + } else { + d = 4; + break; + } + } + if (d != 4) continue; - for (s = 0; s < 3; ++s) { + for (s = 0; s < 5; ++s) { struct nv_value *a, *b; - if (!ik->src[s]) { - if (ir->src[s]) - break; - continue; + if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0)) + break; + if (!ir->src[s]) { + s = 5; + break; } + if (ik->src[s]->mod != ir->src[s]->mod) break; a = ik->src[s]->value; @@ -1194,14 +1252,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) if (a == b) continue; if (a->reg.file != b->reg.file || - a->reg.id < 0 || + a->reg.id < 0 || /* this excludes memory loads/stores */ a->reg.id != b->reg.id) break; } - if (s == 3) { + if (s == 5) { nvc0_insn_delete(ir); + for (d = 0; d < 4 && ir->def[d]; ++d) + nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]); ++reps; - nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]); break; } } -- cgit v1.2.3 From 95f0aa0e52b694f496dcd32f640d1a29b97f4d0d Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 2 Feb 2011 16:35:21 +0100 Subject: nvc0: correct storage type for 16 bit surface formats --- src/gallium/drivers/nvc0/nvc0_miptree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c index 7c7e134146e..22f48c8a5fe 100644 --- a/src/gallium/drivers/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -143,8 +143,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, switch (pt->format) { case PIPE_FORMAT_Z16_UNORM: tile_flags = 0x0700; /* COMPRESSED */ - tile_flags = 0x0200; /* NORMAL ? */ - tile_flags = 0x0100; /* NORMAL ? 
*/ + tile_flags = 0x0100; /* NORMAL */ break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: tile_flags = 0x5300; /* MSAA 4, COMPRESSED */ @@ -170,6 +169,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, break; case PIPE_FORMAT_R16G16B16A16_UNORM: tile_flags = 0xe900; /* COMPRESSED */ + tile_flags = 0xfe00; /* NORMAL */ break; default: tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */ -- cgit v1.2.3 From fc798dc37dd8a39c253a436fa0c9dd2071f09270 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 2 Feb 2011 22:04:53 +0100 Subject: nvc0: fix stride of NVC0_3D_RT methods --- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 1a34313912c..4b1325a3043 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -158,29 +158,29 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_RT__ESIZE 0x00000020 #define NVC0_3D_RT__LEN 0x00000008 -#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x20*(i0)) +#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x40*(i0)) -#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x20*(i0)) +#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x40*(i0)) -#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x20*(i0)) +#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x40*(i0)) -#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x20*(i0)) +#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x40*(i0)) -#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x20*(i0)) +#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x40*(i0)) -#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x20*(i0)) +#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x40*(i0)) #define NVC0_3D_RT_TILE_MODE_UNK0 0x00000001 #define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070 #define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4 #define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700 #define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8 -#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x20*(i0)) +#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x40*(i0)) #define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff #define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0 #define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000 -#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x20*(i0)) +#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x40*(i0)) #define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0)) #define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020 -- cgit v1.2.3 From b6e3130a3b6e40308ddb5d11638d509fe69eb912 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 9 Feb 2011 15:01:23 +0100 Subject: nvc0: serialize on PIPE_FLUSH_RENDER_CACHE as well Effects were easily visible in piglit/fbo-generatemipmap-formats. 
--- src/gallium/drivers/nvc0/nvc0_context.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 1ebf9e2bafb..20c1a31b5b3 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -41,6 +41,10 @@ nvc0_flush(struct pipe_context *pipe, unsigned flags, OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); OUT_RING (chan, 0x00); + } else + if ((flags & PIPE_FLUSH_RENDER_CACHE) && !(flags & PIPE_FLUSH_FRAME)) { + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); } if (fence) { -- cgit v1.2.3 From cdca3c58aa2d9549f5188910e2a77b438516714f Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 10 Jan 2011 05:41:47 +0100 Subject: gallium: remove pipe_vertex_buffer::max_index This is redundant to pipe_draw_info::max_index and doesn't really fit in the optimizations I plan. --- src/gallium/auxiliary/draw/draw_llvm.c | 17 ++++------------- src/gallium/auxiliary/draw/draw_llvm.h | 5 +---- src/gallium/auxiliary/draw/draw_pt.c | 3 +-- src/gallium/auxiliary/draw/draw_pt_fetch.c | 4 ++-- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 +- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 2 +- src/gallium/auxiliary/util/u_draw_quad.c | 1 - src/gallium/auxiliary/util/u_dump_state.c | 1 - src/gallium/docs/d3d11ddi.txt | 1 - src/gallium/drivers/nvc0/nvc0_vbo.c | 2 +- src/gallium/drivers/svga/svga_state_vs.c | 2 +- src/gallium/drivers/trace/tr_dump_state.c | 1 - src/gallium/include/pipe/p_state.h | 1 - .../state_trackers/d3d1x/dxgi/src/dxgi_native.cpp | 1 - src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h | 1 - src/gallium/state_trackers/vega/polygon.c | 2 -- src/gallium/tests/graw/fs-test.c | 1 - src/gallium/tests/graw/gs-test.c | 2 -- src/gallium/tests/graw/quad-tex.c | 1 - src/gallium/tests/graw/shader-leak.c | 1 - src/gallium/tests/graw/tri-gs.c | 1 - 
src/gallium/tests/graw/tri-instanced.c | 2 -- src/gallium/tests/graw/tri.c | 1 - src/gallium/tests/graw/vs-test.c | 1 - src/mesa/state_tracker/st_draw.c | 5 ----- src/mesa/state_tracker/st_draw_feedback.c | 1 - 26 files changed, 12 insertions(+), 50 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index a73bdd78087..a5217c1d4ec 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -214,13 +214,12 @@ static LLVMTypeRef create_jit_vertex_buffer_type(struct gallivm_state *gallivm) { LLVMTargetDataRef target = gallivm->target; - LLVMTypeRef elem_types[4]; + LLVMTypeRef elem_types[3]; LLVMTypeRef vb_type; elem_types[0] = - elem_types[1] = - elem_types[2] = LLVMInt32TypeInContext(gallivm->context); - elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */ + elem_types[1] = LLVMInt32TypeInContext(gallivm->context); + elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */ vb_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); @@ -229,10 +228,8 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm) LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, target, vb_type, 0); - LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, max_index, - target, vb_type, 1); LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, - target, vb_type, 2); + target, vb_type, 1); LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type); @@ -513,9 +510,7 @@ generate_fetch(struct gallivm_state *gallivm, LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &indices, 1, ""); LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf); - LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(gallivm, vbuf); LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf); - LLVMValueRef cond; 
LLVMValueRef stride; if (velem->instance_divisor) { @@ -525,10 +520,6 @@ generate_fetch(struct gallivm_state *gallivm, "instance_divisor"); } - /* limit index to min(index, vb_max_index) */ - cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); - index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); - stride = LLVMBuildMul(builder, vb_stride, index, ""); vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 9f038f1f04d..e8623e7bcdc 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -133,11 +133,8 @@ struct draw_jit_context #define draw_jit_vbuffer_stride(_gallivm, _ptr) \ lp_build_struct_get(_gallivm, _ptr, 0, "stride") -#define draw_jit_vbuffer_max_index(_gallivm, _ptr) \ - lp_build_struct_get(_gallivm, _ptr, 1, "max_index") - #define draw_jit_vbuffer_offset(_gallivm, _ptr) \ - lp_build_struct_get(_gallivm, _ptr, 2, "buffer_offset") + lp_build_struct_get(_gallivm, _ptr, 1, "buffer_offset") typedef int diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 4078b2a07d0..c3d7e871f7a 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -459,10 +459,9 @@ draw_vbo(struct draw_context *draw, } debug_printf("Buffers:\n"); for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { - debug_printf(" %u: stride=%u maxindex=%u offset=%u ptr=%p\n", + debug_printf(" %u: stride=%u offset=%u ptr=%p\n", i, draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index, draw->pt.vertex_buffer[i].buffer_offset, draw->pt.user.vbuffer[i]); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index ae12ee24bdc..4fa3b265e10 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -139,7 +139,7 @@ void draw_pt_fetch_run( struct 
pt_fetch *fetch, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index); + draw->pt.user.max_index); } translate->run_elts( translate, @@ -166,7 +166,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index); + draw->pt.user.max_index); } translate->run( translate, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index e706b7796f8..51043102a61 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -186,7 +186,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index); + draw->pt.user.max_index); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index c98fb3d5205..1e926fb028e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -169,7 +169,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, ((const ubyte *) draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index ); + draw->pt.user.max_index ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 2747cd4b0c1..0defd919974 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -56,7 +56,6 @@ util_draw_vertex_buffer(struct pipe_context 
*pipe, vbuffer.buffer = vbuf; vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = offset; - vbuffer.max_index = num_verts - 1; if (cso) { cso_set_vertex_buffers(cso, 1, &vbuffer); diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index b471d59eebf..5ecf8cbb067 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -681,7 +681,6 @@ util_dump_vertex_buffer(struct os_stream *stream, const struct pipe_vertex_buffe util_dump_struct_begin(stream, "pipe_vertex_buffer"); util_dump_member(stream, uint, state, stride); - util_dump_member(stream, uint, state, max_index); util_dump_member(stream, uint, state, buffer_offset); util_dump_member(stream, ptr, state, buffer); diff --git a/src/gallium/docs/d3d11ddi.txt b/src/gallium/docs/d3d11ddi.txt index 11e77190895..0a9e7e50f1d 100644 --- a/src/gallium/docs/d3d11ddi.txt +++ b/src/gallium/docs/d3d11ddi.txt @@ -337,7 +337,6 @@ IaSetTopology + Gallium supports line loops, triangle fans, quads, quad strips and polygons IaSetVertexBuffers -> set_vertex_buffers - + Gallium allows to specify a max_index here - Gallium only allows setting all vertex buffers at once, while D3D11 supports setting a subset OpenResource -> texture_from_handle diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index aa5decfc233..80e05823759 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -152,7 +152,7 @@ nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi, if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { /* TODO: use min and max instance divisor to get a proper range */ *base = 0; - *size = (nvc0->vtxbuf[vbi].max_index + 1) * nvc0->vtxbuf[vbi].stride; + *size = nvc0->vtxbuf[vbi].buffer->width0; } else { assert(nvc0->vbo_max_index != ~0); *base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride; diff --git 
a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 6682a1efe66..ae9a20ebb81 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -229,7 +229,7 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, - vbuffer->stride, vbuffer->max_index); + vbuffer->stride, ~0); translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 155c869fbd9..18805655bd7 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -517,7 +517,6 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) trace_dump_struct_begin("pipe_vertex_buffer"); trace_dump_member(uint, state, stride); - trace_dump_member(uint, state, max_index); trace_dump_member(uint, state, buffer_offset); trace_dump_member(resource_ptr, state, buffer); diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 574a7a80111..cf6c5b50268 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -408,7 +408,6 @@ struct pipe_transfer struct pipe_vertex_buffer { unsigned stride; /**< stride to same attrib in next vertex, in bytes */ - unsigned max_index; /**< number of vertices in this buffer */ unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */ struct pipe_resource *buffer; /**< the actual buffer */ }; diff --git a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp index 2ff24e17d41..61cf2ddd9df 100644 --- a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp +++ b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp @@ -796,7 +796,6 @@ struct dxgi_blitter vbuf.buffer = pipe_buffer_create(pipe->screen, 
PIPE_BIND_VERTEX_BUFFER, sizeof(quad_data)); vbuf.buffer_offset = 0; - vbuf.max_index = ~0; vbuf.stride = 4 * sizeof(float); pipe_buffer_write(pipe, vbuf.buffer, 0, sizeof(quad_data), quad_data); diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h index e1ba6c184fd..542d6591293 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h @@ -623,7 +623,6 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl vertex_buffers[start + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0; vertex_buffers[start + i].buffer_offset = new_offsets[i]; vertex_buffers[start + i].stride = new_strides[i]; - vertex_buffers[start + i].max_index = ~0; last_different = i; } } diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c index a491de27fa6..bcc5cb272ca 100644 --- a/src/gallium/state_trackers/vega/polygon.c +++ b/src/gallium/state_trackers/vega/polygon.c @@ -303,7 +303,6 @@ void polygon_fill(struct polygon *poly, struct vg_context *ctx) vbuffer.buffer = poly->vbuf; vbuffer.stride = COMPONENTS * sizeof(float); /* vertex size */ vbuffer.buffer_offset = 0; - vbuffer.max_index = poly->num_verts - 1; renderer_polygon_stencil_begin(ctx->renderer, &velement, ctx->state.vg.fill_rule, VG_FALSE); @@ -354,7 +353,6 @@ void polygon_array_fill(struct polygon_array *polyarray, struct vg_context *ctx) polygon_prepare_buffer(ctx, poly); vbuffer.buffer = poly->vbuf; - vbuffer.max_index = poly->num_verts - 1; renderer_polygon_stencil(ctx->renderer, &vbuffer, PIPE_PRIM_TRIANGLE_FAN, 0, (VGuint) poly->num_verts); diff --git a/src/gallium/tests/graw/fs-test.c b/src/gallium/tests/graw/fs-test.c index d21eb44e116..ff82b607110 100644 --- a/src/gallium/tests/graw/fs-test.c +++ b/src/gallium/tests/graw/fs-test.c @@ -215,7 +215,6 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex 
); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/gs-test.c b/src/gallium/tests/graw/gs-test.c index 0c65390e109..cc05889dd05 100644 --- a/src/gallium/tests/graw/gs-test.c +++ b/src/gallium/tests/graw/gs-test.c @@ -251,13 +251,11 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); vbuf.buffer_offset = 0; if (draw_strip) { - vbuf.max_index = sizeof(vertices_strip) / vbuf.stride; vbuf.buffer = screen->user_buffer_create(screen, vertices_strip, sizeof(vertices_strip), PIPE_BIND_VERTEX_BUFFER); } else { - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer = screen->user_buffer_create(screen, vertices, sizeof(vertices), diff --git a/src/gallium/tests/graw/quad-tex.c b/src/gallium/tests/graw/quad-tex.c index 58ca639d207..4e66813b301 100644 --- a/src/gallium/tests/graw/quad-tex.c +++ b/src/gallium/tests/graw/quad-tex.c @@ -97,7 +97,6 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/shader-leak.c b/src/gallium/tests/graw/shader-leak.c index 9af76f51ea2..a23ca73ac1d 100644 --- a/src/gallium/tests/graw/shader-leak.c +++ b/src/gallium/tests/graw/shader-leak.c @@ -88,7 +88,6 @@ static void set_vertices( void ) vbuf.stride = sizeof(struct vertex); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/tri-gs.c b/src/gallium/tests/graw/tri-gs.c index a1a00b32098..47b76530c6b 100644 --- a/src/gallium/tests/graw/tri-gs.c +++ b/src/gallium/tests/graw/tri-gs.c @@ -89,7 +89,6 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; 
vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/tri-instanced.c b/src/gallium/tests/graw/tri-instanced.c index f61d8b9844d..259b3d9527c 100644 --- a/src/gallium/tests/graw/tri-instanced.c +++ b/src/gallium/tests/graw/tri-instanced.c @@ -132,7 +132,6 @@ static void set_vertices( void ) /* vertex data */ vbuf[0].stride = sizeof( struct vertex ); - vbuf[0].max_index = sizeof(vertices) / vbuf[0].stride; vbuf[0].buffer_offset = 0; vbuf[0].buffer = screen->user_buffer_create(screen, vertices, @@ -141,7 +140,6 @@ static void set_vertices( void ) /* instance data */ vbuf[1].stride = sizeof( inst_data[0] ); - vbuf[1].max_index = sizeof(inst_data) / vbuf[1].stride; vbuf[1].buffer_offset = 0; vbuf[1].buffer = screen->user_buffer_create(screen, inst_data, diff --git a/src/gallium/tests/graw/tri.c b/src/gallium/tests/graw/tri.c index 006d61ca88c..4266c0394d8 100644 --- a/src/gallium/tests/graw/tri.c +++ b/src/gallium/tests/graw/tri.c @@ -93,7 +93,6 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/vs-test.c b/src/gallium/tests/graw/vs-test.c index 1358fa85dfd..dd64d8b9301 100644 --- a/src/gallium/tests/graw/vs-test.c +++ b/src/gallium/tests/graw/vs-test.c @@ -171,7 +171,6 @@ static void set_vertices( void ) } vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index d9b99a34990..4cbcecfd8ba 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -384,7 +384,6 @@ setup_interleaved_attribs(struct gl_context *ctx, vbuffer->buffer_offset = pointer_to_offset(low); } vbuffer->stride = stride; /* in bytes */ - 
vbuffer->max_index = max_index; } /* @@ -488,10 +487,6 @@ setup_non_interleaved_attribs(struct gl_context *ctx, /* common-case setup */ vbuffer[attr].stride = stride; /* in bytes */ - if (arrays[mesaAttr]->InstanceDivisor) - vbuffer[attr].max_index = arrays[mesaAttr]->_MaxElement; - else - vbuffer[attr].max_index = max_index; velements[attr].src_offset = 0; velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor; diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index 545b32d75bb..1e1220bfe52 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -179,7 +179,6 @@ st_feedback_draw_vbo(struct gl_context *ctx, /* common-case setup */ vbuffers[attr].stride = arrays[mesaAttr]->StrideB; /* in bytes */ - vbuffers[attr].max_index = max_index; velements[attr].instance_divisor = 0; velements[attr].vertex_buffer_index = attr; velements[attr].src_format = -- cgit v1.2.3 From 588fa884d212eba5ffbc69fda75db37d7c77214c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 9 Feb 2011 01:10:11 +0100 Subject: gallium: notify drivers about possible changes in user buffer contents Also implement the redefine_user_buffer hook in the drivers. 
--- src/gallium/auxiliary/util/u_blitter.c | 4 +++ src/gallium/auxiliary/util/u_transfer.c | 7 +++++ src/gallium/auxiliary/util/u_transfer.h | 10 +++---- src/gallium/docs/source/context.rst | 16 ++++++++++ src/gallium/drivers/cell/ppu/cell_state_vertex.c | 2 ++ src/gallium/drivers/failover/fo_state.c | 2 ++ src/gallium/drivers/galahad/glhd_context.c | 14 +++++++++ src/gallium/drivers/i915/i915_state.c | 2 ++ src/gallium/drivers/i965/brw_pipe_vertex.c | 2 ++ src/gallium/drivers/identity/id_context.c | 14 +++++++++ src/gallium/drivers/llvmpipe/lp_state_vertex.c | 3 ++ src/gallium/drivers/noop/noop_state.c | 2 ++ src/gallium/drivers/nv50/nv50_state.c | 2 ++ src/gallium/drivers/nvc0/nvc0_state.c | 3 ++ src/gallium/drivers/nvfx/nvfx_vbo.c | 3 ++ src/gallium/drivers/r300/r300_state.c | 2 ++ src/gallium/drivers/r600/evergreen_state.c | 1 + src/gallium/drivers/r600/r600_state.c | 2 ++ src/gallium/drivers/rbug/rbug_context.c | 14 +++++++++ src/gallium/drivers/softpipe/sp_state_vertex.c | 2 ++ src/gallium/drivers/svga/svga_pipe_vertex.c | 2 ++ src/gallium/drivers/trace/tr_context.c | 23 ++++++++++++++ src/gallium/include/pipe/p_context.h | 8 +++++ src/mesa/state_tracker/st_context.c | 5 ++++ src/mesa/state_tracker/st_context.h | 5 ++++ src/mesa/state_tracker/st_draw.c | 38 ++++++++++++++++++++++++ 26 files changed, 182 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index e27c445096d..fd1c2b72d04 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -639,6 +639,8 @@ static void blitter_draw_rectangle(struct blitter_context *blitter, } blitter_set_rectangle(ctx, x1, y1, x2, y2, depth); + ctx->base.pipe->redefine_user_buffer(ctx->base.pipe, ctx->vbuf, + 0, ctx->vbuf->width0); util_draw_vertex_buffer(ctx->base.pipe, NULL, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2); } @@ -867,6 +869,8 @@ void 
util_blitter_copy_region(struct blitter_context *blitter, /* Draw. */ blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); + ctx->base.pipe->redefine_user_buffer(ctx->base.pipe, ctx->vbuf, + 0, ctx->vbuf->width0); util_draw_vertex_buffer(ctx->base.pipe, NULL, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2); break; diff --git a/src/gallium/auxiliary/util/u_transfer.c b/src/gallium/auxiliary/util/u_transfer.c index e2828cfd99e..b6c63d9642f 100644 --- a/src/gallium/auxiliary/util/u_transfer.c +++ b/src/gallium/auxiliary/util/u_transfer.c @@ -112,3 +112,10 @@ void u_default_transfer_destroy(struct pipe_context *pipe, FREE(transfer); } +void u_default_redefine_user_buffer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned offset, + unsigned size) +{ + resource->width0 = MAX2(resource->width0, offset + size); +} diff --git a/src/gallium/auxiliary/util/u_transfer.h b/src/gallium/auxiliary/util/u_transfer.h index 52191512ac7..8cf9c418b04 100644 --- a/src/gallium/auxiliary/util/u_transfer.h +++ b/src/gallium/auxiliary/util/u_transfer.h @@ -136,11 +136,9 @@ void u_transfer_inline_write_vtbl( struct pipe_context *rm_ctx, unsigned stride, unsigned layer_stride); - - - - - - +void u_default_redefine_user_buffer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned offset, + unsigned size); #endif diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index da847262c13..04a39511287 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -392,6 +392,22 @@ be flushed on write or unmap. Flushes must be requested with ``transfer_flush_region``. Flush ranges are relative to the mapped range, not the beginning of the resource. + + +.. _redefine_user_buffer: + +redefine_user_buffer +%%%%%%%%%%%%%%%%%%%% + +This function notifies a driver that the user buffer content has been changed. +The updated region starts at ``offset`` and is ``size`` bytes large. 
+The ``offset`` is relative to the pointer specified in ``user_buffer_create``. +While uploading the user buffer, the driver is allowed not to upload +the memory outside of this region. +The width0 is redefined to ``MAX2(width0, offset+size)``. + + + .. _pipe_transfer: PIPE_TRANSFER diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index eb22a09a913..7f65b82619e 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -33,6 +33,7 @@ #include "cell_state.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "draw/draw_context.h" @@ -115,4 +116,5 @@ cell_init_vertex_functions(struct cell_context *cell) cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state; cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state; cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state; + cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index af1fd953aaf..b4da1b8b901 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -30,6 +30,7 @@ #include "util/u_inlines.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "fo_context.h" @@ -656,4 +657,5 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_constant_buffer = failover_set_constant_buffer; failover->pipe.create_sampler_view = failover_create_sampler_view; failover->pipe.sampler_view_destroy = failover_sampler_view_destroy; + failover->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index 8cbf0b1de4a..75e4c253dd9 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c 
@@ -962,6 +962,19 @@ galahad_context_transfer_inline_write(struct pipe_context *_context, } +static void galahad_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *context = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + + context->redefine_user_buffer(context, resource, offset, size); +} + + struct pipe_context * galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -1036,6 +1049,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.transfer_unmap = galahad_context_transfer_unmap; glhd_pipe->base.transfer_flush_region = galahad_context_transfer_flush_region; glhd_pipe->base.transfer_inline_write = galahad_context_transfer_inline_write; + glhd_pipe->base.redefine_user_buffer = galahad_redefine_user_buffer; glhd_pipe->pipe = pipe; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index b31cc306a44..f380708847b 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -33,6 +33,7 @@ #include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" #include "i915_context.h" @@ -895,4 +896,5 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.set_viewport_state = i915_set_viewport_state; i915->base.set_vertex_buffers = i915_set_vertex_buffers; i915->base.set_index_buffer = i915_set_index_buffer; + i915->base.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index b23454b5808..570ea23ff45 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ 
b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -4,6 +4,7 @@ #include "util/u_memory.h" #include "util/u_format.h" +#include "util/u_transfer.h" static unsigned brw_translate_surface_format( unsigned id ) @@ -302,6 +303,7 @@ brw_pipe_vertex_init( struct brw_context *brw ) brw->base.create_vertex_elements_state = brw_create_vertex_elements_state; brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state; brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state; + brw->base.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 3efbd6a246d..b533abe24c6 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -855,6 +855,19 @@ identity_context_transfer_inline_write(struct pipe_context *_context, } +static void identity_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct identity_context *id_context = identity_context(_context); + struct identity_resource *id_resource = identity_resource(_resource); + struct pipe_context *context = id_context->pipe; + struct pipe_resource *resource = id_resource->resource; + + context->redefine_user_buffer(context, resource, offset, size); +} + + struct pipe_context * identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -929,6 +942,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.transfer_unmap = identity_context_transfer_unmap; id_pipe->base.transfer_flush_region = identity_context_transfer_flush_region; id_pipe->base.transfer_inline_write = identity_context_transfer_inline_write; + id_pipe->base.redefine_user_buffer = identity_redefine_user_buffer; id_pipe->pipe = pipe; diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index 
fffdeb6ccde..be86f66de91 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -34,6 +34,7 @@ #include "draw/draw_context.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" static void * @@ -114,4 +115,6 @@ llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; llvmpipe->pipe.set_index_buffer = llvmpipe_set_index_buffer; + + llvmpipe->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/noop/noop_state.c b/src/gallium/drivers/noop/noop_state.c index ad324774c03..00a4c1eb01e 100644 --- a/src/gallium/drivers/noop/noop_state.c +++ b/src/gallium/drivers/noop/noop_state.c @@ -28,6 +28,7 @@ #include #include #include +#include "util/u_transfer.h" static void noop_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { @@ -287,4 +288,5 @@ void noop_init_state_functions(struct pipe_context *ctx) ctx->sampler_view_destroy = noop_sampler_view_destroy; ctx->surface_destroy = noop_surface_destroy; ctx->draw_vbo = noop_draw_vbo; + ctx->redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index b4eda0f617d..ba2c3e8c281 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -23,6 +23,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" @@ -886,5 +887,6 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; nv50->pipe.set_index_buffer = nv50_set_index_buffer; + nv50->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index f6a7f824d58..7fb91b1191d 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c 
+++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -22,6 +22,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" @@ -934,5 +935,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) nvc0->pipe.delete_stream_output_state = nvc0_tfb_state_delete; nvc0->pipe.bind_stream_output_state = nvc0_tfb_state_bind; nvc0->pipe.set_stream_output_buffers = nvc0_set_transform_feedback_buffers; + + nvc0->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 01dacb43dad..b72379d6536 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -2,6 +2,7 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "util/u_transfer.h" #include "translate/translate.h" #include "nvfx_context.h" @@ -631,4 +632,6 @@ nvfx_init_vbo_functions(struct nvfx_context *nvfx) nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; + + nvfx->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 242f883314e..1ec942854ff 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -28,6 +28,7 @@ #include "util/u_mm.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" @@ -1843,6 +1844,7 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.set_vertex_buffers = r300_set_vertex_buffers; r300->context.set_index_buffer = r300_set_index_buffer; + r300->context.redefine_user_buffer = u_default_redefine_user_buffer; r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; 
r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 88dcc9ba544..89e2d06abdd 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -905,6 +905,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view; rctx->context.set_viewport_state = evergreen_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; + rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; } void evergreen_init_config(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 01c59072a26..43cba667de9 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -37,6 +37,7 @@ #include #include #include +#include "util/u_transfer.h" #include #include "r600.h" #include "r600d.h" @@ -941,6 +942,7 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; rctx->context.set_viewport_state = r600_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; + rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; } void r600_init_config(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 94e57e40f86..3aefb5b3bb5 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -987,6 +987,19 @@ rbug_context_transfer_inline_write(struct pipe_context *_context, } +static void rbug_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct rbug_context *rb_pipe = rbug_context(_context); + struct 
rbug_resource *rb_resource = rbug_resource(_resource); + struct pipe_context *context = rb_pipe->pipe; + struct pipe_resource *resource = rb_resource->resource; + + context->redefine_user_buffer(context, resource, offset, size); +} + + struct pipe_context * rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -1072,6 +1085,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.transfer_unmap = rbug_context_transfer_unmap; rb_pipe->base.transfer_flush_region = rbug_context_transfer_flush_region; rb_pipe->base.transfer_inline_write = rbug_context_transfer_inline_write; + rb_pipe->base.redefine_user_buffer = rbug_redefine_user_buffer; rb_pipe->pipe = pipe; diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 5f4d661abde..aa0b333c7a9 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" #include "draw/draw_context.h" @@ -119,4 +120,5 @@ softpipe_init_vertex_funcs(struct pipe_context *pipe) pipe->set_vertex_buffers = softpipe_set_vertex_buffers; pipe->set_index_buffer = softpipe_set_index_buffer; + pipe->redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 86c79459f3e..6bf37fbbbaf 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -27,6 +27,7 @@ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" #include "svga_screen.h" @@ -131,6 +132,7 @@ void svga_init_vertex_functions( struct svga_context *svga ) svga->pipe.create_vertex_elements_state = svga_create_vertex_elements_state; svga->pipe.bind_vertex_elements_state = 
svga_bind_vertex_elements_state; svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state; + svga->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index eaabae8ce42..d24cc623c2e 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1419,6 +1419,28 @@ trace_context_transfer_inline_write(struct pipe_context *_context, } +static void trace_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct trace_context *tr_context = trace_context(_context); + struct trace_resource *tr_tex = trace_resource(_resource); + struct pipe_context *context = tr_context->pipe; + struct pipe_resource *resource = tr_tex->resource; + + assert(resource->screen == context->screen); + + trace_dump_call_begin("pipe_context", "redefine_user_buffer"); + + trace_dump_arg(ptr, context); + trace_dump_arg(ptr, resource); + trace_dump_arg(uint, offset); + trace_dump_arg(uint, size); + + trace_dump_call_end(); + + context->redefine_user_buffer(context, resource, offset, size); +} static const struct debug_named_value rbug_blocker_flags[] = { @@ -1506,6 +1528,7 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.transfer_unmap = trace_context_transfer_unmap; tr_ctx->base.transfer_flush_region = trace_context_transfer_flush_region; tr_ctx->base.transfer_inline_write = trace_context_transfer_inline_write; + tr_ctx->base.redefine_user_buffer = trace_redefine_user_buffer; tr_ctx->pipe = pipe; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 589cac2ddd3..24ee3fe1175 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -399,6 +399,14 @@ struct pipe_context { unsigned stride, unsigned layer_stride); + + /* Notify a driver that a content of a user buffer has been 
changed. + * The changed range is [offset, offset+size-1]. + * The new width0 of the buffer is offset+size. */ + void (*redefine_user_buffer)(struct pipe_context *, + struct pipe_resource *, + unsigned offset, + unsigned size); }; diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index dccbff3c1db..7a19f35bbf5 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -203,6 +203,11 @@ static void st_destroy_context_priv( struct st_context *st ) st_destroy_drawpix(st); st_destroy_drawtex(st); + /* Unreference any user vertex buffers. */ + for (i = 0; i < st->num_user_vbs; i++) { + pipe_resource_reference(&st->user_vb[i], NULL); + } + for (i = 0; i < Elements(st->state.sampler_views); i++) { pipe_sampler_view_reference(&st->state.sampler_views[i], NULL); } diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 64a8f790e22..77765f02379 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -185,6 +185,11 @@ struct st_context int force_msaa; void *winsys_drawable_handle; + + /* User vertex buffers. */ + struct pipe_resource *user_vb[PIPE_MAX_ATTRIBS]; + unsigned user_vb_stride[PIPE_MAX_ATTRIBS]; + unsigned num_user_vbs; }; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 34f75a37969..830e3e3c1bb 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -330,6 +330,11 @@ setup_interleaved_attribs(struct gl_context *ctx, stride * (max_index + 1), PIPE_BIND_VERTEX_BUFFER); vbuffer->buffer_offset = 0; + + /* Track user vertex buffers. */ + pipe_resource_reference(&st->user_vb[0], vbuffer->buffer); + st->user_vb_stride[0] = stride; + st->num_user_vbs = 1; } vbuffer->stride = stride; /* in bytes */ } @@ -405,6 +410,11 @@ setup_non_interleaved_attribs(struct gl_context *ctx, } vbuffer[attr].buffer_offset = 0; + + /* Track user vertex buffers. 
*/ + pipe_resource_reference(&st->user_vb[attr], vbuffer->buffer); + st->user_vb_stride[attr] = stride; + st->num_user_vbs = MAX2(st->num_user_vbs, attr+1); } /* common-case setup */ @@ -538,12 +548,20 @@ st_validate_varrays(struct gl_context *ctx, struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; unsigned num_vbuffers, num_velements; GLuint attr; + unsigned i; /* must get these after state validation! */ vp = st->vp; vpv = st->vp_variant; memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs); + + /* Unreference any user vertex buffers. */ + for (i = 0; i < st->num_user_vbs; i++) { + pipe_resource_reference(&st->user_vb[i], NULL); + } + st->num_user_vbs = 0; + /* * Setup the vbuffer[] and velements[] arrays. */ @@ -646,6 +664,26 @@ st_draw_vbo(struct gl_context *ctx, #endif } + /* Notify the driver that the content of user buffers may have been + * changed. */ + if (!new_array && st->num_user_vbs) { + for (i = 0; i < st->num_user_vbs; i++) { + if (st->user_vb[i]) { + unsigned stride = st->user_vb_stride[i]; + + if (stride) { + pipe->redefine_user_buffer(pipe, st->user_vb[i], + min_index * stride, + (max_index + 1 - min_index) * stride); + } else { + /* stride == 0 */ + pipe->redefine_user_buffer(pipe, st->user_vb[i], + 0, st->user_vb[i]->width0); + } + } + } + } + setup_index_buffer(ctx, ib, &ibuffer); pipe->set_index_buffer(pipe, &ibuffer); -- cgit v1.2.3 From cd8af3b60bf1ab7ec157042d6a63fb7ece25d954 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 15 Feb 2011 00:25:08 -0800 Subject: nvc0: Fix uninitialized variable warning. Fixes this GCC warning. 
nvc0_tgsi_to_nc.c: In function 'bld_tex': nvc0_tgsi_to_nc.c:1392: warning: 'dim' may be used uninitialized in this function --- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 72bfcd0c95f..687def0344d 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1175,7 +1175,7 @@ static INLINE void describe_texture_target(unsigned target, int *dim, int *array, int *cube, int *shadow) { - *array = *cube = *shadow = 0; + *dim = *array = *cube = *shadow = 0; switch (target) { case TGSI_TEXTURE_1D: -- cgit v1.2.3 From a3c62afa7c7f3435b3c28bee417e652c9bb018e6 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 15 Feb 2011 19:12:41 +0100 Subject: nvc0: fix user vertex buffer updates --- src/gallium/drivers/nvc0/nvc0_buffer.c | 19 ++++++++++++------- src/gallium/drivers/nvc0/nvc0_resource.h | 3 +++ src/gallium/drivers/nvc0/nvc0_screen.h | 6 ++++-- src/gallium/drivers/nvc0/nvc0_vbo.c | 25 +++++++++++++++++++++++-- 4 files changed, 42 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c index ea3e642a448..f16671ac7ff 100644 --- a/src/gallium/drivers/nvc0/nvc0_buffer.c +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c @@ -59,15 +59,23 @@ release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence) (*mm) = NULL; } -static INLINE boolean -nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf, - unsigned domain) +INLINE void +nvc0_buffer_release_gpu_storage(struct nvc0_resource *buf) { nouveau_bo_ref(NULL, &buf->bo); if (buf->mm) release_allocation(&buf->mm, buf->fence); + buf->domain = 0; +} + +static INLINE boolean +nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource 
*buf, + unsigned domain) +{ + nvc0_buffer_release_gpu_storage(buf); + return nvc0_buffer_allocate(screen, buf, domain); } @@ -77,10 +85,7 @@ nvc0_buffer_destroy(struct pipe_screen *pscreen, { struct nvc0_resource *res = nvc0_resource(presource); - nouveau_bo_ref(NULL, &res->bo); - - if (res->mm) - release_allocation(&res->mm, res->fence); + nvc0_buffer_release_gpu_storage(res); if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY)) FREE(res->data); diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index 17e79642a6d..709e6157f55 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -51,6 +51,9 @@ struct nvc0_resource { struct nvc0_mm_allocation *mm; }; +void +nvc0_buffer_release_gpu_storage(struct nvc0_resource *); + boolean nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *, unsigned start, unsigned size); diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 1fac142e2be..3b676fd21a1 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -128,9 +128,11 @@ nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) { struct nvc0_screen *screen = nvc0_screen(res->base.screen); - nouveau_bo_validate(screen->base.channel, res->bo, flags); + if (likely(res->bo)) { + nouveau_bo_validate(screen->base.channel, res->bo, flags); - nvc0_resource_fence(res, flags); + nvc0_resource_fence(res, flags); + } } diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 80e05823759..fb135725c3f 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -171,12 +171,15 @@ nvc0_prevalidate_vbufs(struct nvc0_context *nvc0) nvc0->vbo_fifo = nvc0->vbo_user = 0; + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX); + for (i = 0; i < nvc0->num_vtxbufs; ++i) { vb = &nvc0->vtxbuf[i]; if (!vb->stride) continue; buf = 
nvc0_resource(vb->buffer); + /* NOTE: user buffers with temporary storage count as mapped by GPU */ if (!nvc0_resource_mapped_by_gpu(vb->buffer)) { if (nvc0->vbo_push_hint) { nvc0->vbo_fifo = ~0; @@ -230,14 +233,27 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0) MARK_RING (chan, 6, 4); BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); OUT_RING (chan, i); - OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD); - OUT_RESRCl(chan, buf, size - 1, NOUVEAU_BO_RD); + OUT_RESRCh(chan, buf, base + size - 1, NOUVEAU_BO_RD); + OUT_RESRCl(chan, buf, base + size - 1, NOUVEAU_BO_RD); OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD); OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD); } nvc0->vbo_dirty = TRUE; } +static INLINE void +nvc0_release_user_vbufs(struct nvc0_context *nvc0) +{ + uint32_t vbo_user = nvc0->vbo_user; + + while (vbo_user) { + int i = ffs(vbo_user) - 1; + vbo_user &= ~(1 << i); + + nvc0_buffer_release_gpu_storage(nvc0_resource(nvc0->vtxbuf[i].buffer)); + } +} + void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) { @@ -564,6 +580,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvc0->vbo_min_index = info->min_index; nvc0->vbo_max_index = info->max_index; + if (nvc0->vbo_push_hint != !!nvc0->vbo_fifo) + nvc0->dirty |= NVC0_NEW_ARRAYS; + if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS))) nvc0_update_user_vbufs(nvc0); @@ -621,4 +640,6 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, info->instance_count, info->index_bias); } + + nvc0_release_user_vbufs(nvc0); } -- cgit v1.2.3 From 1b4c0c8ea0b4e6065f23f9f2bbb954a7bd2549e4 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 12 Feb 2011 18:27:47 +0100 Subject: nvc0: update the set of formats supported by the 2D engine --- src/gallium/drivers/nvc0/nvc0_surface.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers/nvc0') 
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index cc0a65687dc..faa51769313 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -33,25 +33,15 @@ #include "nv50_defs.xml.h" +#define NVC0_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL + /* return TRUE for formats that can be converted among each other by NVC0_2D */ static INLINE boolean nvc0_2d_format_faithful(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8X8_SRGB: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - return TRUE; - default: - return FALSE; - } + uint8_t id = nvc0_format_table[format].rt; + + return (id >= 0xc0) && (NVC0_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))); } static INLINE uint8_t @@ -62,7 +52,7 @@ nvc0_2d_format(enum pipe_format format) /* Hardware values for color formats range from 0xc0 to 0xff, * but the 2D engine doesn't support all of them. */ - if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0)))) + if (nvc0_2d_format_faithful(format)) return id; switch (util_format_get_blocksize(format)) { @@ -72,6 +62,10 @@ nvc0_2d_format(enum pipe_format format) return NV50_SURFACE_FORMAT_R16_UNORM; case 4: return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM; + case 8: + return NV50_SURFACE_FORMAT_R16G16B16A16_UNORM; + case 16: + return NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT; default: return 0; } -- cgit v1.2.3 From bf1ce9c64b3da731bc6073055abc9f3340ac5a17 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 12 Feb 2011 18:50:03 +0100 Subject: nvc0: use format from the template on surface creation Fixes piglit/fbo-srgb. 
--- src/gallium/drivers/nvc0/nvc0_miptree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c index 22f48c8a5fe..ea3ed9e0225 100644 --- a/src/gallium/drivers/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -283,7 +283,7 @@ nvc0_miptree_surface_new(struct pipe_context *pipe, pipe_reference_init(&ps->reference, 1); pipe_resource_reference(&ps->texture, pt); ps->context = pipe; - ps->format = pt->format; + ps->format = templ->format; ps->usage = templ->usage; ps->u.tex.level = templ->u.tex.level; ps->u.tex.first_layer = templ->u.tex.first_layer; -- cgit v1.2.3 From 17d680cc537acf8a967d9e36f7006afab560122a Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 12 Feb 2011 19:18:19 +0100 Subject: nvc0: force vertex data through FIFO if we need to convert it We may want to put the converted vertex buffer in persistent storage instead, but these are rare corner cases. --- src/gallium/drivers/nvc0/nvc0_stateobj.h | 1 + src/gallium/drivers/nvc0/nvc0_vbo.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index 752e927e2aa..5b15e286751 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -67,6 +67,7 @@ struct nvc0_vertex_stateobj { unsigned num_elements; uint32_t instance_elts; uint32_t instance_bufs; + boolean need_conversion; /* e.g. 
VFETCH cannot convert f64 to f32 */ unsigned vtx_size; unsigned vtx_per_packet_max; struct nvc0_vertex_element element[0]; diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index fb135725c3f..19fd85273c1 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -60,6 +60,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, so->num_elements = num_elements; so->instance_elts = 0; so->instance_bufs = 0; + so->need_conversion = FALSE; transkey.nr_elements = 0; transkey.output_stride = 0; @@ -83,6 +84,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, return NULL; } so->element[i].state = nvc0_format_table[fmt].vtx; + so->need_conversion = TRUE; } so->element[i].state |= i; @@ -263,7 +265,12 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) struct nvc0_vertex_element *ve; unsigned i; - nvc0_prevalidate_vbufs(nvc0); + if (unlikely(vertex->need_conversion)) { + nvc0->vbo_fifo = ~0; + nvc0->vbo_user = 0; + } else { + nvc0_prevalidate_vbufs(nvc0); + } BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements); for (i = 0; i < vertex->num_elements; ++i) { -- cgit v1.2.3 From 80a7ae3cc5735b7615c049425b306a53662740a9 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 12 Feb 2011 22:02:26 +0100 Subject: nvc0: disable early fragment tests if KIL is used Early-Z pass raises the occlusion counter. --- src/gallium/drivers/nvc0/nvc0_program.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 613dc431bfd..f7ea97ddb1d 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -633,7 +633,7 @@ nvc0_prog_scan(struct nvc0_translation_info *ti) if (ti->scan.writes_z) prog->flags[0] = 0x11; /* ? 
*/ else - if (!ti->global_stores) + if (!ti->scan.uses_kill && !ti->global_stores) prog->fp.early_z = 1; ret = nvc0_fp_gen_header(prog, ti); -- cgit v1.2.3 From a24e9bd497d54a373b021370f90144596a37945b Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 13 Feb 2011 00:17:43 +0100 Subject: nvc0: clone memory values with multiple refs before modifying them --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 9a7094e5d3c..53010f8bd50 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -732,7 +732,8 @@ struct pass_reld_elim { * The two loads may not overlap but reference adjacent memory locations. */ static void -combine_load(struct mem_record *rec, struct nv_instruction *ld) +combine_load(struct nv_pc *pc, struct mem_record *rec, + struct nv_instruction *ld) { struct nv_instruction *fv = rec->insn; struct nv_value *mem = ld->src[0]->value; @@ -760,6 +761,8 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld) fv->def[d++]->insn = fv; } + if (fv->src[0]->value->refc > 1) + nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value)); fv->src[0]->value->reg.address = rec->ofst; fv->src[0]->value->reg.size = rec->size = size; @@ -841,7 +844,7 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) switch (ld->opcode) { case NV_OP_EXPORT: combine_export(it, ld); break; default: - combine_load(it, ld); + combine_load(ctx->pc, it, ld); break; } } else -- cgit v1.2.3 From 293a8d1b600cd5bd89b3c4c0b6c2bb245d9bd80f Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 14 Feb 2011 02:04:58 +0100 Subject: nvc0: front stencil mask and func mask methods are swapped --- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 6 +++--- src/gallium/drivers/nvc0/nvc0_state.c | 12 +++++++----- 2 
files changed, 10 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 4b1325a3043..59da15ed430 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -575,7 +575,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004 #define NVC0_3D_BLEND_ENABLE__LEN 0x00000008 -#define NVC0_3D_STENCIL_FRONT_ENABLE 0x00001380 +#define NVC0_3D_STENCIL_ENABLE 0x00001380 #define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384 #define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 @@ -619,9 +619,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394 -#define NVC0_3D_STENCIL_FRONT_MASK 0x00001398 +#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x00001398 -#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c +#define NVC0_3D_STENCIL_FRONT_MASK 0x0000139c #define NVC0_3D_DRAW_TFB_BASE 0x000013a4 diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index 7fb91b1191d..7f59d40c5fb 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -284,20 +284,21 @@ nvc0_zsa_state_create(struct pipe_context *pipe, } if (cso->stencil[0].enabled) { - SB_BEGIN_3D(so, STENCIL_FRONT_ENABLE, 5); + SB_BEGIN_3D(so, STENCIL_ENABLE, 5); SB_DATA (so, 1); SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op)); SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func)); - SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2); - SB_DATA (so, cso->stencil[0].writemask); + SB_BEGIN_3D(so, STENCIL_FRONT_FUNC_MASK, 2); SB_DATA (so, cso->stencil[0].valuemask); + SB_DATA (so, cso->stencil[0].writemask); } else { - SB_IMMED_3D(so, STENCIL_FRONT_ENABLE, 0); + SB_IMMED_3D(so, 
STENCIL_ENABLE, 0); } if (cso->stencil[1].enabled) { + assert(cso->stencil[0].enabled); SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5); SB_DATA (so, 1); SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op)); @@ -307,7 +308,8 @@ nvc0_zsa_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2); SB_DATA (so, cso->stencil[1].writemask); SB_DATA (so, cso->stencil[1].valuemask); - } else { + } else + if (cso->stencil[0].enabled) { SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0); } -- cgit v1.2.3 From 19f2272e94895cf241f6b05117535e008e07d0a7 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 14 Feb 2011 14:10:41 +0100 Subject: nvc0: demagic the clear flags and fix region clears The CLIP_RECTs always affect dedicated clears, and it's nicer than having to mark the viewport or scissor state dirty after it. --- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 60 ++++++++++++++++++++++----------- src/gallium/drivers/nvc0/nvc0_screen.c | 14 +++++--- src/gallium/drivers/nvc0/nvc0_surface.c | 20 ++++++----- 3 files changed, 62 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 59da15ed430..7352aa1e99f 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -230,21 +230,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010 #define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0) (0x00000d00 + 0x8*(i0)) -#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK 0x0000ffff -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT 0 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK 0xffff0000 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT 16 - -#define NVC0_3D_VIEWPORT_CLIP_VERT(i0) (0x00000d04 + 0x8*(i0)) -#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK 0x0000ffff -#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT 0 -#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK 0xffff0000 -#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT 16 +#define NVC0_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ__LEN 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16 + +#define NVC0_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_VERT__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_VERT__LEN 0x00000008 +#define NVC0_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_VERT_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_VERT_MAX__SHIFT 16 #define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0)) #define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008 @@ -370,6 +370,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff #define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0 +#define NVC0_3D_CLEAR_FLAGS 0x000010f8 +#define NVC0_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001 +#define NVC0_3D_CLEAR_FLAGS_UNK4 0x00000010 +#define NVC0_3D_CLEAR_FLAGS_SCISSOR 0x00000100 +#define NVC0_3D_CLEAR_FLAGS_VIEWPORT 0x00001000 + #define NVC0_3D_VERTEX_ID 0x00001118 #define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c @@ -656,6 +662,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_CLIPID_HEIGHT 0x00001504 #define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ 0x00001508 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16 + +#define NVC0_3D_CLIPID_FILL_RECT_VERT 0x0000150c +#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16 + #define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510 #define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001 #define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002 @@ -954,12 +972,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000 -#define NVC0_3D_VIEWPORT_CLIP_RECTS_EN 0x0000194c +#define NVC0_3D_CLIP_RECTS_EN 0x0000194c -#define NVC0_3D_VIEWPORT_CLIP_MODE 0x00001950 -#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY 0x00000000 -#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL 0x00000001 -#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER 0x00000002 +#define NVC0_3D_CLIP_RECTS_MODE 0x00001950 +#define NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000 +#define NVC0_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001 +#define NVC0_3D_CLIP_RECTS_MODE_NEVER 0x00000002 #define NVC0_3D_FP_ZORDER_CTRL 0x0000196c #define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001 @@ -1012,6 +1030,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00 #define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10 +#define NVC0_3D_CLIPID_FILL 0x000019d4 + #define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0)) #define NVC0_3D_COLOR_MASK__ESIZE 0x00000004 #define NVC0_3D_COLOR_MASK__LEN 0x00000008 diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 88daf31d46a..54510696dc0 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -283,9 +283,6 @@ nvc0_magic_3d_init(struct nouveau_channel *chan) BEGIN_RING(chan, RING_3D_(0x074c), 1); OUT_RING (chan, 0x3f); - BEGIN_RING(chan, RING_3D_(0x10f8), 1); - OUT_RING (chan, 0x0101); - BEGIN_RING(chan, RING_3D_(0x16a8), 1); OUT_RING (chan, (3 << 16) | 3); BEGIN_RING(chan, RING_3D_(0x1794), 1); @@ -534,11 +531,20 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */ OUT_RING (chan, 0x3f); - BEGIN_RING(chan, RING_3D(VIEWPORT_CLIP_RECTS_EN), 1); + BEGIN_RING(chan, RING_3D(CLIP_RECTS_MODE), 1); + OUT_RING (chan, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 
8 * 2); + for (i = 0; i < 8 * 2; ++i) + OUT_RING(chan, 0); + BEGIN_RING(chan, RING_3D(CLIP_RECTS_EN), 1); OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1); OUT_RING (chan, 0); + /* neither scissors, viewport nor stencil mask should affect clears */ + BEGIN_RING(chan, RING_3D(CLEAR_FLAGS), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index faa51769313..8898bc733a3 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -243,15 +243,16 @@ nvc0_clear_render_target(struct pipe_context *pipe, OUT_RING (chan, 1); OUT_RING (chan, 0); - /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ - - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, (width << 16) | dstx); - OUT_RING (chan, (height << 16) | dsty); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2); + OUT_RING (chan, ((dstx + width) << 16) | dstx); + OUT_RING (chan, ((dsty + height) << 16) | dsty); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1); BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); OUT_RING (chan, 0x3c); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0); + nv50->dirty |= NVC0_NEW_FRAMEBUFFER; } @@ -300,13 +301,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, OUT_RING (chan, sf->height); OUT_RING (chan, (1 << 16) | 1); - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, (width << 16) | dstx); - OUT_RING (chan, (height << 16) | dsty); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2); + OUT_RING (chan, ((dstx + width) << 16) | dstx); + OUT_RING (chan, ((dsty + height) << 16) | dsty); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1); BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); OUT_RING (chan, mode); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0); + nv50->dirty |= NVC0_NEW_FRAMEBUFFER; } -- cgit v1.2.3 From 
e7845e319679e3539274c37e9c16692a2dfe59fe Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 15 Feb 2011 14:41:20 +0100 Subject: nvc0: fix clipping and use VIEWPORT instead of SCISSOR --- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 18 +++-- src/gallium/drivers/nvc0/nvc0_screen.c | 2 + src/gallium/drivers/nvc0/nvc0_state.c | 6 +- src/gallium/drivers/nvc0/nvc0_state_validate.c | 100 ++++++++++--------------- src/gallium/drivers/nvc0/nvc0_stateobj.h | 2 - 5 files changed, 58 insertions(+), 70 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 7352aa1e99f..73a605f94e1 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -962,15 +962,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3 0x00000008 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4 0x00000010 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 0x00000002 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK2 0x00000004 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000 +#define 
NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000 #define NVC0_3D_CLIP_RECTS_EN 0x0000194c diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 54510696dc0..321d86bdf1a 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -550,6 +550,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); OUT_RINGf (chan, 0.0f); OUT_RINGf (chan, 1.0f); + BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); + OUT_RING (chan, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1); /* We use scissors instead of exact view volume clipping, * so they're always enabled. diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index 7f59d40c5fb..ae5f335f9f2 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -177,9 +177,9 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, return NULL; so->pipe = *cso; -#ifndef NVC0_SCISSORS_CLIPPING - SB_IMMED_3D(so, SCISSOR_ENABLE(0), cso->scissor); -#endif + /* Scissor enables are handled in scissor state, we will not want to + * always emit 16 commands, one for each scissor rectangle, here. + */ SB_BEGIN_3D(so, SHADE_MODEL, 1); SB_DATA (so, cso->flatshade ? 
NVC0_3D_SHADE_MODEL_FLAT : diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 7406f6c7917..96c1198d4cb 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -117,12 +117,6 @@ nvc0_validate_fb(struct nvc0_context *nvc0) BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); OUT_RING (chan, 0); } - -#ifndef NVC0_SCISSORS_CLIPPING - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, fb->width << 16); - OUT_RING (chan, fb->height << 16); -#endif } static void @@ -164,65 +158,54 @@ nvc0_validate_scissor(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; struct pipe_scissor_state *s = &nvc0->scissor; -#ifdef NVC0_SCISSORS_CLIPPING - struct pipe_viewport_state *vp = &nvc0->viewport; - int minx, maxx, miny, maxy; - if (!(nvc0->dirty & - (NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | NVC0_NEW_FRAMEBUFFER)) && - nvc0->state.scissor == nvc0->rast->pipe.scissor) + if (!(nvc0->dirty & NVC0_NEW_SCISSOR) && + nvc0->rast->pipe.scissor == nvc0->state.scissor) return; nvc0->state.scissor = nvc0->rast->pipe.scissor; - if (nvc0->state.scissor) { - minx = s->minx; - maxx = s->maxx; - miny = s->miny; - maxy = s->maxy; + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); + if (nvc0->rast->pipe.scissor) { + OUT_RING(chan, (s->maxx << 16) | s->minx); + OUT_RING(chan, (s->maxy << 16) | s->miny); } else { - minx = 0; - maxx = nvc0->framebuffer.width; - miny = 0; - maxy = nvc0->framebuffer.height; + OUT_RING(chan, (0xffff << 16) | 0); + OUT_RING(chan, (0xffff << 16) | 0); } - - minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0]))); - maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0]))); - miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1]))); - maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1]))); - - BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); - OUT_RING (chan, (maxx << 16) | minx); - 
OUT_RING (chan, (maxy << 16) | miny); - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, ((maxx - minx) << 16) | minx); - OUT_RING (chan, ((maxy - miny) << 16) | miny); -#else - BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); - OUT_RING (chan, (s->maxx << 16) | s->minx); - OUT_RING (chan, (s->maxy << 16) | s->miny); -#endif } static void nvc0_validate_viewport(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; + struct pipe_viewport_state *vp = &nvc0->viewport; + int x, y, w, h; + float zmin, zmax; BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3); - OUT_RINGf (chan, nvc0->viewport.translate[0]); - OUT_RINGf (chan, nvc0->viewport.translate[1]); - OUT_RINGf (chan, nvc0->viewport.translate[2]); + OUT_RINGf (chan, vp->translate[0]); + OUT_RINGf (chan, vp->translate[1]); + OUT_RINGf (chan, vp->translate[2]); BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3); - OUT_RINGf (chan, nvc0->viewport.scale[0]); - OUT_RINGf (chan, nvc0->viewport.scale[1]); - OUT_RINGf (chan, nvc0->viewport.scale[2]); + OUT_RINGf (chan, vp->scale[0]); + OUT_RINGf (chan, vp->scale[1]); + OUT_RINGf (chan, vp->scale[2]); -#ifdef NVC0_SCISSORS_CLIPPING + /* now set the viewport rectangle to viewport dimensions for clipping */ + + x = (int)(vp->translate[0] - fabsf(vp->scale[0])); + y = (int)(vp->translate[1] - fabsf(vp->scale[1])); + w = (int)fabsf(2.0f * vp->scale[0]); + h = (int)fabsf(2.0f * vp->scale[1]); + zmin = vp->translate[2] - fabsf(vp->scale[2]); + zmax = vp->translate[2] + fabsf(vp->scale[2]); + + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); + OUT_RING (chan, (w << 16) | x); + OUT_RING (chan, (h << 16) | y); BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); - OUT_RINGf (chan, nvc0->viewport.translate[2] - nvc0->viewport.scale[2]); - OUT_RINGf (chan, nvc0->viewport.translate[2] + nvc0->viewport.scale[2]); -#endif + OUT_RINGf (chan, zmin); + OUT_RINGf (chan, zmax); } static void @@ -231,10 +214,15 @@ nvc0_validate_clip(struct 
nvc0_context *nvc0) struct nouveau_channel *chan = nvc0->screen->base.channel; uint32_t clip; - clip = nvc0->clip.depth_clamp ? 0x201a : 0x0002; -#ifndef NVC0_SCISSORS_CLIPPING - clip |= 0x1080; -#endif + if (nvc0->clip.depth_clamp) { + clip = + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2; + } else { + clip = NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1; + } BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); OUT_RING (chan, clip); @@ -418,13 +406,7 @@ static struct state_validate { { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR }, { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF }, { nvc0_validate_stipple, NVC0_NEW_STIPPLE }, -#ifdef NVC0_SCISSORS_CLIPPING - { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | - NVC0_NEW_RASTERIZER | - NVC0_NEW_FRAMEBUFFER }, -#else - { nvc0_validate_scissor, NVC0_NEW_SCISSOR }, -#endif + { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER }, { nvc0_validate_viewport, NVC0_NEW_VIEWPORT }, { nvc0_validate_clip, NVC0_NEW_CLIP }, { nvc0_vertprog_validate, NVC0_NEW_VERTPROG }, diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index 5b15e286751..57566128ab5 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -4,8 +4,6 @@ #include "pipe/p_state.h" -#define NVC0_SCISSORS_CLIPPING - #define SB_BEGIN_3D(so, m, s) \ (so)->state[(so)->size++] = \ (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2) -- cgit v1.2.3 From 2fa35eedd9dbc193904256e6004913e94a044158 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 15 Feb 2011 21:36:57 +0100 Subject: nvc0: add missing break statements in constant_operand --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers/nvc0') diff --git 
a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 53010f8bd50..12a31493c75 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -544,6 +544,7 @@ constant_operand(struct nv_pc *pc, nv_reference(pc, nvi, s, nvi->src[t]->value); nvi->src[s]->mod = nvi->src[t]->mod; } + break; case NV_OP_ADD_F32: if (u.u32 == 0) { switch (nvi->src[t]->mod) { @@ -563,6 +564,7 @@ constant_operand(struct nv_pc *pc, if (nvi->opcode != NV_OP_CVT) nvi->src[0]->mod = 0; } + break; case NV_OP_ADD_B32: if (u.u32 == 0) { assert(nvi->src[t]->mod == 0); -- cgit v1.2.3 From bb2c8e709975223e8131b7c627c08eeb4f3ec88d Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 16 Feb 2011 11:42:28 +0100 Subject: nvc0: don't swap sources if either value is not in a GPR The memory / immediate source should already be in the only valid position. --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 12a31493c75..3d03400518b 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -245,6 +245,9 @@ check_swap_src_0_1(struct nv_instruction *nvi) return; assert(src0 && src1 && src0->value && src1->value); + if (src1->value->reg.file != NV_FILE_GPR) + return; + if (is_cspace_load(src0->value->insn)) { if (!is_cspace_load(src1->value->insn)) { nvi->src[0] = src1; -- cgit v1.2.3 From 3f1361e060822c369f3b375bc695c9e65db59c29 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 16 Feb 2011 11:57:00 +0100 Subject: nvc0: fix emit_dfdx,dfdy --- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index 
b2a80566a02..c10f920e6f1 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -479,6 +479,7 @@ emit_ddx(struct nv_pc *pc, struct nv_instruction *i) { i->quadop = 0x99; i->lanes = 4; + i->src[1] = i->src[0]; emit_quadop(pc, i); } @@ -487,6 +488,7 @@ emit_ddy(struct nv_pc *pc, struct nv_instruction *i) { i->quadop = 0xa5; i->lanes = 5; + i->src[1] = i->src[0]; emit_quadop(pc, i); } -- cgit v1.2.3 From 3903e25a2cd6c198581021242897b1952d2afea0 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 16 Feb 2011 15:41:32 +0100 Subject: nvc0: fix blend factor mapping --- src/gallium/drivers/nvc0/nvc0_state.c | 57 ++++++++++++++++------------------- 1 file changed, 26 insertions(+), 31 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index ae5f335f9f2..666e3802979 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -51,40 +51,35 @@ nvc0_colormask(unsigned mask) return ret; } +#define NVC0_BLEND_FACTOR_CASE(a, b) \ + case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b + static INLINE uint32_t nvc0_blend_fac(unsigned factor) { - static const uint16_t bf[] = { - NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */ - NV50_3D_BLEND_FACTOR_ONE, - NV50_3D_BLEND_FACTOR_SRC_COLOR, - NV50_3D_BLEND_FACTOR_SRC_ALPHA, - NV50_3D_BLEND_FACTOR_DST_ALPHA, - NV50_3D_BLEND_FACTOR_DST_COLOR, - NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE, - NV50_3D_BLEND_FACTOR_CONSTANT_COLOR, - NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA, - NV50_3D_BLEND_FACTOR_SRC1_COLOR, - NV50_3D_BLEND_FACTOR_SRC1_ALPHA, - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */ - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, - 
NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, - NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, - NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA - }; - - assert(factor < (sizeof(bf) / sizeof(bf[0]))); - return bf[factor]; + switch (factor) { + NVC0_BLEND_FACTOR_CASE(ONE, ONE); + NVC0_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE); + NVC0_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA); + NVC0_BLEND_FACTOR_CASE(ZERO, ZERO); + NVC0_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA); + default: + return NV50_3D_BLEND_FACTOR_ZERO; + } } static void * -- cgit v1.2.3 From 3d190e44dec40650d88256cb074a12ca74d7c31e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 19 Feb 2011 14:14:40 +0100 Subject: nvc0: don't overwrite phi sources at the end of a loop Except the reference to its own result. 
--- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 687def0344d..e2838a0f1d0 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -465,6 +465,7 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) reg = (struct bld_register *)phi->target; phi->target = NULL; + /* start with s == 1, src[0] is from outside the loop */ for (s = 1, n = 0; n < bb->num_in; ++n) { if (bb->in_kind[n] != CFG_EDGE_BACK) continue; @@ -476,8 +477,11 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) for (i = 0; i < 4; ++i) if (phi->src[i] && phi->src[i]->value == val) break; - if (i == 4) + if (i == 4) { + /* skip values we do not want to replace */ + for (; phi->src[s] && phi->src[s]->value != phi->def[0]; ++s); nv_reference(bld->pc, phi, s++, val); + } } bld->pc->current_block = save; -- cgit v1.2.3 From 88066d62ae7ec9c715e195f8ff65a0dc5b64c25e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 19 Feb 2011 14:18:28 +0100 Subject: nvc0: don't visit target blocks of a loop break multiple times --- src/gallium/drivers/nvc0/nvc0_pc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index e0cba05b976..3a3a00f27be 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -189,7 +189,10 @@ nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, bb[p++] = b->out[j]; break; case CFG_EDGE_LOOP_LEAVE: - bbb[pp++] = b->out[j]; + if (!b->out[j]->priv) { + bbb[pp++] = b->out[j]; + b->out[j]->priv = 1; + } break; default: assert(0); -- cgit v1.2.3 From 7d8ff54feb0b590048184bb41e214a511770fd20 Mon Sep 17 00:00:00 2001 From: Christoph 
Bumiller Date: Sat, 19 Feb 2011 20:26:29 +0100 Subject: nvc0: fix SSG --- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index e2838a0f1d0..18ae0e00c41 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1825,11 +1825,11 @@ bld_instruction(struct bld_context *bld, case TGSI_OPCODE_SSG: FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */ src0 = emit_fetch(bld, insn, 0, c); - src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero); - temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000)); - temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f)); - dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp); - bld_src_predicate(bld, dst0[c]->insn, 1, src1); + src1 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero); + src2 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero); + src1->insn->set_cond = NV_CC_GT; + src2->insn->set_cond = NV_CC_LT; + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2); } break; case TGSI_OPCODE_SUB: -- cgit v1.2.3 From 1579017b08f28d460e17de65bcc8ba17ba695c37 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 19 Feb 2011 20:26:49 +0100 Subject: nvc0: multiply polygon offset units by 2 Wasn't sure if this still was necessary because the piglit test started to fail at some point on nv50 where we already do this. 
--- src/gallium/drivers/nvc0/nvc0_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index 666e3802979..aa437195764 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -238,7 +238,7 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1); SB_DATA (so, fui(cso->offset_scale)); SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1); - SB_DATA (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? */ + SB_DATA (so, fui(cso->offset_units * 2.0f)); } assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); -- cgit v1.2.3 From 410a13c5ce799fe97a4e4503190d0f66fb2559a3 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 20 Feb 2011 15:10:02 +0100 Subject: nvc0: values for undefined outputs must have file GPR --- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 18ae0e00c41..5e208e8623d 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -133,13 +133,10 @@ struct bld_context { static INLINE ubyte bld_register_file(struct bld_context *bld, struct bld_register *reg) { - if (reg < &bld->avs[0][0]) return NV_FILE_GPR; - else - if (reg < &bld->pvs[0][0]) return NV_FILE_GPR; - else - if (reg < &bld->ovs[0][0]) return NV_FILE_PRED; - else - return NV_FILE_MEM_V; + if (reg >= &bld->pvs[0][0] && + reg < &bld->ovs[0][0]) + return NV_FILE_PRED; + return NV_FILE_GPR; } static INLINE struct nv_value * -- cgit v1.2.3 From a6ea37da4bd02241ce3bf522b93dd7ff0757f959 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 20 Feb 2011 17:57:47 +0100 Subject: nvc0: improve userspace fencing Before, there 
were situations in which we never checked the fences for completion (some loading screens for example) and thus never released memory. --- src/gallium/drivers/nvc0/nvc0_context.c | 22 +++++++++++++------- src/gallium/drivers/nvc0/nvc0_context.h | 2 ++ src/gallium/drivers/nvc0/nvc0_fence.c | 36 +++++++++++++++++++-------------- src/gallium/drivers/nvc0/nvc0_fence.h | 3 ++- src/gallium/drivers/nvc0/nvc0_screen.h | 3 ++- src/gallium/drivers/nvc0/nvc0_vbo.c | 6 ++++-- 6 files changed, 46 insertions(+), 26 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 20c1a31b5b3..f02de4d044a 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -47,15 +47,12 @@ nvc0_flush(struct pipe_context *pipe, unsigned flags, OUT_RING (chan, 0); } - if (fence) { - nvc0_screen_fence_new(nvc0->screen, (struct nvc0_fence **)fence, TRUE); - } + if (fence) + nvc0_fence_reference((struct nvc0_fence **)fence, + nvc0->screen->fence.current); - if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) { + if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) FIRE_RING(chan); - - nvc0_screen_fence_next(nvc0->screen); - } } static void @@ -71,6 +68,16 @@ nvc0_destroy(struct pipe_context *pipe) FREE(nvc0); } +void +nvc0_default_flush_notify(struct nouveau_channel *chan) +{ + struct nvc0_context *nvc0 = chan->user_private; + + nvc0_screen_fence_update(nvc0->screen, TRUE); + + nvc0_screen_fence_next(nvc0->screen); +} + struct pipe_context * nvc0_create(struct pipe_screen *pscreen, void *priv) { @@ -95,6 +102,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) nvc0->pipe.flush = nvc0_flush; screen->base.channel->user_private = nvc0; + screen->base.channel->flush_notify = nvc0_default_flush_notify; nvc0_init_query_functions(nvc0); nvc0_init_surface_functions(nvc0); diff --git a/src/gallium/drivers/nvc0/nvc0_context.h 
b/src/gallium/drivers/nvc0/nvc0_context.h index a082ad4575c..3722f358d89 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -156,6 +156,8 @@ nvc0_surface(struct pipe_surface *ps) /* nvc0_context.c */ struct pipe_context *nvc0_create(struct pipe_screen *, void *); +void nvc0_default_flush_notify(struct nouveau_channel *); + void nvc0_bufctx_emit_relocs(struct nvc0_context *); void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx, struct nvc0_resource *, uint32_t flags); diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c index 3a3dd75c152..f2d4b1451bf 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.c +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -84,7 +84,8 @@ nvc0_fence_del(struct nvc0_fence *fence) struct nvc0_fence *it; struct nvc0_screen *screen = fence->screen; - if (fence->state == NVC0_FENCE_STATE_EMITTED) { + if (fence->state == NVC0_FENCE_STATE_EMITTED || + fence->state == NVC0_FENCE_STATE_FLUSHED) { if (fence == screen->fence.head) { screen->fence.head = fence->next; if (!screen->fence.head) @@ -119,8 +120,8 @@ nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence) fence->buffers = NULL; } -static void -nvc0_screen_fence_update(struct nvc0_screen *screen) +void +nvc0_screen_fence_update(struct nvc0_screen *screen, boolean flushed) { struct nvc0_fence *fence; struct nvc0_fence *next = NULL; @@ -147,38 +148,43 @@ nvc0_screen_fence_update(struct nvc0_screen *screen) screen->fence.head = next; if (!next) screen->fence.tail = NULL; -} -#define NVC0_FENCE_MAX_SPINS (1 << 17) + if (flushed) { + for (fence = next; fence; fence = fence->next) + fence->state = NVC0_FENCE_STATE_FLUSHED; + } +} boolean nvc0_fence_signalled(struct nvc0_fence *fence) { struct nvc0_screen *screen = fence->screen; - if (fence->state == NVC0_FENCE_STATE_EMITTED) - nvc0_screen_fence_update(screen); + if (fence->state >= NVC0_FENCE_STATE_EMITTED) + nvc0_screen_fence_update(screen, FALSE); 
return fence->state == NVC0_FENCE_STATE_SIGNALLED; } +#define NVC0_FENCE_MAX_SPINS (1 << 31) + boolean nvc0_fence_wait(struct nvc0_fence *fence) { struct nvc0_screen *screen = fence->screen; - int spins = 0; + uint32_t spins = 0; - if (fence->state == NVC0_FENCE_STATE_AVAILABLE) { + if (fence->state < NVC0_FENCE_STATE_EMITTED) { nvc0_fence_emit(fence); - FIRE_RING(screen->base.channel); - if (fence == screen->fence.current) nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); } + if (fence->state < NVC0_FENCE_STATE_FLUSHED) + FIRE_RING(screen->base.channel); do { - nvc0_screen_fence_update(screen); + nvc0_screen_fence_update(screen, FALSE); if (fence->state == NVC0_FENCE_STATE_SIGNALLED) return TRUE; @@ -189,8 +195,9 @@ nvc0_fence_wait(struct nvc0_fence *fence) #endif } while (spins < NVC0_FENCE_MAX_SPINS); - if (spins > 9000) - NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence); + debug_printf("Wait on fence %u (ack = %u, next = %u) timed out !\n", + fence->sequence, + screen->fence.sequence_ack, screen->fence.sequence); return FALSE; } @@ -200,5 +207,4 @@ nvc0_screen_fence_next(struct nvc0_screen *screen) { nvc0_fence_emit(screen->fence.current); nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); - nvc0_screen_fence_update(screen); } diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h index e63c164bda4..3d8c3f8ba60 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.h +++ b/src/gallium/drivers/nvc0/nvc0_fence.h @@ -7,7 +7,8 @@ #define NVC0_FENCE_STATE_AVAILABLE 0 #define NVC0_FENCE_STATE_EMITTED 1 -#define NVC0_FENCE_STATE_SIGNALLED 2 +#define NVC0_FENCE_STATE_FLUSHED 2 +#define NVC0_FENCE_STATE_SIGNALLED 3 struct nvc0_mm_allocation; diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 3b676fd21a1..5af96cbacea 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -138,9 +138,10 @@ 
nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) boolean nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit); - void nvc0_screen_fence_next(struct nvc0_screen *); +void +nvc0_screen_fence_update(struct nvc0_screen *, boolean flushed); static INLINE boolean nvc0_screen_fence_emit(struct nvc0_screen *screen) diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 19fd85273c1..2db43d8704b 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -371,6 +371,8 @@ nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) { struct nvc0_context *nvc0 = chan->user_private; + nvc0_screen_fence_update(nvc0->screen, TRUE); + nvc0_bufctx_emit_relocs(nvc0); } @@ -398,7 +400,7 @@ nvc0_draw_arrays(struct nvc0_context *nvc0, prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } - chan->flush_notify = NULL; + chan->flush_notify = nvc0_default_flush_notify; } static void @@ -568,7 +570,7 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, } } - chan->flush_notify = NULL; + chan->flush_notify = nvc0_default_flush_notify; } void -- cgit v1.2.3 From 67c7aefea33a7935e42ede30463eb7ca5009b068 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Feb 2011 17:04:49 +0100 Subject: nvc0: sync textures with render targets ourselves Fixes for example piglit/fbo-flushing and nexuiz' bloom effect. 
--- src/gallium/drivers/nvc0/nvc0_buffer.c | 6 +++--- src/gallium/drivers/nvc0/nvc0_resource.h | 5 +++-- src/gallium/drivers/nvc0/nvc0_screen.h | 5 +++++ src/gallium/drivers/nvc0/nvc0_state_validate.c | 16 ++++++++++++++++ src/gallium/drivers/nvc0/nvc0_tex.c | 7 +++++++ src/gallium/drivers/nvc0/nvc0_winsys.h | 2 +- 6 files changed, 35 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c index f16671ac7ff..aa949bdfa36 100644 --- a/src/gallium/drivers/nvc0/nvc0_buffer.c +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c @@ -117,7 +117,7 @@ nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf, memcpy(buf->data + start, bounce->map, size); nouveau_bo_unmap(bounce); - buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + buf->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING; nouveau_bo_ref(NULL, &bounce); if (mm) @@ -156,7 +156,7 @@ nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf, release_allocation(&mm, nvc0->screen->fence.current); if (start == 0 && size == buf->base.width0) - buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + buf->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING; return TRUE; } @@ -179,7 +179,7 @@ nvc0_buffer_transfer_get(struct pipe_context *pipe, if (buf->domain == NOUVEAU_BO_VRAM) { if (usage & PIPE_TRANSFER_READ) { - if (buf->status & NVC0_BUFFER_STATUS_DIRTY) + if (buf->status & NVC0_BUFFER_STATUS_GPU_WRITING) nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0); } } diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index 709e6157f55..599823c0dc9 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -24,7 +24,8 @@ struct nvc0_context; * USER_MEMORY: resource->data is a pointer to client memory and may change * between GL calls */ -#define NVC0_BUFFER_STATUS_DIRTY (1 << 0) +#define NVC0_BUFFER_STATUS_GPU_READING (1 << 0) 
+#define NVC0_BUFFER_STATUS_GPU_WRITING (1 << 1) #define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7) /* Resources, if mapped into the GPU's address space, are guaranteed to @@ -90,7 +91,7 @@ nvc0_resource_map_offset(struct nvc0_context *nvc0, nvc0_buffer_adjust_score(nvc0, res, -250); if ((res->domain == NOUVEAU_BO_VRAM) && - (res->status & NVC0_BUFFER_STATUS_DIRTY)) + (res->status & NVC0_BUFFER_STATUS_GPU_WRITING)) nvc0_buffer_download(nvc0, res, 0, res->base.width0); if ((res->domain != NOUVEAU_BO_GART) || diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 5af96cbacea..d952ff1f9b1 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -131,6 +131,11 @@ nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) if (likely(res->bo)) { nouveau_bo_validate(screen->base.channel, res->bo, flags); + if (flags & NOUVEAU_BO_WR) + res->status |= NVC0_BUFFER_STATUS_GPU_WRITING; + if (flags & NOUVEAU_BO_RD) + res->status |= NVC0_BUFFER_STATUS_GPU_READING; + nvc0_resource_fence(res, flags); } } diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 96c1198d4cb..0cc0a0c6236 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -58,6 +58,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) struct nouveau_channel *chan = nvc0->screen->base.channel; struct pipe_framebuffer_state *fb = &nvc0->framebuffer; unsigned i; + boolean serialize = FALSE; nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME); @@ -86,6 +87,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, sf->depth); OUT_RING (chan, mt->layer_stride >> 2); + if (mt->base.status & NVC0_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NVC0_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NVC0_BUFFER_STATUS_GPU_READING; + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, 
NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); } @@ -111,12 +117,22 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, sf->height); OUT_RING (chan, (unk << 16) | sf->depth); + if (mt->base.status & NVC0_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NVC0_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NVC0_BUFFER_STATUS_GPU_READING; + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); } else { BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); OUT_RING (chan, 0); } + + if (serialize) { + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); + } } static void diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c index b219f82c903..968558a5869 100644 --- a/src/gallium/drivers/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nvc0/nvc0_tex.c @@ -196,9 +196,16 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) OUT_RINGp (chan, &tic->tic[3], 5); need_flush = TRUE; + } else + if (res->status & NVC0_BUFFER_STATUS_GPU_WRITING) { + BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); + OUT_RING (chan, (tic->id << 4) | 1); } nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + res->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING; + res->status |= NVC0_BUFFER_STATUS_GPU_READING; + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h index 1544fb7a1de..45f71967eff 100644 --- a/src/gallium/drivers/nvc0/nvc0_winsys.h +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h @@ -95,7 +95,7 @@ OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res, unsigned delta, unsigned flags) { if (flags & NOUVEAU_BO_WR) - res->status |= NVC0_BUFFER_STATUS_DIRTY; + res->status |= NVC0_BUFFER_STATUS_GPU_WRITING; return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); } -- cgit v1.2.3 From 4377657f8e204fe2c7b6af194293dd3bea63fca8 Mon Sep 17 00:00:00 2001 
From: Christoph Bumiller Date: Thu, 24 Feb 2011 17:08:23 +0100 Subject: nvc0: correct allocation of constrained registers In linear scan we can't allocate multiple values with different live ranges at the same time to assign them consecutive regs. Maybe we should just switch to graph coloring for all values ... --- src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 221 +++++++++++++++++++--------- 1 file changed, 154 insertions(+), 67 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index 718943bdbdf..d7213949483 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -39,6 +39,30 @@ struct register_set { struct nv_pc *pc; }; +/* aliasing is allowed */ +static void +intersect_register_sets(struct register_set *dst, + struct register_set *src1, struct register_set *src2) +{ + int i; + + for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) { + dst->bits[i][0] = src1->bits[i][0] | src2->bits[i][0]; + dst->bits[i][1] = src1->bits[i][1] | src2->bits[i][1]; + } +} + +static void +mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask) +{ + int i; + + for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) { + set->bits[i][0] = (set->bits[i][0] | mask) & umask; + set->bits[i][1] = (set->bits[i][1] | mask) & umask; + } +} + struct nv_pc_pass { struct nv_pc *pc; struct nv_instruction **insns; @@ -327,14 +351,14 @@ do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) assert(b->join == a->join); } -static INLINE void +static INLINE boolean try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) { if (!join_allowed(ctx, a, b)) { #ifdef NVC0_RA_DEBUG_JOIN debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n); #endif - return; + return FALSE; } if (livei_have_overlap(a->join, b->join)) { #ifdef NVC0_RA_DEBUG_JOIN @@ -342,10 +366,27 @@ try_join_values(struct 
nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) livei_print(a); livei_print(b); #endif - return; + return FALSE; } do_join_values(ctx, a, b); + + return TRUE; +} + +static void +join_values_nofail(struct nv_pc_pass *ctx, + struct nv_value *a, struct nv_value *b, boolean type_only) +{ + if (type_only) { + assert(join_allowed(ctx, a, b)); + do_join_values(ctx, a, b); + } else { + boolean ok = try_join_values(ctx, a, b); + if (!ok) { + NOUVEAU_ERR("failed to coalesce values\n"); + } + } } static INLINE boolean @@ -474,8 +515,13 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) return 0; } +#define JOIN_MASK_PHI (1 << 0) +#define JOIN_MASK_SELECT (1 << 1) +#define JOIN_MASK_MOV (1 << 2) +#define JOIN_MASK_BIND (1 << 3) + static int -pass_join_values(struct nv_pc_pass *ctx, int iter) +pass_join_values(struct nv_pc_pass *ctx, unsigned mask) { int c, n; @@ -484,29 +530,28 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) switch (i->opcode) { case NV_OP_PHI: - if (iter != 2) + if (!(mask & JOIN_MASK_PHI)) break; for (c = 0; c < 6 && i->src[c]; ++c) - try_join_values(ctx, i->def[0], i->src[c]->value); + join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE); break; case NV_OP_MOV: - if ((iter == 2) && i->src[0]->value->insn && - !nv_is_vector_op(i->src[0]->value->join->insn->opcode)) + if (!(mask & JOIN_MASK_MOV)) + break; + if (i->src[0]->value->insn && !i->src[0]->value->insn->def[1]) try_join_values(ctx, i->def[0], i->src[0]->value); break; case NV_OP_SELECT: - if (iter != 1) + if (!(mask & JOIN_MASK_SELECT)) break; - for (c = 0; c < 6 && i->src[c]; ++c) { - assert(join_allowed(ctx, i->def[0], i->src[c]->value)); - do_join_values(ctx, i->def[0], i->src[c]->value); - } + for (c = 0; c < 6 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE); break; case NV_OP_BIND: - if (iter) + if (!(mask & JOIN_MASK_BIND)) break; for (c = 0; c < 4 && i->src[c]; ++c) - do_join_values(ctx, i->def[c], i->src[c]->value); 
+ join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE); break; case NV_OP_TEX: case NV_OP_TXB: @@ -743,21 +788,6 @@ nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set) set->pc = pc; } -/* We allocate registers for all defs of a vector instruction at once. - * Since we'll encounter all of them in the allocation loop, do the allocation - * when we're at the one with the live range that starts latest. - */ -static boolean -is_best_representative(struct nv_value *val) -{ - struct nv_instruction *nvi = val->insn; - int i; - for (i = 0; i < 4 && val->insn->def[i]; ++i) - if (nvi->def[i]->livei && nvi->def[i]->livei->bgn > val->livei->bgn) - return FALSE; - return TRUE; -} - static void insert_ordered_tail(struct nv_value *list, struct nv_value *nval) { @@ -774,42 +804,46 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval) elem->next = nval; } -static int -pass_linear_scan(struct nv_pc_pass *ctx, int iter) +static void +collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head, + boolean assigned_only) { - struct nv_instruction *i; - struct register_set f, free; + struct nv_value *val; int k, n; - struct nv_value *cur, *val, *tmp[2]; - struct nv_value active, inactive, handled, unhandled; - - make_empty_list(&active); - make_empty_list(&inactive); - make_empty_list(&handled); - make_empty_list(&unhandled); - nvc0_ctor_register_set(ctx->pc, &free); + make_empty_list(head); - /* joined values should have range = NULL and thus not be added; - * also, fixed memory values won't be added because they're not - * def'd, just used - */ for (n = 0; n < ctx->num_insns; ++n) { - i = ctx->insns[n]; + struct nv_instruction *i = ctx->insns[n]; + /* for joined values, only the representative will have livei != NULL */ for (k = 0; k < 5; ++k) { if (i->def[k] && i->def[k]->livei) - insert_ordered_tail(&unhandled, i->def[k]); - else - if (0 && i->def[k]) - debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n); + if 
(!assigned_only || i->def[k]->reg.id >= 0) + insert_ordered_tail(head, i->def[k]); } } - for (val = unhandled.next; val != unhandled.prev; val = val->next) { + for (val = head->next; val != head->prev; val = val->next) { assert(val->join == val); assert(val->livei->bgn <= val->next->livei->bgn); } +} + +static int +pass_linear_scan(struct nv_pc_pass *ctx) +{ + struct register_set f, free; + struct nv_value *cur, *val, *tmp[2]; + struct nv_value active, inactive, handled, unhandled; + + make_empty_list(&active); + make_empty_list(&inactive); + make_empty_list(&handled); + + nvc0_ctor_register_set(ctx->pc, &free); + + collect_register_values(ctx, &unhandled, FALSE); foreach_s(cur, tmp[0], &unhandled) { remove_from_list(cur); @@ -846,16 +880,7 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) reg_occupy(&f, val); if (cur->reg.id < 0) { - boolean mem = FALSE; - int v = nvi_vector_size(cur->insn); - - if (v > 1) { - if (is_best_representative(cur)) - mem = !reg_assign(&f, &cur->insn->def[0], v); - } else { - if (iter) - mem = !reg_assign(&f, &cur, 1); - } + boolean mem = !reg_assign(&f, &cur, 1); if (mem) { NOUVEAU_ERR("out of registers\n"); @@ -869,6 +894,68 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) return 0; } +/* Allocate values defined by instructions such as TEX, which have to be + * assigned to consecutive registers. + * Linear scan doesn't really work here since the values can have different + * live intervals. 
+ */ +static int +pass_allocate_constrained_values(struct nv_pc_pass *ctx) +{ + struct nv_value regvals, *val; + struct nv_instruction *i; + struct nv_value *defs[4]; + struct register_set regs[4]; + int n, vsize, c; + uint32_t mask; + boolean mem; + + collect_register_values(ctx, &regvals, TRUE); + + for (n = 0; n < ctx->num_insns; ++n) { + i = ctx->insns[n]; + vsize = nvi_vector_size(i); + if (!(vsize > 1)) + continue; + assert(vsize <= 4); + + for (c = 0; c < vsize; ++c) + defs[c] = i->def[c]->join; + + if (defs[0]->reg.id >= 0) { + for (c = 1; c < vsize; ++c) + assert(defs[c]->reg.id >= 0); + continue; + } + + for (c = 0; c < vsize; ++c) { + nvc0_ctor_register_set(ctx->pc, &regs[c]); + + foreach(val, &regvals) { + if (val->reg.id >= 0 && livei_have_overlap(val, defs[c])) + reg_occupy(&regs[c], val); + } + mask = 0x11111111; + if (vsize == 2) /* granularity is 2 and not 4 */ + mask |= 0x11111111 << 2; + mask_register_set(&regs[c], 0, mask << c); + + if (defs[c]->livei) + insert_ordered_tail(&regvals, defs[c]); + } + for (c = 1; c < vsize; ++c) + intersect_register_sets(&regs[0], &regs[0], &regs[c]); + + mem = !reg_assign(&regs[0], &defs[0], vsize); + + if (mem) { + NOUVEAU_ERR("out of registers\n"); + abort(); + } + } + return 0; +} + static int nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) { @@ -922,19 +1009,19 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) livei_print(&pc->values[i]); #endif - ret = pass_join_values(ctx, 0); + ret = pass_join_values(ctx, JOIN_MASK_PHI); if (ret) goto out; - ret = pass_linear_scan(ctx, 0); + ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_BIND); if (ret) goto out; - ret = pass_join_values(ctx, 1); + ret = pass_join_values(ctx, JOIN_MASK_MOV); if (ret) goto out; - ret = pass_join_values(ctx, 2); + ret = pass_allocate_constrained_values(ctx); if (ret) goto out; - ret = pass_linear_scan(ctx, 1); + ret = pass_linear_scan(ctx); if (ret) goto out; -- cgit v1.2.3 From cd47f10c901d7ac48843586432c2e592ed35eed3 Mon Sep 17 00:00:00
2001 From: Christoph Bumiller Date: Thu, 24 Feb 2011 17:22:15 +0100 Subject: nvc0: preemptively insert branch at ENDIF Might be necessary if a block sneaks in somewhere, like a common block for moves of phi sources after a loop break. This is harmless and normally will be removed before emission. --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 6 +++++- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 3d03400518b..8d4d0f3af60 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -142,9 +142,10 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) struct nv_instruction *nvi, *next; int j; + /* find first non-empty block emitted before b */ for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j); - if (j >= 0) { + for (; j >= 0; --j) { in = pc->bb_list[j]; /* check for no-op branches (BRA $PC+8) */ @@ -158,6 +159,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) nvc0_insn_delete(in->exit); } b->emit_pos = in->emit_pos + in->emit_size; + + if (in->emit_size) /* no more no-op branches to b */ + break; } pc->bb_list[pc->num_blocks++] = b; diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 5e208e8623d..fc19ef1eb19 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1634,6 +1634,10 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *b = new_basic_block(bld->pc); + if (bld->pc->current_block->exit && + !bld->pc->current_block->exit->terminator) + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, b, FALSE); + --bld->cond_lvl; nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind); nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); -- 
cgit v1.2.3 From f01748355360ac98c772ce8b82ca0e6c2f94629a Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 22 Feb 2011 21:50:17 +0100 Subject: nvc0: kick out empty live ranges They affect overlap tests even though they're actually empty. --- src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index d7213949483..f4afe083e2d 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -87,6 +87,9 @@ add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range) { struct nv_range *range, **nextp = &val->livei; + if (bgn == end) /* [a, a) is invalid / empty */ + return TRUE; + for (range = val->livei; range; range = range->next) { if (end < range->bgn) break; /* insert before */ -- cgit v1.2.3 From 96121399077787a9701c173dbb3ce0d1f30f00a9 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Feb 2011 17:23:23 +0100 Subject: nvc0: presin and preex2 can load from const space --- src/gallium/drivers/nvc0/nvc0_pc_print.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 7840078614f..4088a557231 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -353,8 +353,8 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 }, + 
{ NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 }, { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, -- cgit v1.2.3 From b5f04b20089c219f760fb6a369041bd782708247 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 20 Feb 2011 13:13:11 +0100 Subject: nvc0: don't fold loads from local memory --- src/gallium/drivers/nvc0/nvc0_pc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index 3a3a00f27be..f51d289e8cd 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -44,6 +44,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s, if (ld->indirect >= 0) return FALSE; + /* a few ops can use g[] sources directly, but we don't support g[] yet */ + if (ld->src[0]->value->reg.file == NV_FILE_MEM_L || + ld->src[0]->value->reg.file == NV_FILE_MEM_G) + return FALSE; + for (i = 0; i < 3 && nvi->src[i]; ++i) if (nvi->src[i]->value->reg.file == NV_FILE_IMM) return FALSE; -- cgit v1.2.3 From 1a8297139396ec2a6415ef803a3901e1ecef485c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Feb 2011 17:26:44 +0100 Subject: nvc0: set local memory usage info in shader header Before this, l[] access was a no-op. 
--- src/gallium/drivers/nvc0/nvc0_context.h | 1 + src/gallium/drivers/nvc0/nvc0_program.c | 9 ++++++++- src/gallium/drivers/nvc0/nvc0_program.h | 1 + src/gallium/drivers/nvc0/nvc0_screen.c | 8 ++++++-- src/gallium/drivers/nvc0/nvc0_shader_state.c | 15 +++++++++++++++ src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 3 +++ 6 files changed, 34 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 3722f358d89..1ce5554f7b7 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -81,6 +81,7 @@ struct nvc0_context { uint8_t num_vtxelts; uint8_t num_textures[5]; uint8_t num_samplers[5]; + uint8_t tls_required; /* bitmask of shader types using l[] */ uint16_t scissor; uint32_t uniform_buffer_bound[5]; } state; diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index f7ea97ddb1d..0685a842304 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -301,9 +301,11 @@ prog_decl(struct nvc0_translation_info *ti, ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]); assert(first == last); break; + case TGSI_FILE_TEMPORARY: + ti->temp128_nr = MAX2(ti->temp128_nr, last + 1); + break; case TGSI_FILE_NULL: case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: case TGSI_FILE_SAMPLER: case TGSI_FILE_ADDRESS: case TGSI_FILE_IMMEDIATE: @@ -644,6 +646,11 @@ nvc0_prog_scan(struct nvc0_translation_info *ti) break; } + if (ti->require_stores) { + prog->hdr[0] |= 1 << 26; + prog->hdr[1] |= ti->temp128_nr * 16; /* l[] size */ + } + assert(!ret); return ret; } diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h index 3450cec175d..f6fea29780b 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -76,6 +76,7 @@ struct nvc0_translation_info { 
uint32_t *immd32; ubyte *immd32_ty; unsigned immd32_nr; + unsigned temp128_nr; ubyte edgeflag_out; struct nvc0_subroutine *subr; unsigned num_subrs; diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 321d86bdf1a..f7f1fd09a12 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -475,7 +475,7 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RING (chan, (15 << 4) | 1); } - screen->tls_size = 4 * 4 * 32 * 128 * 4; + screen->tls_size = (16 * 32) * (NVC0_CAP_MAX_PROGRAM_TEMPS * 16); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, screen->tls_size, &screen->tls); if (ret) @@ -489,6 +489,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RING (chan, screen->tls_size >> 32); OUT_RING (chan, screen->tls_size); + BEGIN_RING(chan, RING_3D_(0x07a0), 1); + OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1); OUT_RING (chan, 0); @@ -642,8 +644,10 @@ nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) nouveau_bo_validate(chan, screen->text, flags); nouveau_bo_validate(chan, screen->uniforms, flags); nouveau_bo_validate(chan, screen->txc, flags); - nouveau_bo_validate(chan, screen->tls, flags); nouveau_bo_validate(chan, screen->mp_stack_bo, flags); + + if (screen->cur_ctx && screen->cur_ctx->state.tls_required) + nouveau_bo_validate(chan, screen->tls, flags); } int diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index 633641713dc..357f8b80deb 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -27,6 +27,16 @@ #include "nvc0_context.h" +static INLINE void +nvc0_program_update_context_state(struct nvc0_context *nvc0, + struct nvc0_program *prog, int stage) +{ + if (prog->hdr[1]) + nvc0->state.tls_required |= 1 << stage; + else + 
nvc0->state.tls_required &= ~(1 << stage); +} + static boolean nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) { @@ -77,6 +87,7 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0) if (!nvc0_program_validate(nvc0, vp)) return; + nvc0_program_update_context_state(nvc0, vp, 0); BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2); OUT_RING (chan, 0x11); @@ -98,6 +109,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) if (!nvc0_program_validate(nvc0, fp)) return; + nvc0_program_update_context_state(nvc0, fp, 4); BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1); OUT_RING (chan, fp->fp.early_z); @@ -127,6 +139,7 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0) } if (!nvc0_program_validate(nvc0, tp)) return; + nvc0_program_update_context_state(nvc0, tp, 1); BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2); OUT_RING (chan, 0x21); @@ -148,6 +161,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0) } if (!nvc0_program_validate(nvc0, tp)) return; + nvc0_program_update_context_state(nvc0, tp, 2); BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); OUT_RING (chan, 0x31); @@ -170,6 +184,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) } if (!nvc0_program_validate(nvc0, gp)) return; + nvc0_program_update_context_state(nvc0, gp, 3); BEGIN_RING(chan, RING_3D(GP_SELECT), 1); OUT_RING (chan, 0x41); diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index fc19ef1eb19..f7dff596c28 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -364,6 +364,9 @@ bld_loop_phi(struct bld_context *bld, struct bld_register *reg, struct nv_basic_block *bb = bld->pc->current_block; struct nv_value *val = NULL; + if (bld->ti->require_stores) /* XXX: actually only for INDEXABLE_TEMP */ + return NULL; + if (bld->loop_lvl > 1) { --bld->loop_lvl; if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl))) -- cgit v1.2.3 From 9dd7d0803e5a881510d05a61908d6a7ffc04d16b Mon Sep 17 
00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Feb 2011 15:28:04 +0100 Subject: nvc0: fix new_value calls using type instead of size --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 8d4d0f3af60..8b56aa427fd 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -592,7 +592,7 @@ constant_operand(struct nv_pc *pc, } else if (u.s32 > 0 && u.s32 == (1 << shift)) { nvi->opcode = NV_OP_SHL; - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift; nv_reference(pc, nvi, 0, nvi->src[t]->value); nv_reference(pc, nvi, 1, val); break; @@ -600,14 +600,14 @@ constant_operand(struct nv_pc *pc, break; case NV_OP_RCP: u.f32 = 1.0f / u.f32; - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32; nvi->opcode = NV_OP_MOV; assert(s == 0); nv_reference(pc, nvi, 0, val); break; case NV_OP_RSQ: u.f32 = 1.0f / sqrtf(u.f32); - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32; nvi->opcode = NV_OP_MOV; assert(s == 0); nv_reference(pc, nvi, 0, val); -- cgit v1.2.3 From b0bf4ee85f01e9cbe240e49e67a947d052daa3f3 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 23 Feb 2011 15:00:26 +0100 Subject: nvc0: sprite coord enable is per GENERIC, not overall index --- src/gallium/drivers/nvc0/nvc0_state_validate.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 0cc0a0c6236..70c418fad9b 100644 --- 
a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -302,14 +302,12 @@ nvc0_validate_sprite_coords(struct nvc0_context *nvc0) if (nvc0->rast->pipe.point_quad_rasterization) { uint32_t en = nvc0->rast->pipe.sprite_coord_enable; - int i; - struct nvc0_program *prog = nvc0->fragprog; while (en) { - i = ffs(en) - 1; + int i = ffs(en) - 1; en &= ~(1 << i); - if (prog->fp.in_pos[i] >= 0xc0 && prog->fp.in_pos[i] < 0xe0) - reg |= 8 << ((prog->fp.in_pos[i] - 0xc0) / 4); + if (i >= 0 && i < 8) + reg |= 8 << i; } } -- cgit v1.2.3 From d0caaba370cb70f426180a46e5475bf8a05ac19b Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 23 Feb 2011 17:29:02 +0100 Subject: nvc0: change TGSI CMP translation to use slct Saves us the explicit compare instruction needed with selp. --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 13 ++++++++++--- src/gallium/drivers/nvc0/nvc0_pc_print.c | 6 +++--- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 4 ++-- 3 files changed, 15 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 8b56aa427fd..c5a7367a5fd 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -245,7 +245,9 @@ check_swap_src_0_1(struct nv_instruction *nvi) struct nv_ref *src0 = nvi->src[0]; struct nv_ref *src1 = nvi->src[1]; - if (!nv_op_commutative(nvi->opcode) && NV_BASEOP(nvi->opcode) != NV_OP_SET) + if (!nv_op_commutative(nvi->opcode) && + NV_BASEOP(nvi->opcode) != NV_OP_SET && + NV_BASEOP(nvi->opcode) != NV_OP_SLCT) return; assert(src0 && src1 && src0->value && src1->value); @@ -266,8 +268,13 @@ check_swap_src_0_1(struct nv_instruction *nvi) } } - if (nvi->src[0] != src0 && NV_BASEOP(nvi->opcode) == NV_OP_SET) - nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; + if (nvi->src[0] != src0) { + if (NV_BASEOP(nvi->opcode) == 
NV_OP_SET) + nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; + else + if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT) + nvi->set_cond = NV_CC_INVERSE(nvi->set_cond); + } } static void diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 4088a557231..90c669cc4b8 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -363,9 +363,9 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SLCT, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index f7dff596c28..a44d330c731 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1493,10 +1493,10 @@ bld_instruction(struct bld_context *bld, case TGSI_OPCODE_CMP: FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { src0 = emit_fetch(bld, insn, 0, c); - src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero); src1 = emit_fetch(bld, insn, 1, c); src2 = emit_fetch(bld, insn, 2, c); - dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0); + dst0[c] = bld_insn_3(bld, NV_OP_SLCT_F32, src1, src2, src0); + dst0[c]->insn->set_cond = NV_CC_LT; } break; case TGSI_OPCODE_COS: -- cgit v1.2.3 From 11b9f4439c56045a8f718e483844135dd9fbcb58 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 23 Feb 2011 14:54:25 +0100 Subject: nvc0: fix PointCoord enable in FP header --- src/gallium/drivers/nvc0/nvc0_program.c | 7 +++++-- 
1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 0685a842304..899fe147c6a 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -535,8 +535,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) a = ti->input_loc[i][c] / 2; if (ti->input_loc[i][c] >= 0x2c0) a -= 32; - if ((a & ~7) == 0x70/2) - fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */ + if (ti->input_loc[i][0] == 0x70) + fp->hdr[5] |= 1 << (28 + c); /* FRAG_COORD_UMASK */ + else + if (ti->input_loc[i][0] == 0x2e0) + fp->hdr[14] |= 1 << (24 + c); /* POINT_COORD */ else fp->hdr[4 + a / 32] |= m << (a % 32); } -- cgit v1.2.3